12 import pygraphviz as pgv
16 from amara import bindery
17 from amara.xslt import transform
18 from Cheetah.Template import Template
# Command-line configuration for the site generator.
# Both options default to directories below the parent of the current
# working directory: <parent>/style/default/ and <parent>/htdocs/.
_parent_dir = os.path.dirname(os.getcwd())
_default_style = _parent_dir+'/style/default/'
_default_output = _parent_dir+'/htdocs/'

parser = argparse.ArgumentParser(description='Process docbook article tree.')
# nargs='?' allows the flag to appear without a value.  Without `const`
# argparse stores None in that case, and the string concatenations below
# would raise TypeError; a bare flag now falls back to the default instead.
parser.add_argument('--style', nargs='?',
                    const=_default_style, default=_default_style)
parser.add_argument('--output', nargs='?',
                    const=_default_output, default=_default_output)
args = parser.parse_args()

# args.style is used as a plain string prefix (file names are appended to it
# directly), so guarantee that it ends with a path separator.
args.style = os.path.join(args.style, '')

# XSLT stylesheet handed to xsltproc when rendering an article.
style_xslt = args.style+"docbook.xsl"
outputdir = args.output

# Scratch directory (with trailing slash) that receives the generated files
# before they are published to the output location.
tmptarget = tempfile.mkdtemp()+'/'

# Extensions of xi:include targets that are executed as scripts rather than
# included verbatim.
valid_scripts = ['.py','.pl']
38 except OSError as exc: # Python >2.5
39 if exc.errno == errno.EEXIST:
43 def publish(src,target):
44 cmd = ["rsync","-a","--delete",src,target]
45 retcode = subprocess.call(cmd)
47 print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
49 def ssh_cmd(target, command):
52 cmd = ["ssh",t[0],c[0],c[1],t[1]]
53 retcode = subprocess.call(cmd)
55 print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
# XML namespace prefixes passed to bindery.parse(..., prefixes=PREFIXES) so
# that the XPath expressions in this file can use db:, xi:, xl: and html:.
_NAMESPACE_PAIRS = (
    (u'db', u'http://docbook.org/ns/docbook'),
    (u'xi', u'http://www.w3.org/2001/XInclude'),
    (u'xl', u'http://www.w3.org/1999/xlink'),
    (u'html', u'http://www.w3.org/1999/xhtml'),
)
PREFIXES = dict(_NAMESPACE_PAIRS)
63 """Class containing the state of the directory with articles"""
69 for dirname, dirnames, filenames in os.walk(self._cwd):
70 for filename in filenames:
71 if fnmatch.fnmatch(filename, '*.xml'):
72 file_ = os.path.join(dirname,filename)
73 doc = bindery.parse(file_, prefixes=PREFIXES)
74 title = doc.xml_select(u'/db:article/db:info/db:title')
75 menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
77 base = file_.split('.')[1]
78 link = base.replace('index','')
79 self._tree.append(link)
82 return set(self._tree)
85 """Class representing a version of a webpage"""
86 def __init__(self,link,page):
94 self._rendered_article = None
100 return set(self._resources)
def set_article(self, art):
    """Store already-rendered content as this page's article body.

    The stored value is what template() later places in the 'article'
    slot of the page template.

    art -- the rendered article content to keep; it is stored as given.
    """
    rendered = art
    self._rendered_article = rendered
109 self._doc = bindery.parse(self._file, prefixes=PREFIXES)
110 if self._doc.xml_select(u'/db:article/db:info/db:title'):
111 self._title = unicode(self._doc.article.info.title)
112 if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
113 self._menu = unicode(self._doc.article.info.titleabbrev)
115 dirname = os.path.dirname(self._file)
116 code = self._doc.xml_select(u"//xi:include[@parse='text']")
119 (p, ext) = os.path.splitext(c.href)
120 if ext in valid_scripts:
122 exe.append(os.path.join(os.path.abspath(dirname)+'/'+c.href))
123 if c.xml_select(u"//xi:include[@accept-language]"):
124 alang = c.xml_attributes[None, "accept-language"]
125 exe.append("lang="+alang)
126 if c.xml_select(u"//xi:include[@xpointer]"):
127 exe.append("xptr="+c.xpointer)
128 xml = subprocess.Popen(exe,stdout=subprocess.PIPE)
129 xstr = bindery.parse(str(xml.stdout.read()))
130 idp = c.xml_index_on_parent
131 for x in xstr.xml_children:
132 c.xml_parent.xml_insert(idp,x)
133 c.xml_parent.xml_remove(c)
135 for r in self._doc.xml_select(u"//db:link[@xl:href]"):
136 rf = os.path.join(dirname,r.href)
137 if os.path.isfile(rf):
138 self._resources.append(rf)
139 for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
140 im = os.path.join(dirname,i.fileref)
141 if os.path.isfile(im):
142 self._resources.append(im)
143 for i in self._doc.xml_select(u"//html:form[@action]"):
144 pyscript = re.split('\.py',i.action,1)[0]+'.py'
145 im = os.path.join(dirname,pyscript)
146 if os.path.isfile(im):
147 self._resources.append(im)
150 # amara can not handle the docbook stylesheets
151 # xmlarticle = transform(doc,style_xslt)
153 dirname = os.path.dirname(self._file)
155 infile = os.path.basename(tempfile.mktemp())
156 outfile = tempfile.mktemp()
157 tfi = open(infile,'w')
158 tfi.write(self._doc.xml_encode(omit_xml_declaration=True))
160 # cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
161 cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
162 retcode = subprocess.call(cmd)
164 print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
165 tfo = open(outfile,'r')
166 self._rendered_article = tfo.read()
172 def template(self,sitemap):
173 htmlmenu = sitemap.gen_menu(self._lang,None,"menu")
174 levelmenu = sitemap.gen_menu(self._lang,self,"tree")
175 langmenu = sitemap.lang_menu(self._lang,self._link)
176 template = Template(file=args.style+'index.'+self._lang+'.html.tmpl',
177 searchList=[{'title':self._title},
179 {'article':self._rendered_article},
180 {'levelmenu':levelmenu},
181 {'langmenu':langmenu}])
182 outfile = tmptarget+'html'.join(self._file.rsplit('xml',1))
183 mkdir_p(os.path.dirname(outfile))
184 out = open(outfile, 'w')
185 out.write(str(template))
190 """Class representing a webpage on the site"""
191 def __init__(self,link):
193 # find the representations of the link.
196 if self._link[-1] == '/':
198 lang = self._scan_languages(path)
200 self._pages.append(Page(self,l))
def add_page(self, l):
    """Create a Page for this link and append it to the link's pages.

    l -- passed unchanged as the second argument of Page(); the callers
         visible in this file pass a (language, path) tuple.
    """
    new_page = Page(self, l)
    self._pages.append(new_page)
205 def _scan_languages(self,path):
207 for l in glob.glob('.'+path+'*'):
209 if len(ls) > 3 and ls[3] == 'xml':
210 lang.append((ls[2],l))
217 for page in self._pages:
222 for page in self._pages:
223 p.append(page.language())
227 for page in self._pages:
def template(self, sitemap):
    """Run the templating step for every page stored on this link.

    sitemap -- forwarded unchanged to each page's template() method.

    Pages are processed in the order they were added; nothing is returned.
    """
    for version in self._pages:
        version.template(sitemap)
235 for page in self._pages:
236 if page.language()==lang:
242 for page in self._pages:
243 res = res.union(page.resources())
248 def __init__(self,token,value):
260 return self._children
267 return self.inorder(self._root)
272 for x in self.inorder(l.children()):
275 def _add(self,trie, key, content):
279 node = Node(k,content)
284 self._add(ch.children(), key, content)
def add(self, key, content):
    """Insert content under key, starting the search at self._root.

    Public entry point for the recursive helper _add(); both arguments are
    forwarded unchanged.

    key     -- the key to insert under (add_link in this file passes a list
               of path tokens).
    content -- the value to store for that key.
    """
    start = self._root
    self._add(start, key, content)
289 def _graph(self, trie, G):
291 G.add_node(l.token())
292 for ch in l.children():
293 G.add_edge(l.token(),ch.token())
294 self._graph(l.children(), G)
297 G = pgv.AGraph(directed=True)
298 G.add_node("sitemap")
299 for ch in self._root:
300 G.add_edge("sitemap",ch.token())
301 self._graph(self._root, G)
306 def _menu(self, trie, lang, page, css):
307 html = "<ul%s>\n" % css
310 p = l.value().page(lang)
312 sel = ' class="selected"'
314 html += '<li%s><a href="%s">%s</a>\n' \
315 % (sel,l.value().link(),p.menu())
317 html += '<li%s><a href="%s.en" hreflang="en">%s</a>*\n' \
318 % (sel,l.value().link(), l.value().page('en').menu())
320 html += self._menu(l.children(), lang, page, "")
324 def menu(self,lang,page,cssclass):
327 css = ' class="'+cssclass+'"'
328 return self._menu(self._root, lang, page, css)
331 """Class keeping the internal site structure"""
333 self._file = 'sitemap.txt'
335 self._sitelang = set()
336 self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
def add_link(self, link):
    """Register link in the site tree.

    The link is split into tokens: a leading '/segment/' followed by each
    further 'segment/' and whatever text remains.  Empty pieces produced by
    the split are dropped.  A new Link object for the full link is then
    stored in self._tree under that token list.

    link -- the link string to add, e.g. '/a/b/page'.
    """
    pieces = re.split(r'(^/[\w-]*/|[\w-]*/)', link)
    # Keep only the non-empty pieces, as filter(None, ...) would.
    tokens = [piece for piece in pieces if piece]
    self._tree.add(tokens, Link(link))
344 f = open(self._file,'w')
345 f.write('\n'.join(link.link() for link in self._tree))
351 sml = f.read().split()
355 except IOError, what_error:
356 print 'INFO: Could not read sitemap.txt - one will be created'
359 return set(link.link() for link in self._tree)
363 for link in self._tree:
366 print "Prepare [%5.2f s]" % (round(t2-t1,2))
367 for link in self._tree:
368 self._sitelang = self._sitelang.union(set(link.languages()))
369 for tran in self._sitelang:
371 self._tranlang[tran] = gettext.translation('iso_639_3',
374 print "Language [%5.2f s]" % (round(t3-t2,2))
375 for link in self._tree:
378 print "Render [%5.2f s]" % (round(t4-t3,2))
379 for link in self._tree:
382 print "Template [%5.2f s]" % (round(t5-t4,2))
386 for link in self._tree:
387 res = res.union(link.resources())
389 outfile = tmptarget+f
390 mkdir_p(os.path.dirname(outfile))
391 shutil.copyfile(f,outfile)
392 print "Resources[%5.2f s]" % (round(t6-t5,2))
393 sitmaplink = Link('/sitemap')
394 for l in self._sitelang:
395 sitmaplink.add_page((l,'/sitemap.'+l+'.xml'))
396 for l in self._sitelang:
397 sitmaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap"))
398 sitmaplink.page(l).template(self)
400 print "Sitemap [%5.2f s]" % (round(t7-t6,2))
def gen_menu(self, lang, page, cssclass):
    """Return the menu that the site tree builds for the given arguments.

    Thin wrapper around self._tree.menu(); all three arguments are passed
    through unchanged and its result is returned as is.

    lang     -- language code the menu is generated for.
    page     -- page argument forwarded to the tree (callers in this file
                pass a page object or None).
    cssclass -- CSS class name(s) forwarded to the tree.
    """
    tree = self._tree
    menu = tree.menu(lang, page, cssclass)
    return menu
408 def lang_menu(self,lang,link):
410 for l in link.languages():
411 isoxml = u"//iso_639_3_entry[@*='"+l+"']"
412 ln = self._isocode.xml_select(isoxml)[0].name
414 ln = self._tranlang[lang].gettext(ln)
419 html += '<li><a href="%s" hreflang="%s">%s</a></li>' % (p, l, ln)
424 ssh_cmd(args.output,"mkdir -p")
425 publish(tmptarget, args.output)
426 for res in ["css","images","js","favicon.ico"]:
427 if (os.path.exists(args.style+res)):
428 publish(args.style+res, args.output)
429 ssh_cmd(args.output,"chmod a+rx")
438 missing = dir_.set() - sitemap.set()
439 removed = sitemap.set() - dir_.set()
441 print page+' pages missing!!'
443 print 'adding missing page '+page
444 sitemap.add_link(page)
445 if len(missing)+len(removed) != 0:
446 print 'writing new sitemap - please adjust if needed'
455 print "Publish [%5.2f s]" % (round(t2-t1,2))
456 print "Total [%5.2f s]" % (round(t2-ts,2))