12 import pygraphviz as pgv
16 from amara import bindery
17 from amara.xslt import transform
18 from Cheetah.Template import Template
# Command-line configuration and the module-level paths derived from it.
#
# Defaults are resolved relative to the parent of the current working
# directory: <parent>/style/default/ for the style and <parent>/htdocs/ for
# the published output.
_base_dir = os.path.dirname(os.getcwd())
_default_style = os.path.join(_base_dir, 'style', 'default', '')
_default_output = os.path.join(_base_dir, 'htdocs', '')

parser = argparse.ArgumentParser(description='Process docbook article tree.')
# With nargs='?' argparse stores ``const`` when the flag is given without a
# value.  Without an explicit ``const`` that value is None, which made the
# string concatenations below raise TypeError; fall back to the default.
parser.add_argument('--style', nargs='?',
                    const=_default_style, default=_default_style)
parser.add_argument('--output', nargs='?',
                    const=_default_output, default=_default_output)
args = parser.parse_args()

# File names are appended to args.style by plain concatenation (here and
# where the css/images directories are published), so make sure the value
# ends with a path separator even when the user omitted it.
args.style = os.path.join(args.style, '')

style_xslt = args.style+"docbook.xsl"
style_tmpl = args.style+"index.en.html.tmpl"
outputdir = args.output

# Scratch directory the site is assembled in; kept with a trailing '/'
# because output paths are built as ``tmptarget + relative_path``.
tmptarget = tempfile.mkdtemp()+'/'

# Extensions of xi:include targets that are treated as scripts.
valid_scripts = ['.py','.pl']
39 except OSError as exc: # Python >2.5
40 if exc.errno == errno.EEXIST:
# publish(src, target): mirror ``src`` into ``target`` by running
# ``rsync -a --delete src target`` through subprocess.call.  ``--delete`` means
# files present in the target but not in the source are removed by rsync.
# Returns nothing visible in this extract.
44 def publish(src,target):
45 cmd = ["rsync","-a","--delete",src,target]
46 retcode = subprocess.call(cmd)
# NOTE(review): original line 47 is absent from this extract; presumably it
# guards the message below on a non-zero ``retcode`` - confirm in the full file.
# The failure is only printed (Python 2 print statement); nothing is raised here.
48 print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
# XML namespace prefixes (DocBook, XInclude, XLink) handed to
# bindery.parse(..., prefixes=PREFIXES) so XPath expressions can use the
# short db:/xi:/xl: forms.
PREFIXES = dict([
    (u'db', u'http://docbook.org/ns/docbook'),
    (u'xi', u'http://www.w3.org/2001/XInclude'),
    (u'xl', u'http://www.w3.org/1999/xlink'),
])
56 """Class containing the state of the directory with articles"""
62 for dirname, dirnames, filenames in os.walk(self._cwd):
63 for filename in filenames:
64 if fnmatch.fnmatch(filename, '*.xml'):
65 file_ = os.path.join(dirname,filename)
66 doc = bindery.parse(file_, prefixes=PREFIXES)
67 title = doc.xml_select(u'/db:article/db:info/db:title')
68 menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
70 base = file_.split('.')[1]
71 link = base.replace('index','')
72 self._tree.append(link)
75 return set(self._tree)
78 """Class representing a version of a webpage"""
# Constructor taking a ``link`` and a ``page`` argument.
# NOTE(review): original lines 80-86 are absent from this extract, so how
# ``link`` and ``page`` are stored cannot be seen here.
79 def __init__(self,link,page):
# The rendered article starts out unset; other code in this file assigns it
# later (set_article(), and the xsltproc output read via tfo.read()).
87 self._rendered_article = None
93 return set(self._resources)
def set_article(self,art):
    """Store ``art`` in ``self._rendered_article``, replacing any earlier value."""
    setattr(self, '_rendered_article', art)
102 self._doc = bindery.parse(self._file, prefixes=PREFIXES)
103 if self._doc.xml_select(u'/db:article/db:info/db:title'):
104 self._title = unicode(self._doc.article.info.title)
105 if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
106 self._menu = unicode(self._doc.article.info.titleabbrev)
108 dirname = os.path.dirname(self._file)
109 code = self._doc.xml_select(u"//xi:include[@parse='text']")
112 (p, ext) = os.path.splitext(c.href)
113 if ext in valid_scripts:
114 exe = os.path.join(os.path.abspath(dirname)+'/'+c.href)
115 xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
116 xstr = bindery.parse(str(xml.stdout.read()))
117 idp = c.xml_index_on_parent
118 for x in xstr.xml_children:
119 c.xml_parent.xml_insert(idp,x)
120 c.xml_parent.xml_remove(c)
122 for r in self._doc.xml_select(u"//db:link[@xl:href]"):
123 rf = os.path.join(dirname,r.href)
124 if os.path.isfile(rf):
125 self._resources.append(rf)
126 for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
127 im = os.path.join(dirname,i.fileref)
128 if os.path.isfile(im):
129 self._resources.append(im)
132 # amara can not handle the docbook stylesheets
133 # xmlarticle = transform(doc,style_xslt)
135 dirname = os.path.dirname(self._file)
137 infile = os.path.basename(tempfile.mktemp())
138 outfile = tempfile.mktemp()
139 tfi = open(infile,'w')
140 tfi.write(self._doc.xml_encode())
142 # cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
143 cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
144 retcode = subprocess.call(cmd)
146 print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
147 tfo = open(outfile,'r')
148 self._rendered_article = tfo.read()
# template(self, sitemap): fill the Cheetah template ``style_tmpl`` for this
# page and write the result below ``tmptarget``.
#   sitemap - object providing gen_menu(lang, page, cssclass) and
#             lang_menu(lang, link), both called below.
154 def template(self,sitemap):
# Two menus are requested: one with css class "menu" and no selected page, one
# with css class "tree" and this page passed as the page argument.
155 htmlmenu = sitemap.gen_menu(self._lang,None,"menu")
156 levelmenu = sitemap.gen_menu(self._lang,self,"tree")
# NOTE(review): as extracted, the ``langmenu = ...`` assignment on line 158
# sits between the opening ``Template(file=style_tmpl,`` on line 157 and its
# ``searchList=`` argument on line 159, with no trailing comma.  Read literally
# that is not valid Python - confirm the statement order in the full file.
# Original line 160 (another searchList entry, presumably) is also absent.
157 template = Template(file=style_tmpl,
158 langmenu = sitemap.lang_menu(self._lang,self._link)
159 searchList=[{'title':self._title},
161 {'article':self._rendered_article},
162 {'levelmenu':levelmenu},
163 {'langmenu':langmenu}])
# Output path: self._file with its last occurrence of 'xml' replaced by
# 'html' (rsplit with maxsplit 1, re-joined with 'html'), prefixed by tmptarget.
164 outfile = tmptarget+'html'.join(self._file.rsplit('xml',1))
165 mkdir_p(os.path.dirname(outfile))
166 out = open(outfile, 'w')
167 out.write(str(template))
# NOTE(review): no close() of ``out`` is visible in this extract (line 168 is
# absent) - verify the handle is closed.
172 """Class representing a webpage on the site"""
# Constructor taking the site ``link`` this object represents.
# NOTE(review): original lines 174, 176-177, 179, 181 and 183 are absent from
# this extract; only the statements below are visible.
173 def __init__(self,link):
175 # find the representations of the link.
# Links ending in '/' get special treatment; the branch body is not visible
# here (presumably a path is derived for the directory case - confirm).
178 if self._link[-1] == '/':
# _scan_languages() appends (ls[2], l) tuples to a list named ``lang``; that
# is presumably what comes back here.
180 lang = self._scan_languages(path)
# One Page(self, l) is appended per item ``l``; the enclosing loop header is
# not visible (presumably iterating ``lang``).
182 self._pages.append(Page(self,l))
def add_page(self,l):
    """Append a new ``Page(self, l)`` to ``self._pages``."""
    register = self._pages.append
    register(Page(self, l))
# _scan_languages(self, path): look for files whose name starts with
# '.' + path (a glob relative to the current working directory) and collect
# a tuple for each one that qualifies.
# NOTE(review): original lines 188, 190 and 193 onwards are absent; ``lang``
# and ``ls`` are defined on lines not shown (``ls`` is presumably the glob
# result split into dot-separated parts - confirm).
187 def _scan_languages(self,path):
189 for l in glob.glob('.'+path+'*'):
# Only entries with more than three parts whose fourth part is 'xml' are
# kept; the tuple stored is (third part, full glob match).
191 if len(ls) > 3 and ls[3] == 'xml':
192 lang.append((ls[2],l))
199 for page in self._pages:
204 for page in self._pages:
205 p.append(page.language())
209 for page in self._pages:
def template(self,sitemap):
    """Call ``template(sitemap)`` on every entry of ``self._pages``, in order."""
    versions = iter(self._pages)
    for version in versions:
        version.template(sitemap)
217 for page in self._pages:
218 if page.language()==lang:
224 for page in self._pages:
225 res = res.union(page.resources())
# Constructor taking a ``token`` and a ``value``; elsewhere in this file nodes
# are created as Node(k, content) and queried via token(), value() and
# children().
# NOTE(review): the constructor body (original lines 231 onwards) is absent
# from this extract.
230 def __init__(self,token,value):
242 return self._children
249 return self.inorder(self._root)
254 for x in self.inorder(l.children()):
# _add(self, trie, key, content): recursive insertion helper.  ``trie`` is the
# collection of nodes at the current level (the public add() passes
# self._root; the recursion passes a node's children()).
# NOTE(review): original lines 258-260 and 262-265 are absent, so the way
# ``k`` is taken from ``key`` and the matching logic cannot be seen here.
257 def _add(self,trie, key, content):
# A new node carrying ``content`` is created for token ``k``.
261 node = Node(k,content)
# Descend into the children of node ``ch`` with the same ``key`` object
# (``ch`` is bound on a line not shown).
266 self._add(ch.children(), key, content)
def add(self,key, content):
    """Insert ``content`` under ``key`` by delegating to ``_add`` from ``self._root``."""
    root = self._root
    self._add(root, key, content)
# _graph(self, trie, G): add the nodes of ``trie`` and the edges to their
# children to graph ``G`` (the caller in this file builds G with
# pgv.AGraph(directed=True)), recursing into each node's children.
# NOTE(review): original line 272 is absent; presumably it is the loop header
# binding ``l`` to each node of ``trie`` - confirm.
271 def _graph(self, trie, G):
273 G.add_node(l.token())
274 for ch in l.children():
275 G.add_edge(l.token(),ch.token())
# Nodes and edges are keyed by token() only.
# NOTE(review): the nesting of this recursive call relative to the inner loop
# cannot be determined from the extract (indentation was lost).
276 self._graph(l.children(), G)
279 G = pgv.AGraph(directed=True)
280 G.add_node("sitemap")
281 for ch in self._root:
282 G.add_edge("sitemap",ch.token())
283 self._graph(self._root, G)
# _menu(self, trie, lang, page, css): build an HTML <ul> fragment for the
# nodes of ``trie``, recursing into children.
#   lang - language code used to pick the page version of each node
#   page - page compared against when marking an entry "selected"
#          (the comparison itself is on a line not shown)
#   css  - text placed inside the opening <ul ...> tag (already formatted,
#          e.g. ' class="tree"', or an empty string for nested lists)
# NOTE(review): original lines 290-291, 293, 295, 298, 301 and 303-305 are
# absent from this extract (loop header, conditions, closing tags, return).
288 def _menu(self, trie, lang, page, css):
289 html = "<ul%s>\n" % css
292 p = l.value().page(lang)
294 sel = ' class="selected"'
# Entry for a node that has a version in ``lang``.  The link and menu text are
# interpolated into the markup as-is (no HTML escaping is visible here).
296 html += '<li%s><a href="%s">%s</a>\n' \
297 % (sel,l.value().link(),p.menu())
# Fallback entry pointing at the English version ("<link>.en"), marked with a
# trailing '*'; presumably used when no version in ``lang`` exists - confirm.
299 html += '<li%s><a href="%s.en" hreflang="en">%s</a>*\n' \
300 % (sel,l.value().link(), l.value().page('en').menu())
# Nested levels are rendered without a css class.
302 html += self._menu(l.children(), lang, page, "")
# menu(self, lang, page, cssclass): public entry point that renders the whole
# tree (from self._root) through _menu() and returns its result.
# NOTE(review): original lines 307-308 are absent; they may set a default for
# ``css`` or guard the assignment below on ``cssclass`` - confirm.
306 def menu(self,lang,page,cssclass):
# Turn the bare class name into the attribute text _menu() expects.
309 css = ' class="'+cssclass+'"'
310 return self._menu(self._root, lang, page, css)
313 """Class keeping the internal site structure"""
315 self._file = 'sitemap.txt'
317 self._sitelang = set()
318 self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
def add_link(self, link):
    """Register ``link`` in ``self._tree``.

    The link is split into path tokens (a leading ``/name/`` segment, then
    each following ``name/`` segment, plus any remainder); empty pieces are
    dropped and the token list is used as the key for a new ``Link(link)``.
    """
    pieces = re.split(r'(^/\w*/|\w*/)', link)
    tokens = [piece for piece in pieces if piece]
    insert = self._tree.add
    insert(tokens, Link(link))
326 f = open(self._file,'w')
327 f.write('\n'.join(link.link() for link in self._tree))
333 sml = f.read().split()
337 except IOError, what_error:
338 print 'INFO: Could not read sitemap.txt - one will be created'
341 return set(link.link() for link in self._tree)
345 for link in self._tree:
348 print "Prepare [%5.2f s]" % (round(t2-t1,2))
349 for link in self._tree:
350 self._sitelang = self._sitelang.union(set(link.languages()))
351 for tran in self._sitelang:
353 self._tranlang[tran] = gettext.translation('iso_639_3', languages=[tran])
355 print "Language [%5.2f s]" % (round(t3-t2,2))
356 for link in self._tree:
359 print "Render [%5.2f s]" % (round(t4-t3,2))
360 for link in self._tree:
363 print "Template [%5.2f s]" % (round(t5-t4,2))
367 for link in self._tree:
368 res = res.union(link.resources())
370 outfile = tmptarget+f
371 mkdir_p(os.path.dirname(outfile))
372 shutil.copyfile(f,outfile)
373 print "Resources[%5.2f s]" % (round(t6-t5,2))
374 sitmaplink = Link('/sitemap')
375 for l in self._sitelang:
376 sitmaplink.add_page((l,'/sitemap.'+l+'.xml'))
377 for l in self._sitelang:
378 sitmaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap"))
379 sitmaplink.page(l).template(self)
381 print "Sitemap [%5.2f s]" % (round(t7-t6,2))
def gen_menu(self,lang,page,cssclass):
    """Return ``self._tree.menu(lang, page, cssclass)`` unchanged."""
    tree = self._tree
    return tree.menu(lang, page, cssclass)
# lang_menu(self, lang, link): build HTML <li> entries, one per language
# returned by link.languages(), each linking to that language's version.
#   lang - language the language names are translated into
#   link - object providing languages()
# NOTE(review): original lines 390, 394, 396-399 and 401 onwards are absent
# from this extract (initialisation of ``html``, computation of ``p``, return).
389 def lang_menu(self,lang,link):
391 for l in link.languages():
# Look up the ISO 639-3 entry having any attribute equal to the language code
# and take its ``name``.  The XPath is built by string concatenation, and
# ``[0]`` raises IndexError when no entry matches.
392 isoxml = u"//iso_639_3_entry[@*='"+l+"']"
393 ln = self._isocode.xml_select(isoxml)[0].name
# Translate the language name with the gettext catalogue stored for ``lang``.
# NOTE(review): line 394 is absent; this may be guarded (e.g. by a membership
# test or try block) - confirm.
395 ln = self._tranlang[lang].gettext(ln)
400 html += '<li><a href="%s" hreflang="%s">%s</a></li>' % (p, l, ln)
405 publish(tmptarget, args.output)
406 publish(args.style+"css", args.output)
407 publish(args.style+"images",args.output)
# generateSitemap(): build the sitemap (a list of dicts; the script at the end
# of the file iterates the returned value and reads page['link'] and
# page['file']) from 'sitemap.txt' plus a scan of every '*.xml' file below the
# current directory, and write the link list back to 'sitemap.txt'.
# NOTE(review): many original lines are absent from this extract (410-411,
# 414-415, 419, 433-434, 437-440, 444-445, 448-449, 452, 457-461, 463-465,
# 467-469, 471, 473-475); comments describe only what is visible.
409 def generateSitemap():
# Read the whitespace-separated links of an existing sitemap file.
# NOTE(review): no close() of this handle is visible in the extract.
412 sfile = open('sitemap.txt')
413 flist = sfile.read().split()
416 sitemap.append(dict(link=f))
# Python 2 except syntax; a missing/unreadable file is only reported.
417 except IOError, what_error:
418 print 'Sitemap missing - generating one.'
420 for dirname, dirnames, filenames in os.walk('.'):
421 for filename in filenames:
422 if fnmatch.fnmatch(filename, '*.xml'):
423 xfile = os.path.join(dirname,filename)
# Same three namespace prefixes as the module-level PREFIXES constant.
424 doc = bindery.parse(xfile,
425 prefixes={u'db': u'http://docbook.org/ns/docbook',
426 u'xi': u'http://www.w3.org/2001/XInclude',
427 u'xl': u'http://www.w3.org/1999/xlink'})
# Select title, short title, text XIncludes, xlink'ed resources and images.
428 title = doc.xml_select(u'/db:article/db:info/db:title')
429 menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
430 code = doc.xml_select(u"//xi:include[@parse='text']")
431 resource = doc.xml_select(u"//db:link[@xl:href]")
432 image = doc.xml_select(u"//db:imagedata[@fileref]")
# XInclude targets whose extension is in valid_scripts are treated specially
# (the branch body is not visible here).
435 (p, ext) = os.path.splitext(c.href)
436 if ext in valid_scripts:
# xfile starts with './' (os.walk('.')), so the text between the first and
# second '.' is the path without its dotted suffixes; a path containing any
# other '.' would be cut short.  'index' is then removed to form the link.
441 base = xfile.split('.')[1]
442 link = base.replace('index','')
# Depth of the link = number of non-empty path tokens.  len(filter(...))
# relies on Python 2's filter returning a list.
443 level = len(filter(None,re.split(r'(^/\w*/|\w*/)',link)))
# Referenced resources and images are considered only when they exist as
# files relative to the article's directory.
446 rf = os.path.join(dirname,r.href)
447 if os.path.isfile(rf):
450 im = os.path.join(dirname,i.fileref)
451 if os.path.isfile(im):
# NOTE(review): filename.replace('xml','html') replaces every occurrence of
# 'xml' in the file name, not only the extension.
453 page = dict(title=unicode(doc.article.info.title),
454 menu=unicode(doc.article.info.titleabbrev),
455 output=os.path.join(dirname,
456 filename.replace('xml','html')),
# Match the scanned page against existing sitemap entries by link.
462 if l['link'] == link:
466 print "adding "+link+" to sitemap"
# Rewrite sitemap.txt with one link per line.
470 sfile = open('sitemap.txt','w')
472 sfile.write(l['link']+'\n')
# expandXincludeTxt(page): parse the article at page['file'] and replace each
# <xi:include parse="text"> element that points at a script (extension in
# valid_scripts) with the XML that script prints on stdout.  The caller at the
# end of the file uses the return value as the document (``doc = ...``); the
# return statement itself is on a line not shown.
# NOTE(review): original lines 480, 482 and 492-493 are absent (including the
# loop header binding ``c``).
476 def expandXincludeTxt(page):
477 doc = bindery.parse(page['file'],
478 prefixes={u'db': u'http://docbook.org/ns/docbook',
479 u'xi': u'http://www.w3.org/2001/XInclude'})
481 code = doc.xml_select(u"//xi:include[@parse='text']")
483 (p, ext) = os.path.splitext(c.href)
484 if ext in valid_scripts:
# os.path.join() with a single argument returns it unchanged, so ``exe`` is
# simply the href resolved against the current working directory.
485 exe = os.path.join(os.path.abspath(c.href))
# The file named by the document is executed directly; its exit status is
# never checked and the process is not waited on in the visible lines.
# NOTE(review): this runs whatever executable the article references - make
# sure article sources are trusted.
486 xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
487 xstr = bindery.parse(str(xml.stdout.read()))
# ``id`` shadows the Python builtin of the same name inside this function.
488 id = c.xml_index_on_parent
# NOTE(review): every child is inserted at the same index; if xml_insert
# behaves like list.insert this reverses the children's order - confirm.
489 for x in xstr.xml_children:
490 c.xml_parent.xml_insert(id,x)
# Finally drop the xi:include element itself.
491 c.xml_parent.xml_remove(c)
# xsltConvert(doc): serialise ``doc`` to a temporary file and transform it with
# the external ``xsltproc --xinclude`` tool using the stylesheet ``style_xslt``;
# the transformed output is read back from ``outfile``.
# NOTE(review): original lines 497, 499, 504, 508 and 511-517 are absent from
# this extract (presumably a chdir to ``rundir``, closing ``tfi``, the retcode
# check, reading/cleanup/return) - confirm in the full file.
494 def xsltConvert(doc):
495 # amara can not handle the docbook stylesheets
496 # xmlarticle = transform(doc,style_xslt)
# NOTE(review): ``page`` is not a parameter of this function; this line only
# works if a module-level ``page`` exists when it is called (the script at the
# end of the file appears to provide one as its loop variable).
498 rundir = os.path.dirname(page['file'])
# tempfile.mktemp() only returns a name and is documented as unsafe (another
# process can create the file first).  ``infile`` keeps just the base name, so
# it is created relative to the current working directory.
500 infile = os.path.basename(tempfile.mktemp())
501 outfile = tempfile.mktemp()
502 tfi = open(infile,'w')
503 tfi.write(doc.xml_encode())
505 # cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
506 cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
507 retcode = subprocess.call(cmd)
# Failure is only printed; the guard on ``retcode`` (line 508) is not visible.
509 print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
510 tfo = open(outfile,'r')
# genMenu(page, sitemap, slevel, elevel): build an HTML menu from the list of
# page dicts in ``sitemap`` (keys read here: 'level', 'link', 'menu').
#   page   - the current page dict, or None (createSitemap passes None)
#   slevel - lowest level to include; elevel - highest level to include
# Callers unpack the result as a 2-tuple (html, name).
# NOTE(review): many original lines are absent from this extract (519-520,
# 522-524, 528, 530, 533-536, 538, 540-541, 543, 546, 550, 553, 555-557),
# including loop headers, branch bodies and the return statement.
518 def genMenu(page,sitemap,slevel,elevel):
# Full-range menu, single top level, or no current page: the branch body is
# not visible here.
521 if elevel == MAXLEVEL or elevel == 1 or page == None:
525 html = '<ul class="tree">\n'
# Locate the current page, then walk over neighbouring entries of the same
# level.  This first loop has no bound check on ``idx`` in the visible lines.
526 idx = sitemap.index(page)
527 while (sitemap[idx]['level'] == page['level']):
529 title = sitemap[idx]['menu']
# Collect the run of same-level entries into ``sm``.
531 while (idx < len(sitemap) and sitemap[idx]['level'] == page['level']):
532 sm.append(sitemap[idx])
# Entries outside the requested level range are handled by a branch whose
# body is not shown (presumably skipped).
537 if slevel > p['level'] or elevel < p['level']:
539 if not title and p['link'] == '/':
# Level changes relative to the previous entry open or close nested lists.
542 if oldlevel < p['level']:
544 elif oldlevel > p['level']:
545 if p['link'][-1] == '/':
547 html+='</ul>\n</li>\n'
# The current page is marked with class="selected".  Link and menu text are
# interpolated without HTML escaping.
548 if page != None and page == p:
549 html+='<li class="selected"><a href="%s">%s</a>' % (p['link'],p['menu'])
551 html+='<li><a href="%s">%s</a>' % (p['link'],p['menu'])
# Links not ending in '/' (and the root link '/') take this branch; its body
# is not visible.
552 if p['link'][-1] != '/' or p['link'] == '/':
554 oldlevel = p['level']
# writeToTemplate(page, doc, sitemap): render one page through the Cheetah
# template ``style_tmpl``, write it to tmptarget + page['output'], and copy the
# page's resource files (page['res']) to the same relative paths below
# tmptarget.
# NOTE(review): original lines 563-564 (further searchList entries, presumably
# including the use of ``doc`` and ``menu``) and 571 are absent from this
# extract.
558 def writeToTemplate(page,doc,sitemap):
# Full menu over all levels, and a menu restricted to the page's own level.
559 (menu,menuname) = genMenu(page,sitemap,1,MAXLEVEL)
560 (levelmenu,levelname) = genMenu(page,sitemap,page['level'],page['level'])
561 template = Template(file=style_tmpl,
562 searchList=[{'title':page['title']},
565 {'levelmenu':levelmenu},
566 {'levelname':levelname}])
567 outfile = tmptarget+page['output']
568 mkdir_p(os.path.dirname(outfile))
# NOTE(review): no close() of ``out`` is visible (line 571 is absent).
569 out = open(outfile, 'w')
570 out.write(str(template))
# Copy each resource, creating its directory below tmptarget first.
572 for r in page['res']:
573 mkdir_p(os.path.dirname(tmptarget+r))
574 shutil.copyfile(r, tmptarget+r)
# createSitemap(sitemap): render the site-wide menu (no current page, all
# levels) through the Cheetah template ``style_tmpl`` and write the result to
# tmptarget + 'sitemap.en.html'.
# NOTE(review): original lines 579-584 (the Template arguments) and 589
# onwards are absent from this extract; no close() of ``out`` is visible.
576 def createSitemap(sitemap):
577 (menu,menuname) = genMenu(None,sitemap,1,MAXLEVEL)
578 template = Template(file=style_tmpl,
585 outfile = tmptarget+'sitemap.en.html'
586 mkdir_p(os.path.dirname(outfile))
587 out = open(outfile, 'w')
588 out.write(str(template))
# Script body: reconcile the article directory with the stored sitemap, then
# generate and publish the site.
# NOTE(review): many original lines are absent from this extract (590-596,
# 599, 601, 606-613, 615, 618-620, 626, 628-629), including loop headers,
# timing assignments and the creation of ``dir_``, ``sitemap``, ``t1``/``t2``
# and ``tot``.
# Links found on disk but not in the sitemap, and the reverse.
597 missing = dir_.set() - sitemap.set()
598 removed = sitemap.set() - dir_.set()
600 print page+' pages missing!!'
602 print 'adding missing page '+page
603 sitemap.add_link(page)
# NOTE(review): ``missing`` and ``removed`` as computed above are disjoint by
# construction, so their intersection is empty and this condition cannot be
# true unless either set is rebound on a line not shown; a union (``|``) may
# have been intended - confirm.
604 if len(missing & removed) != 0:
605 print 'writing new sitemap - please adjust if needed'
614 print "Publish [%5.2f s]" % (round(t2-t1,2))
# From here on the function-based pipeline is used: build the sitemap list,
# then expand, transform and template each page.
# NOTE(review): tmptarget is assigned a second fresh temp directory here; it
# is also created near the top of the file.
616 sitemap = generateSitemap()
617 tmptarget = tempfile.mkdtemp()+'/'
# Per-page progress line: link and source modification time; the trailing
# comma keeps the Python 2 print on the same output line for the timing below.
621 print "Page : %-30s %30s" % (page['link'],
622 time.ctime(os.stat(page['file']).st_mtime)),
623 doc = expandXincludeTxt(page)
624 pubdoc = xsltConvert(doc)
625 writeToTemplate(page,pubdoc,sitemap)
627 print "[%5.2f s]" % (round(t2-t1,2))
630 print "Total time\t\t\t\t\t\t\t [%5.2f s]" % (round(tot,2))
# Write the sitemap page, then rsync the staged site and the style's css and
# images directories into the output directory.
631 createSitemap(sitemap)
632 publish(tmptarget, args.output)
633 publish(args.style+"css", args.output)
634 publish(args.style+"images",args.output)