12 import pygraphviz as pgv
15 from amara import bindery
16 from amara.xslt import transform
17 from Cheetah.Template import Template
# Command-line handling. Both options default to directories that sit next
# to the current working directory (i.e. under its parent).
_parent_dir = os.path.dirname(os.getcwd())
parser = argparse.ArgumentParser(description='Process docbook article tree.')
parser.add_argument('--style', nargs='?',
                    default=_parent_dir+'/style/default/')
parser.add_argument('--output', nargs='?',
                    default=_parent_dir+'/htdocs/')
args = parser.parse_args()

# The style directory is joined by plain string concatenation, so a value
# passed with --style has to end in '/'.
# NOTE(review): with nargs='?' a bare `--style` (no value) makes argparse
# store None, and the concatenations below would then raise TypeError.
style_xslt = args.style+"docbook.xsl"
style_tmpl = args.style+"index.en.html.tmpl"
outputdir = args.output

# Scratch directory the site is assembled in before it is published.
tmptarget = tempfile.mkdtemp()+'/'

# File extensions of xi:include targets that are executed rather than
# included as text.
valid_scripts = ['.py','.pl']
38 except OSError as exc: # Python >2.5
39 if exc.errno == errno.EEXIST:
43 def publish(src,target):
44 cmd = ["rsync","-a","--delete",src,target]
45 retcode = subprocess.call(cmd)
47 print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
# Namespace prefixes handed to bindery.parse() so the XPath expressions in
# this file can use the short forms db: (DocBook), xi: (XInclude) and
# xl: (XLink).
PREFIXES={u'db': u'http://docbook.org/ns/docbook',
          u'xi': u'http://www.w3.org/2001/XInclude',
          u'xl': u'http://www.w3.org/1999/xlink'}
55 """Class containing the state of the directory with articles"""
61 for dirname, dirnames, filenames in os.walk(self._cwd):
62 for filename in filenames:
63 if fnmatch.fnmatch(filename, '*.xml'):
64 file_ = os.path.join(dirname,filename)
65 doc = bindery.parse(file_, prefixes=PREFIXES)
66 title = doc.xml_select(u'/db:article/db:info/db:title')
67 menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
69 base = file_.split('.')[1]
70 link = base.replace('index','')
71 self._tree.append(link)
74 return set(self._tree)
77 """Class representing a version of a webpage"""
78 def __init__(self,link,page):
86 self._rendered_article = None
92 return set(self._resources)
def set_article(self, art):
    """Store `art` as this page's rendered article.

    The sitemap build uses this to inject generated menu HTML in place
    of an article produced by the XSLT step.
    """
    self._rendered_article = art
101 self._doc = bindery.parse(self._file, prefixes=PREFIXES)
102 if self._doc.xml_select(u'/db:article/db:info/db:title'):
103 self._title = unicode(self._doc.article.info.title)
104 if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
105 self._menu = unicode(self._doc.article.info.titleabbrev)
107 dirname = os.path.dirname(self._file)
108 code = self._doc.xml_select(u"//xi:include[@parse='text']")
111 (p, ext) = os.path.splitext(c.href)
112 if ext in valid_scripts:
113 exe = os.path.join(os.path.abspath(dirname)+'/'+c.href)
114 xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
115 xstr = bindery.parse(str(xml.stdout.read()))
116 idp = c.xml_index_on_parent
117 for x in xstr.xml_children:
118 c.xml_parent.xml_insert(idp,x)
119 c.xml_parent.xml_remove(c)
121 for r in self._doc.xml_select(u"//db:link[@xl:href]"):
122 rf = os.path.join(dirname,r.href)
123 if os.path.isfile(rf):
124 self._resources.append(rf)
125 for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
126 im = os.path.join(dirname,i.fileref)
127 if os.path.isfile(im):
128 self._resources.append(im)
131 # amara can not handle the docbook stylesheets
132 # xmlarticle = transform(doc,style_xslt)
134 dirname = os.path.dirname(self._file)
136 infile = os.path.basename(tempfile.mktemp())
137 outfile = tempfile.mktemp()
138 tfi = open(infile,'w')
139 tfi.write(self._doc.xml_encode())
141 # cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
142 cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
143 retcode = subprocess.call(cmd)
145 print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
146 tfo = open(outfile,'r')
147 self._rendered_article = tfo.read()
153 def template(self,sitemap):
154 htmlmenu = sitemap.gen_menu(self._lang,None,"menu")
155 levelmenu = sitemap.gen_menu(self._lang,self,"tree")
156 template = Template(file=style_tmpl,
157 langmenu = sitemap.lang_menu(self._lang,self._link)
158 searchList=[{'title':self._title},
160 {'article':self._rendered_article},
161 {'levelmenu':levelmenu},
162 {'langmenu':langmenu}])
163 outfile = tmptarget+'html'.join(self._file.rsplit('xml',1))
164 mkdir_p(os.path.dirname(outfile))
165 out = open(outfile, 'w')
166 out.write(str(template))
171 """Class representing a webpage on the site"""
172 def __init__(self,link):
174 # find the representations of the link.
177 if self._link[-1] == '/':
179 lang = self._scan_languages(path)
181 self._pages.append(Page(self,l))
def add_page(self, l):
    """Attach one more Page, built from `l`, to this link.

    `l` is passed straight to the Page constructor; the callers visible in
    this file supply a (language code, file path) tuple.
    """
    new_page = Page(self, l)
    self._pages.append(new_page)
186 def _scan_languages(self,path):
188 for l in glob.glob('.'+path+'*'):
190 if len(ls) > 3 and ls[3] == 'xml':
191 lang.append((ls[2],l))
198 for page in self._pages:
203 for page in self._pages:
204 p.append(page.language())
208 for page in self._pages:
def template(self, sitemap):
    """Run the template step on every page held by this link.

    `sitemap` is forwarded unchanged to each page's own template() call.
    """
    for version in self._pages:
        version.template(sitemap)
216 for page in self._pages:
217 if page.language()==lang:
223 for page in self._pages:
224 res = res.union(page.resources())
229 def __init__(self,token,value):
241 return self._children
248 return self.inorder(self._root)
253 for x in self.inorder(l.children()):
256 def _add(self,trie, key, content):
260 node = Node(k,content)
265 self._add(ch.children(), key, content)
def add(self, key, content):
    """Insert `content` under `key`, starting the descent at the root.

    Public entry point for the recursive _add() helper.
    """
    top = self._root
    self._add(top, key, content)
270 def _graph(self, trie, G):
272 G.add_node(l.token())
273 for ch in l.children():
274 G.add_edge(l.token(),ch.token())
275 self._graph(l.children(), G)
278 G = pgv.AGraph(directed=True)
279 G.add_node("sitemap")
280 for ch in self._root:
281 G.add_edge("sitemap",ch.token())
282 self._graph(self._root, G)
287 def _menu(self, trie, lang, page, css):
288 html = "<ul%s>\n" % css
291 p = l.value().page(lang)
293 sel = ' class="selected"'
295 html += '<li%s><a href="%s">%s</a>\n' \
296 % (sel,l.value().link(),p.menu())
298 html += '<li%s><a href="%s.en" hreflang="en">%s</a>*\n' \
299 % (sel,l.value().link(), l.value().page('en').menu())
301 html += self._menu(l.children(), lang, page, "")
305 def menu(self,lang,page,cssclass):
308 css = ' class="'+cssclass+'"'
309 return self._menu(self._root, lang, page, css)
312 """Class keeping the internal site structure"""
314 self._file = 'sitemap.txt'
316 self._sitelang = set()
317 self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
def add_link(self, link):
    """Register `link` in the site tree.

    The link is cut into path tokens that keep their trailing slash, e.g.
    '/a/b' becomes ['/a/', 'b']; empty pieces produced by the split are
    dropped. The token list is the key under which a new Link is stored.
    """
    pieces = re.split(r'(^/\w*/|\w*/)', link)
    tokens = [piece for piece in pieces if piece]
    self._tree.add(tokens, Link(link))
325 f = open(self._file,'w')
326 f.write('\n'.join(link.link() for link in self._tree))
332 sml = f.read().split()
336 except IOError, what_error:
337 print 'INFO: Could not read sitemap.txt - one will be created'
340 return set(link.link() for link in self._tree)
344 for link in self._tree:
347 print "Prepare [%5.2f s]" % (round(t2-t1,2))
348 for link in self._tree:
349 self._sitelang = self._sitelang.union(set(link.languages()))
350 for tran in self._sitelang:
352 self._tranlang[tran] = gettext.translation('iso_639_3', languages=[tran])
354 print "Language [%5.2f s]" % (round(t3-t2,2))
355 for link in self._tree:
358 print "Render [%5.2f s]" % (round(t4-t3,2))
359 for link in self._tree:
362 print "Template [%5.2f s]" % (round(t5-t4,2))
366 for link in self._tree:
367 res = res.union(link.resources())
369 outfile = tmptarget+f
370 mkdir_p(os.path.dirname(outfile))
371 shutil.copyfile(f,outfile)
372 print "Resources[%5.2f s]" % (round(t6-t5,2))
373 sitmaplink = Link('/sitemap')
374 for l in self._sitelang:
375 sitmaplink.add_page((l,'/sitemap.'+l+'.xml'))
376 for l in self._sitelang:
377 sitmaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap"))
378 sitmaplink.page(l).template(self)
380 print "Sitemap [%5.2f s]" % (round(t7-t6,2))
def gen_menu(self, lang, page, cssclass):
    """Return the menu built by the site tree for the given arguments.

    Thin wrapper: all three arguments go unchanged to the tree's menu().
    """
    tree = self._tree
    return tree.menu(lang, page, cssclass)
388 def lang_menu(self,lang,link):
390 for l in link.languages():
391 isoxml = u"//iso_639_3_entry[@*='"+l+"']"
392 ln = self._isocode.xml_select(isoxml)[0].name
394 ln = self._tranlang[lang].gettext(ln)
399 html += '<li><a href="%s" hreflang="%s">%s</a></li>' % (p, l, ln)
404 publish(tmptarget, args.output)
405 publish(args.style+"css", args.output)
406 publish(args.style+"images",args.output)
408 def generateSitemap():
411 sfile = open('sitemap.txt')
412 flist = sfile.read().split()
415 sitemap.append(dict(link=f))
416 except IOError, what_error:
417 print 'Sitemap missing - generating one.'
419 for dirname, dirnames, filenames in os.walk('.'):
420 for filename in filenames:
421 if fnmatch.fnmatch(filename, '*.xml'):
422 xfile = os.path.join(dirname,filename)
423 doc = bindery.parse(xfile,
424 prefixes={u'db': u'http://docbook.org/ns/docbook',
425 u'xi': u'http://www.w3.org/2001/XInclude',
426 u'xl': u'http://www.w3.org/1999/xlink'})
427 title = doc.xml_select(u'/db:article/db:info/db:title')
428 menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
429 code = doc.xml_select(u"//xi:include[@parse='text']")
430 resource = doc.xml_select(u"//db:link[@xl:href]")
431 image = doc.xml_select(u"//db:imagedata[@fileref]")
434 (p, ext) = os.path.splitext(c.href)
435 if ext in valid_scripts:
440 base = xfile.split('.')[1]
441 link = base.replace('index','')
442 level = len(filter(None,re.split(r'(^/\w*/|\w*/)',link)))
445 rf = os.path.join(dirname,r.href)
446 if os.path.isfile(rf):
449 im = os.path.join(dirname,i.fileref)
450 if os.path.isfile(im):
452 page = dict(title=unicode(doc.article.info.title),
453 menu=unicode(doc.article.info.titleabbrev),
454 output=os.path.join(dirname,
455 filename.replace('xml','html')),
461 if l['link'] == link:
465 print "adding "+link+" to sitemap"
469 sfile = open('sitemap.txt','w')
471 sfile.write(l['link']+'\n')
475 def expandXincludeTxt(page):
476 doc = bindery.parse(page['file'],
477 prefixes={u'db': u'http://docbook.org/ns/docbook',
478 u'xi': u'http://www.w3.org/2001/XInclude'})
480 code = doc.xml_select(u"//xi:include[@parse='text']")
482 (p, ext) = os.path.splitext(c.href)
483 if ext in valid_scripts:
484 exe = os.path.join(os.path.abspath(c.href))
485 xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
486 xstr = bindery.parse(str(xml.stdout.read()))
487 id = c.xml_index_on_parent
488 for x in xstr.xml_children:
489 c.xml_parent.xml_insert(id,x)
490 c.xml_parent.xml_remove(c)
493 def xsltConvert(doc):
494 # amara can not handle the docbook stylesheets
495 # xmlarticle = transform(doc,style_xslt)
497 rundir = os.path.dirname(page['file'])
499 infile = os.path.basename(tempfile.mktemp())
500 outfile = tempfile.mktemp()
501 tfi = open(infile,'w')
502 tfi.write(doc.xml_encode())
504 # cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
505 cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
506 retcode = subprocess.call(cmd)
508 print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
509 tfo = open(outfile,'r')
517 def genMenu(page,sitemap,slevel,elevel):
520 if elevel == MAXLEVEL or elevel == 1 or page == None:
524 html = '<ul class="tree">\n'
525 idx = sitemap.index(page)
526 while (sitemap[idx]['level'] == page['level']):
528 title = sitemap[idx]['menu']
530 while (idx < len(sitemap) and sitemap[idx]['level'] == page['level']):
531 sm.append(sitemap[idx])
536 if slevel > p['level'] or elevel < p['level']:
538 if not title and p['link'] == '/':
541 if oldlevel < p['level']:
543 elif oldlevel > p['level']:
544 if p['link'][-1] == '/':
546 html+='</ul>\n</li>\n'
547 if page != None and page == p:
548 html+='<li class="selected"><a href="%s">%s</a>' % (p['link'],p['menu'])
550 html+='<li><a href="%s">%s</a>' % (p['link'],p['menu'])
551 if p['link'][-1] != '/' or p['link'] == '/':
553 oldlevel = p['level']
557 def writeToTemplate(page,doc,sitemap):
558 (menu,menuname) = genMenu(page,sitemap,1,MAXLEVEL)
559 (levelmenu,levelname) = genMenu(page,sitemap,page['level'],page['level'])
560 template = Template(file=style_tmpl,
561 searchList=[{'title':page['title']},
564 {'levelmenu':levelmenu},
565 {'levelname':levelname}])
566 outfile = tmptarget+page['output']
567 mkdir_p(os.path.dirname(outfile))
568 out = open(outfile, 'w')
569 out.write(str(template))
571 for r in page['res']:
572 mkdir_p(os.path.dirname(tmptarget+r))
573 shutil.copyfile(r, tmptarget+r)
575 def createSitemap(sitemap):
576 (menu,menuname) = genMenu(None,sitemap,1,MAXLEVEL)
577 template = Template(file=style_tmpl,
584 outfile = tmptarget+'sitemap.en.html'
585 mkdir_p(os.path.dirname(outfile))
586 out = open(outfile, 'w')
587 out.write(str(template))
596 missing = dir_.set() - sitemap.set()
597 removed = sitemap.set() - dir_.set()
599 print page+' pages missing!!'
601 print 'adding missing page '+page
602 sitemap.add_link(page)
603 if len(missing & removed) != 0:
604 print 'writing new sitemap - please adjust if needed'
613 print "Publish [%5.2f s]" % (round(t2-t1,2))
615 sitemap = generateSitemap()
616 tmptarget = tempfile.mkdtemp()+'/'
620 print "Page : %-30s %30s" % (page['link'],
621 time.ctime(os.stat(page['file']).st_mtime)),
622 doc = expandXincludeTxt(page)
623 pubdoc = xsltConvert(doc)
624 writeToTemplate(page,pubdoc,sitemap)
626 print "[%5.2f s]" % (round(t2-t1,2))
629 print "Total time\t\t\t\t\t\t\t [%5.2f s]" % (round(tot,2))
630 createSitemap(sitemap)
631 publish(tmptarget, args.output)
632 publish(args.style+"css", args.output)
633 publish(args.style+"images",args.output)