12 import pygraphviz as pgv
15 from amara import bindery
16 from amara.xslt import transform
17 from Cheetah.Template import Template
# Command line handling and module-level configuration.
#
# Both options default to directories next to the current working
# directory: <parent-of-cwd>/style/default/ and <parent-of-cwd>/htdocs/.
_default_style = os.path.dirname(os.getcwd())+'/style/default/'
_default_output = os.path.dirname(os.getcwd())+'/htdocs/'

parser = argparse.ArgumentParser(description='Process docbook article tree.')
# nargs='?' allows the flag to be given without a value; supply ``const``
# so that case falls back to the default instead of producing None (which
# would crash on the string concatenations below).
parser.add_argument('--style', nargs='?',
                    const=_default_style,
                    default=_default_style)
parser.add_argument('--output', nargs='?',
                    const=_default_output,
                    default=_default_output)
args = parser.parse_args()

# Paths are built from args.style by plain concatenation (here and in the
# publish(args.style+"css", ...) calls), so make sure a user supplied
# directory ends with a path separator.  Joining with '' appends the
# separator only when it is missing.
if args.style:
    args.style = os.path.join(args.style, '')

# Stylesheet and page template, both looked up inside the style directory.
style_xslt = args.style+"docbook.xsl"
style_tmpl = args.style+"index.en.html.tmpl"
outputdir = args.output

# Fresh temporary directory (with trailing slash) used as staging area
# before the result is published.
tmptarget = tempfile.mkdtemp()+'/'

# File extensions of xi:include targets that are run as scripts.
valid_scripts = ['.py', '.pl']
38 except OSError as exc: # Python >2.5
39 if exc.errno == errno.EEXIST:
43 def publish(src,target):
44 cmd = ["rsync","-a","--delete",src,target]
45 retcode = subprocess.call(cmd)
47 print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
# Namespace prefixes handed to bindery.parse() so that XPath expressions
# can use the short forms db: (DocBook), xi: (XInclude) and xl: (XLink).
PREFIXES = {
    u'db': u'http://docbook.org/ns/docbook',
    u'xi': u'http://www.w3.org/2001/XInclude',
    u'xl': u'http://www.w3.org/1999/xlink',
}
55 """Class containing the state of the directory with articles"""
61 for dirname, dirnames, filenames in os.walk(self._cwd):
62 for filename in filenames:
63 if fnmatch.fnmatch(filename, '*.xml'):
64 file_ = os.path.join(dirname,filename)
65 doc = bindery.parse(file_, prefixes=PREFIXES)
66 title = doc.xml_select(u'/db:article/db:info/db:title')
67 menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
69 base = file_.split('.')[1]
70 link = base.replace('index','')
71 self._tree.append(link)
74 return set(self._tree)
77 """Class representing a version of a webpage"""
78 def __init__(self,page):
85 self._rendered_article = None
def set_article(self, art):
    """Store ``art`` as the rendered article content of this page.

    Replaces whatever was previously held in ``_rendered_article``.
    """
    self._rendered_article = art
97 self._doc = bindery.parse(self._file, prefixes=PREFIXES)
98 if self._doc.xml_select(u'/db:article/db:info/db:title'):
99 self._title = unicode(self._doc.article.info.title)
100 if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
101 self._menu = unicode(self._doc.article.info.titleabbrev)
103 dirname = os.path.dirname(self._file)
104 code = self._doc.xml_select(u"//xi:include[@parse='text']")
107 (p, ext) = os.path.splitext(c.href)
108 if ext in valid_scripts:
109 exe = os.path.join(os.path.abspath(dirname)+'/'+c.href)
110 xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
111 xstr = bindery.parse(str(xml.stdout.read()))
112 idp = c.xml_index_on_parent
113 for x in xstr.xml_children:
114 c.xml_parent.xml_insert(idp,x)
115 c.xml_parent.xml_remove(c)
117 for r in self._doc.xml_select(u"//db:link[@xl:href]"):
118 rf = os.path.join(dirname,r.href)
119 if os.path.isfile(rf):
120 self._resources.append(rf)
121 for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
122 im = os.path.join(dirname,i.fileref)
123 if os.path.isfile(im):
124 self._resources.append(im)
127 # amara can not handle the docbook stylesheets
128 # xmlarticle = transform(doc,style_xslt)
130 dirname = os.path.dirname(self._file)
132 infile = os.path.basename(tempfile.mktemp())
133 outfile = tempfile.mktemp()
134 tfi = open(infile,'w')
135 tfi.write(self._doc.xml_encode())
137 # cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
138 cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
139 retcode = subprocess.call(cmd)
141 print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
142 tfo = open(outfile,'r')
143 self._rendered_article = tfo.read()
149 def template(self,sitemap):
150 htmlmenu = sitemap.gen_menu(self._lang,None,None)
151 levelmenu = sitemap.gen_menu(self._lang,self,"tree")
152 langmenu = sitemap.lang_menu(self._lang)
153 template = Template(file=style_tmpl,
154 searchList=[{'title':self._title},
156 {'article':self._rendered_article},
157 {'levelmenu':levelmenu},
158 {'langmenu':langmenu}])
159 outfile = tmptarget+'html'.join(self._file.rsplit('xml',1))
160 mkdir_p(os.path.dirname(outfile))
161 out = open(outfile, 'w')
162 out.write(str(template))
167 """Class representing a webpage on the site"""
168 def __init__(self,link):
170 # find the representations of the link.
173 if self._link[-1] == '/':
175 lang = self._scan_languages(path)
177 self._pages.append(Page(l))
179 def _scan_languages(self,path):
181 for l in glob.glob('.'+path+'*'):
183 if len(ls) > 3 and ls[3] == 'xml':
184 lang.append((ls[2],l))
191 for page in self._pages:
196 for page in self._pages:
197 p.append(page.language())
201 for page in self._pages:
def template(self, sitemap):
    """Run the template step on every page stored in ``self._pages``.

    ``sitemap`` is passed through unchanged to each page's ``template``.
    """
    for version in self._pages:
        version.template(sitemap)
209 for page in self._pages:
210 if page.language()==lang:
214 def __init__(self,token,value):
226 return self._children
233 return self.inorder(self._root)
238 for x in self.inorder(l.children()):
241 def _add(self,trie, key, content):
245 node = Node(k,content)
250 self._add(ch.children(), key, content)
def add(self, key, content):
    """Insert ``content`` under ``key``, starting from the root level.

    Public entry point; the actual work is done by ``self._add``.
    """
    root = self._root
    self._add(root, key, content)
255 def _graph(self, trie, G):
257 G.add_node(l.token())
258 for ch in l.children():
259 G.add_edge(l.token(),ch.token())
260 self._graph(l.children(), G)
263 G = pgv.AGraph(directed=True)
264 G.add_node("sitemap")
265 for ch in self._root:
266 G.add_edge("sitemap",ch.token())
267 self._graph(self._root, G)
272 def _menu(self, trie, lang, page, css):
273 html = "<ul%s>\n" % css
276 if l.value().page(lang) == page:
277 sel = ' class="selected"'
278 html += '<li%s><a href="%s">%s</a>\n' \
279 % (sel,l.value().link(),l.value().page(lang).menu())
280 html += self._menu(l.children(), lang, page, "")
284 def menu(self,lang,page,cssclass):
287 css = ' class="'+cssclass+'"'
288 return self._menu(self._root, lang, page, css)
291 """Class keeping the internal site structure"""
293 self._file = 'sitemap.txt'
295 self._sitelang = set()
296 self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
def add_link(self, link):
    """Register ``link`` in the site tree.

    The link is split into its path pieces (separators are kept with the
    piece, empty pieces are dropped) and those pieces form the key under
    which a new ``Link`` object is stored in ``self._tree``.
    """
    pieces = re.split(r'(^/\w*/|\w*/)', link)
    tokens = [piece for piece in pieces if piece]
    self._tree.add(tokens, Link(link))
304 f = open(self._file,'w')
305 f.write('\n'.join(link.link() for link in self._tree))
311 sml = f.read().split()
315 except IOError, what_error:
316 print 'INFO: Could not read sitemap.txt - one will be created'
319 return set(link.link() for link in self._tree)
323 for link in self._tree:
326 print "Prepare [%5.2f s]" % (round(t2-t1,2))
327 for link in self._tree:
328 self._sitelang = self._sitelang.union(set(link.languages()))
329 for tran in self._sitelang:
331 self._tranlang[tran] = gettext.translation('iso_639_3', languages=[tran])
333 print "Language [%5.2f s]" % (round(t3-t2,2))
334 for link in self._tree:
337 print "Render [%5.2f s]" % (round(t4-t3,2))
338 for link in self._tree:
341 print "Template [%5.2f s]" % (round(t5-t4,2))
343 for l in self._sitelang:
344 sm[l] = Page((l,'/sitemap'))
345 sm[l].set_article(self.gen_menu(l,None,"tree sitemap"))
348 print "Sitemap [%5.2f s]" % (round(t6-t5,2))
def gen_menu(self, lang, page, cssclass):
    """Return the menu built by ``self._tree`` for the given arguments.

    ``lang``, ``page`` and ``cssclass`` are forwarded unchanged to the
    tree's ``menu`` method.
    """
    menu = self._tree.menu(lang, page, cssclass)
    return menu
356 def lang_menu(self,lang):
358 for l in self._sitelang:
359 isoxml = u"//iso_639_3_entry[@*='"+l+"']"
360 ln = self._isocode.xml_select(isoxml)[0].name
362 ln = self._tranlang[lang].gettext(ln)
363 html += '<li><a href="%s">%s</a></li>' % ('link'+'.'+l, ln)
368 publish(tmptarget, args.output)
369 publish(args.style+"css", args.output)
370 publish(args.style+"images",args.output)
372 def generateSitemap():
375 sfile = open('sitemap.txt')
376 flist = sfile.read().split()
379 sitemap.append(dict(link=f))
380 except IOError, what_error:
381 print 'Sitemap missing - generating one.'
383 for dirname, dirnames, filenames in os.walk('.'):
384 for filename in filenames:
385 if fnmatch.fnmatch(filename, '*.xml'):
386 xfile = os.path.join(dirname,filename)
387 doc = bindery.parse(xfile,
388 prefixes={u'db': u'http://docbook.org/ns/docbook',
389 u'xi': u'http://www.w3.org/2001/XInclude',
390 u'xl': u'http://www.w3.org/1999/xlink'})
391 title = doc.xml_select(u'/db:article/db:info/db:title')
392 menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
393 code = doc.xml_select(u"//xi:include[@parse='text']")
394 resource = doc.xml_select(u"//db:link[@xl:href]")
395 image = doc.xml_select(u"//db:imagedata[@fileref]")
398 (p, ext) = os.path.splitext(c.href)
399 if ext in valid_scripts:
404 base = xfile.split('.')[1]
405 link = base.replace('index','')
406 level = len(filter(None,re.split(r'(^/\w*/|\w*/)',link)))
409 rf = os.path.join(dirname,r.href)
410 if os.path.isfile(rf):
413 im = os.path.join(dirname,i.fileref)
414 if os.path.isfile(im):
416 page = dict(title=unicode(doc.article.info.title),
417 menu=unicode(doc.article.info.titleabbrev),
418 output=os.path.join(dirname,
419 filename.replace('xml','html')),
425 if l['link'] == link:
429 print "adding "+link+" to sitemap"
433 sfile = open('sitemap.txt','w')
435 sfile.write(l['link']+'\n')
439 def expandXincludeTxt(page):
440 doc = bindery.parse(page['file'],
441 prefixes={u'db': u'http://docbook.org/ns/docbook',
442 u'xi': u'http://www.w3.org/2001/XInclude'})
444 code = doc.xml_select(u"//xi:include[@parse='text']")
446 (p, ext) = os.path.splitext(c.href)
447 if ext in valid_scripts:
448 exe = os.path.join(os.path.abspath(c.href))
449 xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
450 xstr = bindery.parse(str(xml.stdout.read()))
451 id = c.xml_index_on_parent
452 for x in xstr.xml_children:
453 c.xml_parent.xml_insert(id,x)
454 c.xml_parent.xml_remove(c)
457 def xsltConvert(doc):
458 # amara can not handle the docbook stylesheets
459 # xmlarticle = transform(doc,style_xslt)
461 rundir = os.path.dirname(page['file'])
463 infile = os.path.basename(tempfile.mktemp())
464 outfile = tempfile.mktemp()
465 tfi = open(infile,'w')
466 tfi.write(doc.xml_encode())
468 # cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
469 cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
470 retcode = subprocess.call(cmd)
472 print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
473 tfo = open(outfile,'r')
481 def genMenu(page,sitemap,slevel,elevel):
484 if elevel == MAXLEVEL or elevel == 1 or page == None:
488 html = '<ul class="tree">\n'
489 idx = sitemap.index(page)
490 while (sitemap[idx]['level'] == page['level']):
492 title = sitemap[idx]['menu']
494 while (idx < len(sitemap) and sitemap[idx]['level'] == page['level']):
495 sm.append(sitemap[idx])
500 if slevel > p['level'] or elevel < p['level']:
502 if not title and p['link'] == '/':
505 if oldlevel < p['level']:
507 elif oldlevel > p['level']:
508 if p['link'][-1] == '/':
510 html+='</ul>\n</li>\n'
511 if page != None and page == p:
512 html+='<li class="selected"><a href="%s">%s</a>' % (p['link'],p['menu'])
514 html+='<li><a href="%s">%s</a>' % (p['link'],p['menu'])
515 if p['link'][-1] != '/' or p['link'] == '/':
517 oldlevel = p['level']
521 def writeToTemplate(page,doc,sitemap):
522 (menu,menuname) = genMenu(page,sitemap,1,MAXLEVEL)
523 (levelmenu,levelname) = genMenu(page,sitemap,page['level'],page['level'])
524 template = Template(file=style_tmpl,
525 searchList=[{'title':page['title']},
528 {'levelmenu':levelmenu},
529 {'levelname':levelname}])
530 outfile = tmptarget+page['output']
531 mkdir_p(os.path.dirname(outfile))
532 out = open(outfile, 'w')
533 out.write(str(template))
535 for r in page['res']:
536 mkdir_p(os.path.dirname(tmptarget+r))
537 shutil.copyfile(r, tmptarget+r)
539 def createSitemap(sitemap):
540 (menu,menuname) = genMenu(None,sitemap,1,MAXLEVEL)
541 template = Template(file=style_tmpl,
548 outfile = tmptarget+'sitemap.en.html'
549 mkdir_p(os.path.dirname(outfile))
550 out = open(outfile, 'w')
551 out.write(str(template))
560 missing = dir_.set() - sitemap.set()
561 removed = sitemap.set() - dir_.set()
563 print page+' pages missing!!'
565 print 'adding missing page '+page
566 sitemap.add_link(page)
567 if len(missing & removed) != 0:
568 print 'writing new sitemap - please adjust if needed'
577 print "Publish [%5.2f s]" % (round(t2-t1,2))
579 sitemap = generateSitemap()
580 tmptarget = tempfile.mkdtemp()+'/'
584 print "Page : %-30s %30s" % (page['link'],
585 time.ctime(os.stat(page['file']).st_mtime)),
586 doc = expandXincludeTxt(page)
587 pubdoc = xsltConvert(doc)
588 writeToTemplate(page,pubdoc,sitemap)
590 print "[%5.2f s]" % (round(t2-t1,2))
593 print "Total time\t\t\t\t\t\t\t [%5.2f s]" % (round(tot,2))
594 createSitemap(sitemap)
595 publish(tmptarget, args.output)
596 publish(args.style+"css", args.output)
597 publish(args.style+"images",args.output)