12 import pygraphviz as pgv
14 from amara import bindery
15 from amara.xslt import transform
16 from Cheetah.Template import Template
# NOTE(review): every line in this file carries a stray leading line number
# (e.g. "18 ") and the embedded numbering has gaps -- this looks like a
# sampled/garbled extract with many original lines missing. Comments below
# are hedged accordingly.
# Command-line options: --style points at the XSLT/template directory,
# --output at the publication target; both default to siblings of cwd.
18 parser = argparse.ArgumentParser(description='Process docbook article tree.')
19 parser.add_argument('--style', nargs='?',
20 default=os.path.dirname(os.getcwd())+'/style/default/')
21 parser.add_argument('--output', nargs='?',
22 default=os.path.dirname(os.getcwd())+'/htdocs/')
23 args = parser.parse_args()
# Derived paths: the DocBook XSLT stylesheet and the Cheetah page template
# both live inside the style directory.
25 style_xslt = args.style+"docbook.xsl"
26 style_tmpl = args.style+"index.en.html.tmpl"
27 outputdir = args.output
# Scratch directory used as a staging area before rsync publication.
29 tmptarget = tempfile.mkdtemp()+'/'
# Extensions of xi:include'd files that get executed (stdout spliced in).
31 valid_scripts = ['.py','.pl']
# Fragment of a "mkdir -p"-style helper (the def/try lines are missing from
# this extract). EEXIST is presumably tolerated as success -- TODO confirm
# against the full source.
37 except OSError as exc: # Python >2.5
38 if exc.errno == errno.EEXIST:
42 def publish(src,target):
# Mirror src into target via rsync; --delete removes stale files on target.
43 cmd = ["rsync","-a","--delete",src,target]
44 retcode = subprocess.call(cmd)
# NOTE(review): a line is missing here in this extract -- presumably
# "if retcode != 0:" guarding the error report below; verify.
46 print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
# XML namespace prefixes shared by the bindery.parse()/xml_select() calls:
# db = DocBook 5, xi = XInclude, xl = XLink.
49 PREFIXES={u'db': u'http://docbook.org/ns/docbook',
50 u'xi': u'http://www.w3.org/2001/XInclude',
51 u'xl': u'http://www.w3.org/1999/xlink'}
54 """Class containing the state of the directory with articles"""
# Walk the article tree and record one site link per *.xml file found
# (the class statement and method defs are missing from this extract).
60 for dirname, dirnames, filenames in os.walk(self._cwd):
61 for filename in filenames:
62 if fnmatch.fnmatch(filename, '*.xml'):
63 file_ = os.path.join(dirname,filename)
64 doc = bindery.parse(file_, prefixes=PREFIXES)
# title/menu are selected but unused in the visible lines -- presumably a
# check that the article carries the expected db:info elements; verify.
65 title = doc.xml_select(u'/db:article/db:info/db:title')
66 menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
# './foo/index.xml'.split('.')[1] -> '/foo/index'; dropping 'index'
# yields the site-relative link ('/foo/').
68 base = file_.split('.')[1]
69 link = base.replace('index','')
70 self._tree.append(link)
# Deduplicated view of the collected links.
73 return set(self._tree)
76 """Class representing a version of a webpage"""
77 def __init__(self,page):
84 self._rendered_article = None
# Setter for the XSLT-rendered HTML body of this page.
92 def set_article(self,art):
93 self._rendered_article = art
# Parse the article and pick up title/menu text when the elements exist.
96 self._doc = bindery.parse(self._file, prefixes=PREFIXES)
97 if self._doc.xml_select(u'/db:article/db:info/db:title'):
98 self._title = unicode(self._doc.article.info.title)
99 if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
100 self._menu = unicode(self._doc.article.info.titleabbrev)
# Expand <xi:include parse="text"> elements whose href is an executable
# script (see valid_scripts): run it and splice its stdout into the
# document in place of the include element. (Loop header over `code` is
# missing from this extract.)
102 dirname = os.path.dirname(self._file)
103 code = self._doc.xml_select(u"//xi:include[@parse='text']")
106 (p, ext) = os.path.splitext(c.href)
107 if ext in valid_scripts:
# NOTE(review): dirname+c.href concatenates with no path separator --
# only correct if dirname ends with '/'; verify against callers.
108 exe = os.path.join(os.path.abspath(dirname+c.href))
109 xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
110 xstr = bindery.parse(str(xml.stdout.read()))
# Insert the script output at the include's position, then drop the include.
111 idp = c.xml_index_on_parent
112 for x in xstr.xml_children:
113 c.xml_parent.xml_insert(idp,x)
114 c.xml_parent.xml_remove(c)
# Collect local files referenced by db:link/@xl:href and
# db:imagedata/@fileref so they can be published next to the page.
# Silently skips references that do not resolve to an existing file.
116 for r in self._doc.xml_select(u"//db:link[@xl:href]"):
117 rf = os.path.join(dirname,r.href)
118 if os.path.isfile(rf):
119 self._resources.append(rf)
120 for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
121 im = os.path.join(dirname,i.fileref)
122 if os.path.isfile(im):
123 self._resources.append(im)
126 # amara can not handle the docbook stylesheets
127 # xmlarticle = transform(doc,style_xslt)
129 dirname = os.path.dirname(self._file)
# Serialize the document to a temp file and render it with the external
# xsltproc, then read the generated HTML back in.
131 infile = os.path.basename(tempfile.mktemp())
132 outfile = tempfile.mktemp()
133 tfi = open(infile,'w')
134 tfi.write(self._doc.xml_encode())
# NOTE(review): tfi is not closed/flushed in the visible lines before
# xsltproc reads infile -- presumably closed on a missing line; verify.
136 # cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
137 cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
138 retcode = subprocess.call(cmd)
# Missing line here -- presumably "if retcode != 0:" guarding the report.
140 print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
141 tfo = open(outfile,'r')
142 self._rendered_article = tfo.read()
# Render this page through the Cheetah template, using menus generated
# from the sitemap, and write the HTML into the staging tree.
148 def template(self,sitemap):
149 htmlmenu = sitemap.gen_menu(self._lang,None)
150 levelmenu = sitemap.gen_menu(self._lang,self)
151 template = Template(file=style_tmpl,
152 searchList=[{'title':self._title},
154 {'article':self._rendered_article},
155 {'levelmenu':levelmenu},
# NOTE(review): levelname is not bound in the visible lines -- presumably
# assigned on a line missing from this extract; verify.
156 {'levelname':levelname}])
157 outfile = tmptarget+self._file+'.'+self._lang+'.html'
158 mkdir_p(os.path.dirname(outfile))
159 out = open(outfile, 'w')
160 out.write(str(template))
165 """Class representing a webpage on the site"""
166 def __init__(self,link):
168 # find the representations of the link.
# Directory-style links ('/foo/') are expanded into their per-language
# Page objects (intermediate lines missing from this extract).
171 if self._link[-1] == '/':
173 lang = self._scan_languages(path)
175 self._pages.append(Page(l))
# Find language variants of a page: filenames shaped like index.<lang>.xml.
177 def _scan_languages(self,path):
179 for l in glob.glob('.'+path+'*'):
# A name like './foo/index.en.xml' split on '.' gives ls[2]=lang,
# ls[3]='xml'. NOTE(review): the split itself is on a missing line; verify.
181 if len(ls) > 3 and ls[3] == 'xml':
182 lang.append((ls[2],l))
# Per-page iteration helpers; the method headers sit on lines missing
# from this extract.
189 for page in self._pages:
# Presumably collects the languages available for this link.
194 for page in self._pages:
195 p.append(page.language())
199 for page in self._pages:
# Render every language variant of this link.
202 def template(self,sitemap):
203 for page in self._pages:
204 page.template(sitemap)
# Presumably looks up the variant for a specific language.
207 for page in self._pages:
208 if page.language()==lang:
# Trie node: holds a path token, a payload value, and child nodes
# (most of the class bodies are missing from this extract).
212 def __init__(self,token,value):
224 return self._children
# Traversal of the whole trie starting at the root.
231 return self.inorder(self._root)
236 for ch in l.children():
# Recursive insert: descend into a child matching the next key token,
# otherwise create a fresh Node for it.
239 def _add(self,trie, key, content):
243 node = Node(k,content)
248 self._add(ch.children(), key, content)
250 def add(self,key, content):
251 self._add(self._root, key, content)
# Build a pygraphviz graph of the trie: one node per token, one edge per
# parent->child relation, recursing through the children.
253 def _graph(self, trie, G):
255 G.add_node(l.token())
256 for ch in l.children():
257 G.add_edge(l.token(),ch.token())
258 self._graph(l.children(), G)
# Entry point: rooted at a synthetic "sitemap" node (its def line is
# missing from this extract).
261 G = pgv.AGraph(directed=True)
262 G.add_node("sitemap")
263 for ch in self._root:
264 G.add_edge("sitemap",ch.token())
265 self._graph(self._root, G)
271 """Class keeping the internal site structure"""
273 self._file = 'sitemap.txt'
# Insert a link into the trie keyed by its path segments;
# filter(None, ...) drops the empty strings that re.split produces.
276 def add_link(self, link):
277 tokens = filter(None,re.split(r'(^/\w*/|\w*/)',link))
278 self._tree.add(tokens,Link(link))
# Read a previously saved sitemap.txt; absence is expected on first run.
283 sml = f.read().split()
287 except IOError, what_error:
288 print 'INFO: Could not read sitemap.txt - one will be created'
291 return set(link.link() for link in self._tree)
# Placeholder: menu generation from the sitemap is not implemented yet.
296 def gen_menu(self,lang,page):
297 return 'Generate menu from sitemap - To be implemented'
# Scan the tree for *.xml DocBook articles and build the sitemap: a list of
# dicts (title, menu, link, level, output path, resources, ...). Seeds
# ordering from an existing sitemap.txt, appends newly discovered pages,
# and rewrites the file. Several lines are missing from this extract.
299 def generateSitemap():
# Seed ordering from a previously written sitemap.txt, if present.
302 sfile = open('sitemap.txt')
303 flist = sfile.read().split()
306 sitemap.append(dict(link=f))
307 except IOError, what_error:
308 print 'Sitemap missing - generating one.'
# Walk the current directory and parse every article.
310 for dirname, dirnames, filenames in os.walk('.'):
311 for filename in filenames:
312 if fnmatch.fnmatch(filename, '*.xml'):
313 xfile = os.path.join(dirname,filename)
314 doc = bindery.parse(xfile,
315 prefixes={u'db': u'http://docbook.org/ns/docbook',
316 u'xi': u'http://www.w3.org/2001/XInclude',
317 u'xl': u'http://www.w3.org/1999/xlink'})
318 title = doc.xml_select(u'/db:article/db:info/db:title')
319 menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
320 code = doc.xml_select(u"//xi:include[@parse='text']")
321 resource = doc.xml_select(u"//db:link[@xl:href]")
322 image = doc.xml_select(u"//db:imagedata[@fileref]")
# Executable includes presumably recorded per page (loop body partially
# missing from this extract).
325 (p, ext) = os.path.splitext(c.href)
326 if ext in valid_scripts:
# './foo/index.xml'.split('.')[1] -> '/foo/index'; dropping 'index' gives
# the site link, and its path-segment count is the menu level.
331 base = xfile.split('.')[1]
332 link = base.replace('index','')
333 level = len(filter(None,re.split(r'(^/\w*/|\w*/)',link)))
# Collect referenced files/images that actually exist on disk.
336 rf = os.path.join(dirname,r.href)
337 if os.path.isfile(rf):
340 im = os.path.join(dirname,i.fileref)
341 if os.path.isfile(im):
343 page = dict(title=unicode(doc.article.info.title),
344 menu=unicode(doc.article.info.titleabbrev),
345 output=os.path.join(dirname,
346 filename.replace('xml','html')),
# Merge into the seeded list: update in place when the link is already
# known, otherwise append as a new entry.
352 if l['link'] == link:
356 print "adding "+link+" to sitemap"
# Persist the (possibly extended) sitemap for the next run.
360 sfile = open('sitemap.txt','w')
362 sfile.write(l['link']+'\n')
# Run executable <xi:include parse="text"> targets (see valid_scripts) for
# one page and splice each script's stdout into the parsed document in
# place of the include element. Presumably returns the expanded doc on a
# line missing from this extract (the caller uses the return value).
366 def expandXincludeTxt(page):
367 doc = bindery.parse(page['file'],
368 prefixes={u'db': u'http://docbook.org/ns/docbook',
369 u'xi': u'http://www.w3.org/2001/XInclude'})
371 code = doc.xml_select(u"//xi:include[@parse='text']")
373 (p, ext) = os.path.splitext(c.href)
374 if ext in valid_scripts:
375 exe = os.path.join(os.path.abspath(c.href))
376 xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
377 xstr = bindery.parse(str(xml.stdout.read()))
# NOTE(review): 'id' shadows the builtin; it holds the include element's
# index on its parent, used as the insertion point below.
378 id = c.xml_index_on_parent
379 for x in xstr.xml_children:
380 c.xml_parent.xml_insert(id,x)
381 c.xml_parent.xml_remove(c)
# Render a parsed document to HTML via the external xsltproc (amara's own
# transform cannot drive the DocBook stylesheets). Presumably returns the
# rendered text read from tfo on a line missing from this extract.
384 def xsltConvert(doc):
385 # amara can not handle the docbook stylesheets
386 # xmlarticle = transform(doc,style_xslt)
# NOTE(review): 'page' is not a parameter of this function -- rundir reads
# a global (or this is a latent NameError); verify against the full source.
388 rundir = os.path.dirname(page['file'])
390 infile = os.path.basename(tempfile.mktemp())
391 outfile = tempfile.mktemp()
392 tfi = open(infile,'w')
393 tfi.write(doc.xml_encode())
# NOTE(review): tfi is not closed in the visible lines before xsltproc
# reads infile -- presumably closed on a missing line; verify.
395 # cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
396 cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
397 retcode = subprocess.call(cmd)
# Missing line here -- presumably "if retcode != 0:" guarding the report.
399 print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
400 tfo = open(outfile,'r')
# Build a nested <ul class="tree"> HTML menu from the flat sitemap list,
# covering levels slevel..elevel. 'page' may be None (full-site menu) and
# is rendered with class="selected" when matched. Presumably returns
# (html, title) -- callers unpack a 2-tuple. Lines missing throughout.
408 def genMenu(page,sitemap,slevel,elevel):
411 if elevel == MAXLEVEL or elevel == 1 or page == None:
# Full-site menu: start from the top of the sitemap.
415 html = '<ul class="tree">\n'
# Level menu: back up to the start of the contiguous run of entries that
# share page's level, then collect that run into sm.
416 idx = sitemap.index(page)
417 while (sitemap[idx]['level'] == page['level']):
419 title = sitemap[idx]['menu']
421 while (idx < len(sitemap) and sitemap[idx]['level'] == page['level']):
422 sm.append(sitemap[idx])
# Skip entries outside the requested level window.
427 if slevel > p['level'] or elevel < p['level']:
429 if not title and p['link'] == '/':
# Open/close nested lists when the level changes between entries.
432 if oldlevel < p['level']:
434 elif oldlevel > p['level']:
435 if p['link'][-1] == '/':
437 html+='</ul>\n</li>\n'
438 if page != None and page == p:
439 html+='<li class="selected"><a href="%s">%s</a>' % (p['link'],p['menu'])
441 html+='<li><a href="%s">%s</a>' % (p['link'],p['menu'])
# Leaf entries close their <li> immediately; directory entries stay open
# for their children (the root link '/' is treated as a leaf).
442 if p['link'][-1] != '/' or p['link'] == '/':
444 oldlevel = p['level']
# Fill the Cheetah template for one page and write the result (plus the
# page's referenced resource files) into the staging tree.
448 def writeToTemplate(page,doc,sitemap):
# Global menu over all levels, plus a local menu restricted to this
# page's own level.
449 (menu,menuname) = genMenu(page,sitemap,1,MAXLEVEL)
450 (levelmenu,levelname) = genMenu(page,sitemap,page['level'],page['level'])
451 template = Template(file=style_tmpl,
452 searchList=[{'title':page['title']},
455 {'levelmenu':levelmenu},
456 {'levelname':levelname}])
457 outfile = tmptarget+page['output']
458 mkdir_p(os.path.dirname(outfile))
459 out = open(outfile, 'w')
460 out.write(str(template))
# Copy linked files/images next to the page inside the staging tree.
462 for r in page['res']:
463 mkdir_p(os.path.dirname(tmptarget+r))
464 shutil.copyfile(r, tmptarget+r)
# Render the site-wide sitemap page (English) into the staging tree using
# the same Cheetah template; searchList entries sit on missing lines.
466 def createSitemap(sitemap):
467 (menu,menuname) = genMenu(None,sitemap,1,MAXLEVEL)
468 template = Template(file=style_tmpl,
475 outfile = tmptarget+'sitemap.en.html'
476 mkdir_p(os.path.dirname(outfile))
477 out = open(outfile, 'w')
478 out.write(str(template))
# Reconcile the on-disk article tree with the stored sitemap (the enclosing
# function's def line is missing from this extract): pages on disk but not
# in the sitemap are added; pages only in the sitemap are reported.
487 missing = dir_.set() - sitemap.set()
488 removed = sitemap.set() - dir_.set()
# NOTE(review): 'removed' is a set -- set + str raises TypeError here;
# presumably str(removed) or len(removed) was intended. Flagged, not fixed,
# because surrounding lines are missing.
490 print removed+' pages missing!!'
493 print 'adding missing page '+page
494 sitemap.add_page(page)
# Main driver: build the sitemap, render every page into a fresh staging
# directory, then rsync the pages and the style assets to the output.
499 sitemap = generateSitemap()
# NOTE(review): rebinding tmptarget shadows the module-level staging dir
# created earlier; that first directory is abandoned.
500 tmptarget = tempfile.mkdtemp()+'/'
# Per-page pipeline (loop header missing): expand executable includes,
# run the XSLT conversion, fill the template.
503 print "Page : %-30s %30s" % (page['link'],
504 time.ctime(os.stat(page['file']).st_mtime)),
505 doc = expandXincludeTxt(page)
506 pubdoc = xsltConvert(doc)
507 writeToTemplate(page,pubdoc,sitemap)
# t1/t2 timing bounds are taken on lines missing from this extract.
509 print "[%5.2f s]" % (round(t2-t1,2))
511 createSitemap(sitemap)
512 publish(tmptarget, args.output)
513 publish(args.style+"css", args.output)
514 publish(args.style+"images",args.output)