12 import pygraphviz as pgv
14 from amara import bindery
15 from amara.xslt import transform
16 from Cheetah.Template import Template
# Command-line configuration: where styles are read from and where the
# rendered site is published to. Both defaults are siblings of the cwd.
parser = argparse.ArgumentParser(description='Process docbook article tree.')
parent_dir = os.path.dirname(os.getcwd())
parser.add_argument('--style', nargs='?',
                    default=parent_dir + '/style/default/')
parser.add_argument('--output', nargs='?',
                    default=parent_dir + '/htdocs/')
args = parser.parse_args()

# Derived paths: XSLT stylesheet, Cheetah page template, publish root.
style_xslt = args.style + "docbook.xsl"
style_tmpl = args.style + "index.en.html.tmpl"
outputdir = args.output

# Scratch directory the site is staged into before being rsync-published.
tmptarget = tempfile.mkdtemp() + '/'

# Extensions of xi:include'd scripts this generator is willing to execute.
valid_scripts = ['.py', '.pl']
# -- fragment of an mkdir_p-style helper: its `def` line and the preceding
# `try: os.makedirs(...)` are not visible in this view --
except OSError as exc: # Python >2.5
    # "Already exists" is not an error under mkdir -p semantics; the branch
    # body (and the presumed re-raise for other errnos) is omitted here.
    if exc.errno == errno.EEXIST:
def publish(src,target):
    """Mirror *src* into *target* with rsync; --delete prunes stale files."""
    cmd = ["rsync","-a","--delete",src,target]
    retcode = subprocess.call(cmd)
    # NOTE(review): a guard line (presumably `if retcode != 0:`) appears to
    # be omitted from this fragment; as shown the error message would print
    # unconditionally — confirm against the full file.
    print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
# XML namespace prefixes shared by every document parse in this script.
PREFIXES = {
    u'db': u'http://docbook.org/ns/docbook',    # DocBook 5
    u'xi': u'http://www.w3.org/2001/XInclude',  # XInclude
    u'xl': u'http://www.w3.org/1999/xlink',     # XLink
}
54 """Class containing the state of the directory with articles"""
60 for dirname, dirnames, filenames in os.walk(self._cwd):
61 for filename in filenames:
62 if fnmatch.fnmatch(filename, '*.xml'):
63 file_ = os.path.join(dirname,filename)
64 doc = bindery.parse(file_, prefixes=PREFIXES)
65 title = doc.xml_select(u'/db:article/db:info/db:title')
66 menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
68 base = file_.split('.')[1]
69 link = base.replace('index','')
70 self._tree.append(link)
73 return set(self._tree)
76 """Class representing a version of a webpage"""
77 def __init__(self,page):
84 self._rendered_article = None
87 self._doc = bindery.parse(self._file, prefixes=PREFIXES)
88 if self._doc.xml_select(u'/db:article/db:info/db:title'):
89 self._title = unicode(self._doc.article.info.title)
90 if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
91 self._menu = unicode(self._doc.article.info.titleabbrev)
        dirname = os.path.dirname(self._file)
        # xi:include elements with parse='text' reference scripts whose
        # output should be spliced into the document.
        code = self._doc.xml_select(u"//xi:include[@parse='text']")
        # NOTE(review): the loop header binding `c` (likely `for c in code:`)
        # is omitted from this fragment.
            (p, ext) = os.path.splitext(c.href)
            if ext in valid_scripts:
                # Run the referenced script and capture its XML on stdout.
                # NOTE(review): `dirname+c.href` concatenates with no path
                # separator — presumably relies on href starting with '/';
                # the single-argument os.path.join is a no-op. Confirm.
                exe = os.path.join(os.path.abspath(dirname+c.href))
                xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
                xstr = bindery.parse(str(xml.stdout.read()))
                # Insert the script's children where the xi:include sat,
                # then drop the xi:include element itself.
                idp = c.xml_index_on_parent
                for x in xstr.xml_children:
                    c.xml_parent.xml_insert(idp,x)
                c.xml_parent.xml_remove(c)
        # Collect db:link targets and image files that exist on disk so
        # they can be copied alongside the rendered page later.
        for r in self._doc.xml_select(u"//db:link[@xl:href]"):
            rf = os.path.join(dirname,r.href)
            if os.path.isfile(rf):
                self._resources.append(rf)
        for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
            im = os.path.join(dirname,i.fileref)
            if os.path.isfile(im):
                self._resources.append(im)
    # Render fragment (method header omitted): serialize the expanded
    # document to a temp file, transform it with xsltproc, read it back.
        # amara can not handle the docbook stylesheets
        # xmlarticle = transform(doc,style_xslt)
        dirname = os.path.dirname(self._file)
        infile = os.path.basename(tempfile.mktemp())
        outfile = tempfile.mktemp()
        tfi = open(infile,'w')
        tfi.write(self._doc.xml_encode())
        # NOTE(review): tfi is not closed/flushed in the visible lines — the
        # close may be on an omitted line; confirm, otherwise xsltproc may
        # see a truncated input file.
        # cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
        cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
        retcode = subprocess.call(cmd)
        # NOTE(review): an error guard (likely `if retcode != 0:`) appears
        # to be omitted before this print.
        print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
        tfo = open(outfile,'r')
        self._rendered_article = tfo.read()
    def template(self,sitemap):
        """Render this page through the Cheetah template into the staging
        tree, with menus generated from *sitemap*."""
        htmlmenu = sitemap.gen_menu(self._lang,None)
        levelmenu = sitemap.gen_menu(self._lang,self)
        # NOTE(review): one searchList entry (between 'title' and 'article')
        # is omitted from this fragment, and `levelname` is not bound in the
        # visible lines of this method — confirm where it comes from.
        template = Template(file=style_tmpl,
                            searchList=[{'title':self._title},
                                        {'article':self._rendered_article},
                                        {'levelmenu':levelmenu},
                                        {'levelname':levelname}])
        # Stage the rendered page as <file>.<lang>.html under tmptarget.
        outfile = tmptarget+self._file+'.'+self._lang+'.html'
        mkdir_p(os.path.dirname(outfile))
        out = open(outfile, 'w')
        out.write(str(template))
156 """Class representing a webpage on the site"""
157 def __init__(self,link):
159 # find the representations of the link.
162 if self._link[-1] == '/':
164 lang = self._scan_languages(path)
166 self._pages.append(Page(l))
168 def _scan_languages(self,path):
170 for l in glob.glob('.'+path+'*'):
172 if len(ls) > 3 and ls[3] == 'xml':
173 lang.append((ls[2],l))
    # -- Node/Trie fragments: only scattered lines are visible in this view --
    def __init__(self,token,value):
        # NOTE(review): the initializer body is omitted from this fragment.

    # Accessor fragment (method header omitted): the node's child list.
        return self._children

    # Iteration fragment (method header omitted): delegate to an in-order
    # walk starting at the trie root.
        return self.inorder(self._root)

    # inorder fragment (surrounding lines omitted): recurse into children.
        for ch in l.children():

    def _add(self,trie, key, content):
        # Recursive insert of a token path; most of the body (the token
        # match/descend logic) is omitted from this fragment.
            node = Node(k,content)
            self._add(ch.children(), key, content)

    def add(self,key, content):
        # Public insert: start the recursive descent at the root list.
        self._add(self._root, key, content)
    def _graph(self, trie, G):
        # Recursively add trie nodes and parent->child edges to the
        # pygraphviz graph *G*. NOTE(review): the loop header binding `l`
        # (likely `for l in trie:`) is omitted from this fragment.
            G.add_node(l.token())
            for ch in l.children():
                G.add_edge(l.token(),ch.token())
                # NOTE(review): recursing with `l.children()` inside the
                # inner loop re-walks the same child list once per child —
                # possibly meant to be `ch.children()` or to sit one level
                # out; confirm before relying on this.
                self._graph(l.children(), G)

    # Graph entry-point fragment (method header omitted): build the full
    # sitemap graph rooted at a synthetic "sitemap" node.
        G = pgv.AGraph(directed=True)
        G.add_node("sitemap")
        for ch in self._root:
            G.add_edge("sitemap",ch.token())
        self._graph(self._root, G)
239 """Class keeping the internal site structure"""
241 self._file = 'sitemap.txt'
244 def add_link(self, link):
245 tokens = filter(None,re.split(r'(^/\w*/|\w*/)',link))
246 self._tree.add(tokens,Link(link))
251 sml = f.read().split()
255 except IOError, what_error:
256 print 'INFO: Could not read sitemap.txt - one will be created'
259 return set(link.link() for link in self._tree)
264 def gen_menu(self,lang,page):
265 return 'Generate menu from sitemap - To be implemented'
def generateSitemap():
    """Build the list of page dicts describing the whole site.

    Reads sitemap.txt when present; otherwise walks the working tree for
    *.xml DocBook articles. NOTE(review): large parts of this body are
    omitted from this fragment — loop headers and several bindings are
    reconstructed commentary only.
    """
    # Try the persisted sitemap first (enclosing try: line omitted).
        sfile = open('sitemap.txt')
        flist = sfile.read().split()
        # (loop over flist omitted)
            sitemap.append(dict(link=f))
    except IOError, what_error:
        print 'Sitemap missing - generating one.'
        # Fall back to scanning the tree for DocBook articles.
        for dirname, dirnames, filenames in os.walk('.'):
            for filename in filenames:
                if fnmatch.fnmatch(filename, '*.xml'):
                    xfile = os.path.join(dirname,filename)
                    # NOTE(review): duplicates the module-level PREFIXES.
                    doc = bindery.parse(xfile,
                        prefixes={u'db': u'http://docbook.org/ns/docbook',
                        u'xi': u'http://www.w3.org/2001/XInclude',
                        u'xl': u'http://www.w3.org/1999/xlink'})
                    title = doc.xml_select(u'/db:article/db:info/db:title')
                    menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
                    code = doc.xml_select(u"//xi:include[@parse='text']")
                    resource = doc.xml_select(u"//db:link[@xl:href]")
                    image = doc.xml_select(u"//db:imagedata[@fileref]")
                    # Executable-include fragment (loop header omitted):
                        (p, ext) = os.path.splitext(c.href)
                        if ext in valid_scripts:
                    # './dir/index.xml' -> '/dir/index' -> '/dir/'
                    base = xfile.split('.')[1]
                    link = base.replace('index','')
                    # Page depth = number of path components in the link.
                    level = len(filter(None,re.split(r'(^/\w*/|\w*/)',link)))
                    # Resource-existence fragment (loop headers omitted):
                        rf = os.path.join(dirname,r.href)
                        if os.path.isfile(rf):
                        im = os.path.join(dirname,i.fileref)
                        if os.path.isfile(im):
                    # Assemble the page dict (trailing entries omitted).
                    page = dict(title=unicode(doc.article.info.title),
                        menu=unicode(doc.article.info.titleabbrev),
                        output=os.path.join(dirname,
                        filename.replace('xml','html')),
                    # Dedup fragment (surrounding lines omitted): skip
                    # links already present in the sitemap.
                        if l['link'] == link:
                    print "adding "+link+" to sitemap"
    # Persist the (possibly extended) sitemap back to disk
    # (the write loop's header is omitted).
    sfile = open('sitemap.txt','w')
        sfile.write(l['link']+'\n')
def expandXincludeTxt(page):
    """Execute xi:include[@parse='text'] scripts referenced by *page* and
    splice their XML output into the parsed document in place."""
    doc = bindery.parse(page['file'],
        prefixes={u'db': u'http://docbook.org/ns/docbook',
        u'xi': u'http://www.w3.org/2001/XInclude'})
    code = doc.xml_select(u"//xi:include[@parse='text']")
    # NOTE(review): the loop header binding `c` (likely `for c in code:`)
    # and the final `return doc` are omitted from this fragment.
        (p, ext) = os.path.splitext(c.href)
        if ext in valid_scripts:
            # Run the referenced script and capture its XML on stdout.
            # NOTE(review): single-argument os.path.join is a no-op here.
            exe = os.path.join(os.path.abspath(c.href))
            xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
            xstr = bindery.parse(str(xml.stdout.read()))
            # `id` shadows the builtin; it is c's position under its parent.
            id = c.xml_index_on_parent
            for x in xstr.xml_children:
                c.xml_parent.xml_insert(id,x)
            c.xml_parent.xml_remove(c)
def xsltConvert(doc):
    """Transform the DocBook document *doc* to HTML via xsltproc.

    NOTE(review): several lines (including the presumed return of the
    rendered output read from `tfo`) are omitted from this fragment.
    """
    # amara can not handle the docbook stylesheets
    # xmlarticle = transform(doc,style_xslt)
    # NOTE(review): `page` is not a parameter of this function — as written
    # this line raises NameError unless `page` happens to be a global at
    # call time; confirm against the full file. `rundir` is also unused in
    # the visible lines.
    rundir = os.path.dirname(page['file'])
    # Serialize the document to a temp file for xsltproc to consume.
    infile = os.path.basename(tempfile.mktemp())
    outfile = tempfile.mktemp()
    tfi = open(infile,'w')
    tfi.write(doc.xml_encode())
    # cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
    cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
    retcode = subprocess.call(cmd)
    # NOTE(review): an error guard (likely `if retcode != 0:`) appears to
    # be omitted before this print.
    print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
    tfo = open(outfile,'r')
def genMenu(page,sitemap,slevel,elevel):
    """Build the HTML <ul class="tree"> menu covering levels *slevel*
    through *elevel*, highlighting *page*.

    NOTE(review): many lines of this body are omitted from this fragment
    (branch bodies, loop headers binding `p`, the return statement); the
    reconstruction below preserves only the visible statements.
    """
    if elevel == MAXLEVEL or elevel == 1 or page == None:
    html = '<ul class="tree">\n'
    idx = sitemap.index(page)
    # Scan entries sharing the page's level to pick up a title...
    while (sitemap[idx]['level'] == page['level']):
        title = sitemap[idx]['menu']
    # ...then collect the consecutive run of same-level entries.
    while (idx < len(sitemap) and sitemap[idx]['level'] == page['level']):
        sm.append(sitemap[idx])
    # Emission fragment: one <li> per entry `p`, opening/closing nested
    # <ul>s on level changes (most structure omitted).
        if slevel > p['level'] or elevel < p['level']:
        if not title and p['link'] == '/':
        if oldlevel < p['level']:
        elif oldlevel > p['level']:
            if p['link'][-1] == '/':
                html+='</ul>\n</li>\n'
        # The current page gets a "selected" CSS class.
        if page != None and page == p:
            html+='<li class="selected"><a href="%s">%s</a>' % (p['link'],p['menu'])
            html+='<li><a href="%s">%s</a>' % (p['link'],p['menu'])
        if p['link'][-1] != '/' or p['link'] == '/':
        oldlevel = p['level']
def writeToTemplate(page,doc,sitemap):
    """Render *page* (with transformed body *doc*) through the Cheetah
    template into the staging tree, then copy its resources alongside."""
    (menu,menuname) = genMenu(page,sitemap,1,MAXLEVEL)
    (levelmenu,levelname) = genMenu(page,sitemap,page['level'],page['level'])
    # NOTE(review): two searchList entries (between 'title' and
    # 'levelmenu') are omitted from this fragment.
    template = Template(file=style_tmpl,
                        searchList=[{'title':page['title']},
                                    {'levelmenu':levelmenu},
                                    {'levelname':levelname}])
    outfile = tmptarget+page['output']
    mkdir_p(os.path.dirname(outfile))
    out = open(outfile, 'w')
    out.write(str(template))
    # Copy every referenced resource next to the rendered HTML.
    for r in page['res']:
        mkdir_p(os.path.dirname(tmptarget+r))
        shutil.copyfile(r, tmptarget+r)
def createSitemap(sitemap):
    """Render the site-map overview page itself into the staging tree.

    NOTE(review): the searchList argument lines of the Template call are
    omitted from this fragment — the call below is visibly incomplete.
    """
    (menu,menuname) = genMenu(None,sitemap,1,MAXLEVEL)
    template = Template(file=style_tmpl,
    outfile = tmptarget+'sitemap.en.html'
    mkdir_p(os.path.dirname(outfile))
    out = open(outfile, 'w')
    out.write(str(template))
    # -- fragment of a directory/sitemap reconciliation routine: its `def`
    # line and surrounding control flow are omitted from this view --
    # Pages on disk but not in the sitemap, and vice versa.
    missing = dir_.set() - sitemap.set()
    removed = sitemap.set() - dir_.set()
    # NOTE(review): `removed` is a set, so `removed+'...'` raises TypeError;
    # probably str(removed) or len(removed) was intended — confirm.
    print removed+' pages missing!!'
    # (loop header over `missing` binding `page` omitted)
        print 'adding missing page '+page
        # NOTE(review): the visible Sitemap fragment defines add_link, not
        # add_page — confirm this method exists in the full class.
        sitemap.add_page(page)
# -- top-level driver fragment: the page-loop header and the timing lines
# binding t1/t2 are omitted from this view --
sitemap = generateSitemap()
# NOTE(review): re-creates the staging dir, shadowing the tmptarget that
# was already created near the top of the file — the earlier temp dir is
# then never used; confirm which one the publish step should see.
tmptarget = tempfile.mkdtemp()+'/'
    # Per-page pipeline: expand script includes, XSLT-convert, template.
    print "Page : %-30s %30s" % (page['link'],
        time.ctime(os.stat(page['file']).st_mtime)),
    doc = expandXincludeTxt(page)
    pubdoc = xsltConvert(doc)
    writeToTemplate(page,pubdoc,sitemap)
    print "[%5.2f s]" % (round(t2-t1,2))
# Render the sitemap page, then rsync the staged tree plus static assets.
createSitemap(sitemap)
publish(tmptarget, args.output)
publish(args.style+"css", args.output)
publish(args.style+"images",args.output)