Implementing generation of a menu, currently only the full menu with and without...
[treecutter.git] / src / tree-cutter.py
1 #!/usr/bin/python
2 import os
3 import fnmatch
4 import subprocess
5 import amara
6 import re
7 import tempfile
8 import errno
9 import time
10 import argparse
11 import shutil
12 import pygraphviz as pgv
13 import glob
14 from amara import bindery
15 from amara.xslt import transform
16 from Cheetah.Template import Template
17
# Command line: where the style (XSLT, template, css, images) lives and
# where the finished site is published.
parser = argparse.ArgumentParser(description='Process docbook article tree.')
parser.add_argument('--style', nargs='?',
                    default=os.path.dirname(os.getcwd())+'/style/default/',
                    help='directory holding docbook.xsl, the page template, '
                         'css and images')
parser.add_argument('--output', nargs='?',
                    default=os.path.dirname(os.getcwd())+'/htdocs/',
                    help='directory the generated site is published to')
args = parser.parse_args()

# os.path.join copes with a --style value given with or without a trailing
# slash; plain string concatenation only worked when the slash was present.
style_xslt = os.path.join(args.style, "docbook.xsl")
style_tmpl = os.path.join(args.style, "index.en.html.tmpl")
outputdir = args.output

# Staging directory the pages are written to before being published.  The
# trailing slash is kept because callers append paths by concatenation.
tmptarget = tempfile.mkdtemp()+'/'

# File extensions of xi:include targets that are executed rather than
# included verbatim.
valid_scripts = ['.py','.pl']
# Level value genMenu() treats as "no depth limit".
MAXLEVEL = 10000
33
def mkdir_p(path):
    """Create directory *path* and any missing parents, like ``mkdir -p``.

    An already existing directory is not an error.  Every other failure,
    including *path* existing as something that is not a directory, is
    raised as OSError.
    """
    try:
        os.makedirs(path)
    except OSError as exc: # Python >2.5
        # EEXIST is also reported when a regular file occupies the path;
        # only a real directory counts as "already there".
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise
41
def publish(src,target):
    """Mirror *src* onto *target* by running ``rsync -a --delete``.

    A non-zero rsync exit status is reported on stdout; nothing is raised
    or returned for it.
    """
    command = ["rsync", "-a", "--delete", src, target]
    status = subprocess.call(command)
    if status != 0:
        print('Error: '+' '.join(command)+' Returncode ['+str(status)+']')
47
48
# Namespace prefixes passed to bindery.parse() so the XPath expressions in
# this file can use db: (DocBook), xi: (XInclude) and xl: (XLink).
PREFIXES={u'db': u'http://docbook.org/ns/docbook',
          u'xi': u'http://www.w3.org/2001/XInclude',
          u'xl': u'http://www.w3.org/1999/xlink'}
52
class Directory():
    """Class containing the state of the directory with articles"""
    def __init__(self):
        self._cwd = '.'
        self._tree = []

    @staticmethod
    def _link_for(file_):
        """Return the site link for an article path.

        './a/page.en.xml' gives '/a/page'; an article whose base name is
        exactly 'index' gives the directory link ('./a/index.en.xml' ->
        '/a/').  Only that trailing 'index' is dropped, so names that merely
        contain the word (e.g. 'indexes') are left intact.

        The link is whatever sits between the first and second '.' of the
        path, so directory or file names containing extra dots are not
        supported.
        """
        base = file_.split('.')[1]
        if base.endswith('/index'):
            return base[:-len('index')]
        return base

    def scan(self):
        """Walk the tree below the current directory and record the link of
        every *.xml article that has both a title and a titleabbrev."""
        for dirname, dirnames, filenames in os.walk(self._cwd):
            for filename in filenames:
                if not fnmatch.fnmatch(filename, '*.xml'):
                    continue
                file_ = os.path.join(dirname,filename)
                doc = bindery.parse(file_, prefixes=PREFIXES)
                title = doc.xml_select(u'/db:article/db:info/db:title')
                menu  = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
                if title and menu:
                    self._tree.append(self._link_for(file_))

    def set(self):
        """Return the recorded links as a set (language versions of one
        article collapse into a single link)."""
        return set(self._tree)
74
class Page():
    """Class representing a version of a webpage"""
    def __init__(self,page):
        # page is a (language, path) pair, see Link._scan_languages().
        self._file = page[1]
        self._lang = page[0]
        self._doc = None
        self._resources = []
        self._title = None
        self._menu = None
        self._rendered_article = None

    def language(self):
        """Return the language code of this page."""
        return self._lang

    def menu(self):
        """Return the menu text (titleabbrev), or None before prepare()."""
        return self._menu

    def set_article(self,art):
        """Store already rendered article markup."""
        self._rendered_article = art

    def prepare(self):
        """Parse the article, read title and menu text, replace executable
        text includes with the output of the script they point to, and
        collect the local files the article links to."""
        self._doc = bindery.parse(self._file, prefixes=PREFIXES)
        if self._doc.xml_select(u'/db:article/db:info/db:title'):
            self._title = unicode(self._doc.article.info.title)
        if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
            self._menu = unicode(self._doc.article.info.titleabbrev)

        dirname = os.path.dirname(self._file)
        for c in self._doc.xml_select(u"//xi:include[@parse='text']"):
            (p, ext) = os.path.splitext(c.href)
            if ext not in valid_scripts:
                continue
            # The script lives next to the article.  Plain concatenation
            # of dirname and href dropped the path separator; a leading
            # slash on the href is stripped so such hrefs keep resolving
            # below the article directory as before.
            exe = os.path.abspath(os.path.join(dirname, c.href.lstrip('/')))
            proc = subprocess.Popen([exe],stdout=subprocess.PIPE)
            # communicate() reads all output and reaps the child process.
            output = proc.communicate()[0]
            xstr = bindery.parse(str(output))
            parent = c.xml_parent
            idp = c.xml_index_on_parent
            # Insert the generated nodes in document order in front of the
            # include element, then drop the include exactly once.
            # NOTE(review): assumes xml_insert(index, child) places the
            # child at that index, as the original call implied - confirm.
            for offset, x in enumerate(list(xstr.xml_children)):
                parent.xml_insert(idp+offset,x)
            parent.xml_remove(c)

        for r in self._doc.xml_select(u"//db:link[@xl:href]"):
            rf = os.path.join(dirname,r.href)
            if os.path.isfile(rf):
                self._resources.append(rf)
        for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
            im = os.path.join(dirname,i.fileref)
            if os.path.isfile(im):
                self._resources.append(im)

    def render(self):
        """Run xsltproc with the site stylesheet over the prepared document
        and keep the result as the rendered article.

        xsltproc runs from the article's directory with its input file
        placed there.  The working directory is always restored and the
        temporary files are always removed.
        """
        #  amara can not handle the docbook stylesheets
        #  xmlarticle = transform(doc,style_xslt)
        cwd = os.getcwd()
        dirname = os.path.dirname(self._file)
        os.chdir(dirname)
        infile = None
        outfile = None
        try:
            # mkstemp creates the files atomically; mktemp only invented a
            # name that another process could claim first.
            fd, infile = tempfile.mkstemp(dir=os.curdir)
            with os.fdopen(fd,'w') as tfi:
                tfi.write(self._doc.xml_encode())
            fd, outfile = tempfile.mkstemp()
            os.close(fd)
#  cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
            cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
            retcode = subprocess.call(cmd)
            if retcode:
                print('Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']')
            with open(outfile,'r') as tfo:
                self._rendered_article = tfo.read()
        finally:
            for tmp in (infile, outfile):
                if tmp is not None and os.path.exists(tmp):
                    os.remove(tmp)
            os.chdir(cwd)

    def template(self,sitemap):
        """Fill the page template with title, menus and rendered article and
        write the result below the staging directory, with the last 'xml'
        in the source path replaced by 'html'."""
        htmlmenu =  sitemap.gen_menu(self._lang,None,None)
        levelmenu = sitemap.gen_menu(self._lang,self,"tree")
        template = Template(file=style_tmpl,
                            searchList=[{'title':self._title},
                                        {'menu':htmlmenu},
                                        {'article':self._rendered_article},
                                        {'levelmenu':levelmenu},
                                        {'levelname':'Menu'}])
        outfile = tmptarget+'html'.join(self._file.rsplit('xml',1))
        mkdir_p(os.path.dirname(outfile))
        with open(outfile, 'w') as out:
            out.write(str(template))
162
163
class Link():
    """Class representing a webpage on the site"""
    def __init__(self,link):
        self._link = link
        # find the representations of the link.
        path = link
        # A link ending in '/' is served by the index article of that
        # directory.  endswith() also copes with an empty link.
        if link.endswith('/'):
            path = path+'index'
        self._pages = [Page(l) for l in self._scan_languages(path)]

    def _scan_languages(self,path):
        """Return (language, file) pairs for the files '.<path>.<lang>.xml'.

        The glob is anchored on the dot after the article name so that a
        link such as '/foo' no longer picks up './foobar.en.xml', and the
        base name must be exactly name.lang.xml, which rules out backups
        like 'foo.en.xml.bak'.
        """
        lang = []
        for l in glob.glob('.'+path+'.*'):
            parts = os.path.basename(l).split('.')
            if len(parts) == 3 and parts[2] == 'xml':
                lang.append((parts[1],l))
        return lang

    def link(self):
        """Return the site link this object stands for."""
        return self._link

    def prepare(self):
        """Prepare every language version of the page."""
        for page in self._pages:
            page.prepare()

    def languages(self):
        """Return the language codes the page exists in."""
        return [page.language() for page in self._pages]

    def render(self):
        """Render every language version of the page."""
        for page in self._pages:
            page.render()

    def template(self,sitemap):
        """Write every language version of the page through the template."""
        for page in self._pages:
            page.template(sitemap)

    def page(self,lang):
        """Return the Page for *lang*, or None when there is none."""
        for page in self._pages:
            if page.language()==lang:
                return page
        return None
210
class Node():
    """One entry of the Trie: a path token, the value stored for it and the
    list of child nodes."""
    def __init__(self,token,value):
        self._children = []
        self._value = value
        self._token = token

    def children(self):
        """Return the (mutable) list of child nodes."""
        return self._children

    def value(self):
        """Return the value stored in this node."""
        return self._value

    def token(self):
        """Return the path token identifying this node among its siblings."""
        return self._token
225
class Trie():
    """Tree of Node objects addressed by lists of path tokens; children keep
    the order in which they were added."""
    def __init__(self):
        self._root = []

    def __iter__(self):
        return self.inorder(self._root)

    def inorder(self,t):
        """Yield the value of every node in *t*, each parent before its
        children."""
        for l in t:
            yield l.value()
            for x in self.inorder(l.children()):
                yield x

    def _add(self,trie, key, content):
        """Store *content* under *key* below the node list *trie*.

        Returns True when the node was created and False when a parent
        token of the key does not exist yet.  The key is sliced rather
        than popped, so the caller's list is left untouched.
        """
        head, rest = key[0], key[1:]
        # is the key a leaf
        if not rest:
            trie.append(Node(head,content))
            return True
        for ch in trie:
            if ch.token() == head:
                return self._add(ch.children(), rest, content)
        return False

    def add(self,key, content):
        """Add *content* under the token sequence *key*.

        Returns True on success, False when the entry could not be placed
        because its parent has not been added.
        """
        return self._add(self._root, list(key), content)

    def _graph(self, trie, G):
        """Add the nodes in *trie* and the edges to their children to G."""
        for l in trie:
            G.add_node(l.token())
            for ch in l.children():
                G.add_edge(l.token(),ch.token())
            # Descend once per node; doing it inside the loop above
            # repeated the whole subtree walk for every child.
            self._graph(l.children(), G)

    def graph(self):
        """Build and return a directed graph of the trie rooted at a
        'sitemap' node."""
        G = pgv.AGraph(directed=True)
        G.add_node("sitemap")
        for ch in self._root:
            G.add_edge("sitemap",ch.token())
        self._graph(self._root, G)
#        G.layout('dot')
#        G.draw('g.png')
#        print G.string()
        return G

    def _menu(self, trie, lang, page, css):
        """Return the nested <ul> markup for the nodes in *trie*.

        Links without a page in *lang* are left out together with their
        children.  The entry whose page equals *page* gets
        class="selected".
        """
        parts = ["<ul%s>\n" % css]
        for l in trie:
            link = l.value()
            target = link.page(lang)
            if target is None:
                # No version of this page in the requested language.
                continue
            sel = ''
            if page is not None and target == page:
                sel = ' class="selected"'
            parts.append('<li%s><a href="%s">%s</a>'
                         % (sel,link.link(),target.menu()))
            if l.children():
                # Only nest a list when there is something to put in it.
                parts.append('\n')
                parts.append(self._menu(l.children(), lang, page, ""))
            parts.append('</li>\n')
        parts.append("</ul>\n")
        return ''.join(parts)

    def menu(self,lang,page,cssclass):
        """Return the whole menu as HTML for language *lang*.

        *page* is the Page to highlight (or None); *cssclass*, when given,
        becomes the class attribute of the outermost <ul>.
        """
        css = ''
        if cssclass:
            css = ' class="'+cssclass+'"'
        return self._menu(self._root, lang, page, css)
287
class Sitemap():
    """Class keeping the internal site structure"""
    def __init__(self):
        self._file = 'sitemap.txt'
        self._tree = Trie()

    def add_link(self, link):
        """Add *link* to the site tree.

        The link is split into path tokens, e.g. '/a/b' -> ['/a/', 'b'];
        empty pieces produced by the split are dropped.
        """
        tokens = [t for t in re.split(r'(^/\w*/|\w*/)',link) if t]
        self._tree.add(tokens,Link(link))

    def read_map(self):
        """Load the links listed (whitespace separated) in the sitemap file.

        A missing or unreadable file is reported and otherwise ignored.
        """
        try:
            with open(self._file) as f:
                sml = f.read().split()
        except IOError:
            print('INFO: Could not read sitemap.txt - one will be created')
            return
        for line in sml:
            self.add_link(line)

    def set(self):
        """Return the set of links currently in the site tree."""
        return set(link.link() for link in self._tree)

    def graph(self):
        """Build the graph representation of the site tree."""
        self._tree.graph()

    def gen_menu(self,lang,page,cssclass=None):
        """Return the HTML menu of the site for language *lang*.

        *page* is the page to mark as selected (or None) and *cssclass* an
        optional class for the outermost list.  Page.template() passes all
        three arguments, which the former two-parameter stub rejected.
        """
        return self._tree.menu(lang,page,cssclass)
316
def generateSitemap():
    """Build the page list of the site and rewrite sitemap.txt.

    Links are read from sitemap.txt (when present) to keep their order,
    then every *.xml article below the current directory that has both a
    title and a titleabbrev is merged in; unknown links are appended.
    All links are written back to sitemap.txt.

    Returns a list of dicts with the keys link, title, menu, output, exe,
    file, res and level.  Links listed in sitemap.txt for which no article
    was found are reported and left out of the returned list, because the
    callers need the 'file' and 'level' entries of every page.
    """
    sitemap = []
    try:
        with open('sitemap.txt') as sfile:
            flist = sfile.read().split()
    except IOError:
        print('Sitemap missing - generating one.')
        flist = []
    for f in flist:
        sitemap.append(dict(link=f))

    for dirname, dirnames, filenames in os.walk('.'):
        for filename in filenames:
            if not fnmatch.fnmatch(filename, '*.xml'):
                continue
            xfile = os.path.join(dirname,filename)
            doc = bindery.parse(xfile,
                                prefixes={u'db': u'http://docbook.org/ns/docbook',
                                          u'xi': u'http://www.w3.org/2001/XInclude',
                                          u'xl': u'http://www.w3.org/1999/xlink'})
            title = doc.xml_select(u'/db:article/db:info/db:title')
            menu  = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
            if not (title and menu):
                continue
            code  = doc.xml_select(u"//xi:include[@parse='text']")
            resource = doc.xml_select(u"//db:link[@xl:href]")
            image = doc.xml_select(u"//db:imagedata[@fileref]")
            # exe flags articles that include the output of a script.
            exe = 0
            for c in code:
                (p, ext) = os.path.splitext(c.href)
                if ext in valid_scripts:
                    exe = 1

            base = xfile.split('.')[1]
            # Only an article named exactly 'index' maps to the directory
            # link; replacing every 'index' mangled names such as
            # '/a/indexes'.
            if base.endswith('/index'):
                link = base[:-len('index')]
            else:
                link = base
            level = len([t for t in re.split(r'(^/\w*/|\w*/)',link) if t])
            res = []
            for r in resource:
                rf = os.path.join(dirname,r.href)
                if os.path.isfile(rf):
                    res.append(rf)
            for i in image:
                im = os.path.join(dirname,i.fileref)
                if os.path.isfile(im):
                    res.append(im)
            # Swap only the final 'xml' (the extension) for 'html'.
            htmlname = 'html'.join(filename.rsplit('xml',1))
            page = dict(title=unicode(doc.article.info.title),
                        menu=unicode(doc.article.info.titleabbrev),
                        output=os.path.join(dirname,htmlname),
                        exe=exe,
                        file=xfile,
                        res=res,
                        level=level)
            found = 0
            for l in sitemap:
                if l['link'] == link:
                    found = 1
                    l.update(page)
            if not found:
                print("adding "+link+" to sitemap")
                dd = dict(link=link)
                dd.update(page)
                sitemap.append(dd)

    with open('sitemap.txt','w') as sfile:
        for l in sitemap:
            sfile.write(l['link']+'\n')

    # Entries that only came from sitemap.txt carry nothing but 'link'.
    complete = []
    for l in sitemap:
        if 'file' in l:
            complete.append(l)
        else:
            print('Warning: no article found for '+l['link'])
    return complete
383
def expandXincludeTxt(page):
    """Parse the article of *page* and, when the page is flagged 'exe',
    replace every text include that points to a script with the parsed
    output of running that script.

    *page* is a dict with at least the keys 'file' and 'exe'.  Returns the
    parsed (and possibly modified) document.
    """
    doc = bindery.parse(page['file'],
                        prefixes={u'db': u'http://docbook.org/ns/docbook',
                                  u'xi': u'http://www.w3.org/2001/XInclude'})
    if page['exe']:
        code  = doc.xml_select(u"//xi:include[@parse='text']")
        for c in code:
            (p, ext) = os.path.splitext(c.href)
            if ext in valid_scripts:
                # NOTE(review): the href is resolved against the current
                # working directory, not the article's directory - confirm
                # that this is intended.
                exe = os.path.abspath(c.href)
                proc = subprocess.Popen([exe],stdout=subprocess.PIPE)
                # communicate() reads all output and reaps the child.
                output = proc.communicate()[0]
                xstr = bindery.parse(str(output))
                parent = c.xml_parent
                index = c.xml_index_on_parent
                # Insert in document order in front of the include; using
                # one fixed index reversed the generated nodes.
                # NOTE(review): assumes xml_insert(index, child) places the
                # child at that index, as the original call implied.
                for offset, x in enumerate(list(xstr.xml_children)):
                    parent.xml_insert(index+offset,x)
                parent.xml_remove(c)
    return doc
401
def xsltConvert(doc, rundir=None):
    """Transform *doc* with the site stylesheet using xsltproc and return
    the output as a string.

    doc    -- document offering xml_encode()
    rundir -- directory xsltproc is run from and in which the temporary
              input file is created.  When omitted, the directory of the
              module-level loop variable ``page`` is used, which is what
              this function always relied on implicitly.

    The working directory is always restored and the temporary files are
    always removed.
    """
#  amara can not handle the docbook stylesheets
#  xmlarticle = transform(doc,style_xslt)
    if rundir is None:
        rundir = os.path.dirname(page['file'])
    cwd = os.getcwd()
    os.chdir(rundir)
    infile = None
    outfile = None
    try:
        # mkstemp creates the files atomically; mktemp only invented a name
        # that another process could claim first.
        fd, infile = tempfile.mkstemp(dir=os.curdir)
        with os.fdopen(fd,'w') as tfi:
            tfi.write(doc.xml_encode())
        fd, outfile = tempfile.mkstemp()
        os.close(fd)
#  cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
        cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
        retcode = subprocess.call(cmd)
        if retcode:
            print('Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']')
        with open(outfile,'r') as tfo:
            result = tfo.read()
    finally:
        for tmp in (infile, outfile):
            if tmp is not None and os.path.exists(tmp):
                os.remove(tmp)
        os.chdir(cwd)
    return result
425
def genMenu(page,sitemap,slevel,elevel):
    """Return (html, title) for a menu built from the flat *sitemap* list.

    page    -- sitemap entry to mark as selected, or None
    sitemap -- list of page dicts with the keys 'link', 'menu' and 'level'
    slevel  -- entries with a lower level are skipped
    elevel  -- entries with a higher level are skipped

    With elevel equal to MAXLEVEL or 1, or without a page, the whole
    sitemap is rendered and the title is the menu text of the '/' entry if
    one is rendered.  Otherwise only the run of entries on the page's level
    around the page is rendered, inside <ul class="tree">, and the title is
    the menu text of the entry in front of that run (None when the run
    starts the list).
    """
    title = None
    sm = []
    if elevel == MAXLEVEL or elevel == 1 or page is None:
        html = '<ul>\n'
        sm = sitemap
    else:
        html = '<ul class="tree">\n'
        idx = sitemap.index(page)
        # Walk back to the entry in front of the run of same-level pages.
        # The idx >= 0 guard stops at the start of the list; without it a
        # negative index wrapped around to the end of the sitemap.
        while idx >= 0 and sitemap[idx]['level'] == page['level']:
            idx = idx-1
        if idx >= 0:
            title = sitemap[idx]['menu']
        idx = idx+1
        while (idx < len(sitemap) and sitemap[idx]['level'] == page['level']):
            sm.append(sitemap[idx])
            idx = idx+1
    oldlevel = slevel

    for p in sm:
        if slevel > p['level'] or elevel < p['level']:
            continue
        if not title and p['link'] == '/':
            title = p['menu']

        # Open or close a nested list when the level changes.
        if oldlevel < p['level']:
            html+='<ul>\n'
        elif oldlevel > p['level']:
            if p['link'][-1] == '/':
                html+='</li>\n'
            html+='</ul>\n</li>\n'
        if page is not None and page == p:
            html+='<li class="selected"><a href="%s">%s</a>' % (p['link'],p['menu'])
        else:
            html+='<li><a href="%s">%s</a>' % (p['link'],p['menu'])
        # Directory links other than '/' stay open so that their children
        # can be nested inside the <li>.
        if p['link'][-1] != '/' or p['link'] == '/':
            html+='</li>\n'
        oldlevel = p['level']
    html+='</ul>\n'
    return (html,title)
465
def writeToTemplate(page,doc,sitemap):
    """Write one finished page and its resources to the staging directory.

    The template is filled with the page title, the full menu, the rendered
    article *doc* and the menu of the page's own level, and stored at
    tmptarget + page['output'].  Every file in page['res'] is copied to the
    same relative path below tmptarget.
    """
    full_menu, _full_name = genMenu(page, sitemap, 1, MAXLEVEL)
    level_menu, level_name = genMenu(page, sitemap,
                                     page['level'], page['level'])
    search = [{'title': page['title']},
              {'menu': full_menu},
              {'article': doc},
              {'levelmenu': level_menu},
              {'levelname': level_name}]
    rendered = Template(file=style_tmpl, searchList=search)

    destination = tmptarget + page['output']
    mkdir_p(os.path.dirname(destination))
    handle = open(destination, 'w')
    handle.write(str(rendered))
    handle.close()

    for resource in page['res']:
        copy_to = tmptarget + resource
        mkdir_p(os.path.dirname(copy_to))
        shutil.copyfile(resource, copy_to)
483
def createSitemap(sitemap):
    """Write the sitemap page (sitemap.en.html) to the staging directory.

    The full menu is used both as the menu and as the article body of the
    template; the level menu and its name are left empty.
    """
    full_menu, _menu_name = genMenu(None, sitemap, 1, MAXLEVEL)
    search = [{'title': 'Sitemap'},
              {'menu': full_menu},
              {'article': full_menu},
              {'levelmenu': ''},
              {'levelname': ''}]
    rendered = Template(file=style_tmpl, searchList=search)

    destination = tmptarget + 'sitemap.en.html'
    mkdir_p(os.path.dirname(destination))
    handle = open(destination, 'w')
    handle.write(str(rendered))
    handle.close()
498
499 dir_ = Directory()
500 sitemap = Sitemap()
501
502 dir_.scan()
503 sitemap.read_map()
504
505 missing = dir_.set() - sitemap.set()
506 removed = sitemap.set() - dir_.set()
507 for page in removed:
508     print removed+' pages missing!!'
509
510 for page in missing:
511     print 'adding missing page '+page
512     sitemap.add_page(page)
513
514 sitemap.graph()
515
516
517 sitemap = generateSitemap()
518 tmptarget = tempfile.mkdtemp()+'/'
519 for page in sitemap:
520     t1 = time.time()
521     print "Page : %-30s %30s" % (page['link'],
522                         time.ctime(os.stat(page['file']).st_mtime)),
523     doc = expandXincludeTxt(page)
524     pubdoc = xsltConvert(doc)
525     writeToTemplate(page,pubdoc,sitemap)
526     t2 = time.time()
527     print "[%5.2f s]" % (round(t2-t1,2))
528
529 createSitemap(sitemap)
530 publish(tmptarget, args.output)
531 publish(args.style+"css", args.output)
532 publish(args.style+"images",args.output)