Adding process() and publish() methods to mimic the old script's publish stages.
[treecutter.git] / src / tree-cutter.py
#!/usr/bin/python
import os
import fnmatch
import subprocess
import amara
import re
import tempfile
import errno
import time
import argparse
import shutil
import pygraphviz as pgv
import glob
from amara import bindery
from amara.xslt import transform
from Cheetah.Template import Template

parser = argparse.ArgumentParser(description='Process docbook article tree.')
parser.add_argument('--style', nargs='?',
                    default=os.path.dirname(os.getcwd())+'/style/default/')
parser.add_argument('--output', nargs='?',
                    default=os.path.dirname(os.getcwd())+'/htdocs/')
args = parser.parse_args()

style_xslt = args.style+"docbook.xsl"
style_tmpl = args.style+"index.en.html.tmpl"
outputdir = args.output

tmptarget = tempfile.mkdtemp()+'/'

valid_scripts = ['.py','.pl']
MAXLEVEL = 10000

def mkdir_p(path):
    try:
        os.makedirs(path)
    except OSError as exc: # Python >2.5
        if exc.errno == errno.EEXIST:
            pass
        else: raise

def publish(src,target):
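    """Mirror src into target with rsync -a --delete, reporting any rsync failure"""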
    cmd = ["rsync","-a","--delete",src,target]
    retcode = subprocess.call(cmd)
    if retcode:
        print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'


PREFIXES={u'db': u'http://docbook.org/ns/docbook',
          u'xi': u'http://www.w3.org/2001/XInclude',
          u'xl': u'http://www.w3.org/1999/xlink'}

class Directory():
    """Class containing the state of the directory with articles"""
    def __init__(self):
        self._cwd = '.'
        self._tree = []

    def scan(self):
        for dirname, dirnames, filenames in os.walk(self._cwd):
            for filename in filenames:
                if fnmatch.fnmatch(filename, '*.xml'):
                    file_ = os.path.join(dirname,filename)
                    doc = bindery.parse(file_, prefixes=PREFIXES)
                    title = doc.xml_select(u'/db:article/db:info/db:title')
                    menu  = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
                    if title and menu:
                        base = file_.split('.')[1]
                        link = base.replace('index','')
                        self._tree.append(link)

    def set(self):
        return set(self._tree)

class Page():
    """Class representing a version of a webpage"""
    def __init__(self,page):
        self._file = page[1]
        self._lang = page[0]
        self._doc = None
        self._resources = []
        self._title = None
        self._menu = None
        self._rendered_article = None

    def language(self):
        return self._lang

    def menu(self):
        return self._menu

    def set_article(self,art):
        self._rendered_article = art

    def prepare(self):
        self._doc = bindery.parse(self._file, prefixes=PREFIXES)
        if self._doc.xml_select(u'/db:article/db:info/db:title'):
            self._title = unicode(self._doc.article.info.title)
        if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
            self._menu = unicode(self._doc.article.info.titleabbrev)

        dirname = os.path.dirname(self._file)
        code  = self._doc.xml_select(u"//xi:include[@parse='text']")
        if code:
            for c in code:
                (p, ext) = os.path.splitext(c.href)
                if ext in valid_scripts:
                    exe = os.path.join(os.path.abspath(dirname+c.href))
                    xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
                    xstr = bindery.parse(str(xml.stdout.read()))
                    # splice the script output in place of the xi:include
                    idp = c.xml_index_on_parent
                    for x in xstr.xml_children:
                        c.xml_parent.xml_insert(idp,x)
                    c.xml_parent.xml_remove(c)

        for r in self._doc.xml_select(u"//db:link[@xl:href]"):
            rf = os.path.join(dirname,r.href)
            if os.path.isfile(rf):
                self._resources.append(rf)
        for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
            im = os.path.join(dirname,i.fileref)
            if os.path.isfile(im):
                self._resources.append(im)

    def render(self):
        #  amara can not handle the docbook stylesheets
        #  xmlarticle = transform(doc,style_xslt)
        cwd = os.getcwd()
        dirname = os.path.dirname(self._file)
        os.chdir(dirname)
        infile  = os.path.basename(tempfile.mktemp())
        outfile = tempfile.mktemp()
        tfi = open(infile,'w')
        tfi.write(self._doc.xml_encode())
        tfi.close()
        #  cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
        cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
        retcode = subprocess.call(cmd)
        if retcode:
            print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
        tfo = open(outfile,'r')
        self._rendered_article = tfo.read()
        tfo.close()
        os.remove(infile)
        os.remove(outfile)
        os.chdir(cwd)

    def template(self,sitemap):
        htmlmenu =  sitemap.gen_menu(self._lang,None,None)
        levelmenu = sitemap.gen_menu(self._lang,self,"tree")
        template = Template(file=style_tmpl,
                            searchList=[{'title':self._title},
                                        {'menu':htmlmenu},
                                        {'article':self._rendered_article},
                                        {'levelmenu':levelmenu},
                                        {'levelname':'Menu'}])
        outfile = tmptarget+'html'.join(self._file.rsplit('xml',1))
        mkdir_p(os.path.dirname(outfile))
        out = open(outfile, 'w')
        out.write(str(template))
        out.close()


class Link():
    """Class representing a webpage on the site"""
    def __init__(self,link):
        self._link = link
        # find the representations of the link.
        self._pages = []
        path = link
        if self._link[-1] == '/':
            path = path+'index'
        lang = self._scan_languages(path)
        for l in lang:
            self._pages.append(Page(l))

    def _scan_languages(self,path):
        lang = []
        for l in  glob.glob('.'+path+'*'):
            ls = l.split('.')
            if len(ls) > 3 and ls[3] == 'xml':
                lang.append((ls[2],l))
        return lang

    def link(self):
        return self._link

    def prepare(self):
        for page in self._pages:
            page.prepare()

    def languages(self):
        p = []
        for page in self._pages:
            p.append(page.language())
        return p

    def render(self):
        for page in self._pages:
            page.render()

    def template(self,sitemap):
        for page in self._pages:
            page.template(sitemap)

    def page(self,lang):
        for page in self._pages:
            if page.language()==lang:
                return page

class Node():
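    """Node in the sitemap trie: one path token, its value (a Link) and child nodes"""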
    def __init__(self,token,value):
        self._token = token
        self._value = value
        self._children = []

    def token(self):
        return self._token

    def value(self):
        return self._value

    def children(self):
        return self._children

class Trie():
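    """Prefix tree of Links keyed on path tokens; iterating yields each value before its children"""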
    def __init__(self):
        self._root = []

    def __iter__(self):
        return self.inorder(self._root)

    def inorder(self,t):
        for l in t:
            yield l.value()
            for x in self.inorder(l.children()):
                yield x

    def _add(self,trie, key, content):
        # is the key a leaf
        k = key.pop(0)
        if key == []:
            node = Node(k,content)
            trie.append(node)
        else:
            for ch in trie:
                if ch.token() == k:
                    self._add(ch.children(), key, content)

    def add(self,key, content):
        self._add(self._root, key, content)

    def _graph(self, trie, G):
        for l in trie:
            G.add_node(l.token())
            for ch in l.children():
                G.add_edge(l.token(),ch.token())
            self._graph(l.children(), G)

    def graph(self):
        G = pgv.AGraph(directed=True)
        G.add_node("sitemap")
        for ch in self._root:
            G.add_edge("sitemap",ch.token())
        self._graph(self._root, G)
#        G.layout('dot')
#        G.draw('g.png')
#        print G.string()

    def _menu(self, trie, lang, page, css):
        html = "<ul%s>\n" % css
        for l in trie:
            sel = ''
            if l.value().page(lang) == page:
                sel = ' class="selected"'
            html += '<li%s><a href="%s">%s</a>\n' \
            % (sel,l.value().link(),l.value().page(lang).menu())
            html += self._menu(l.children(), lang, page, "")
        html += "</ul>\n"
        return html

    def menu(self,lang,page,cssclass):
        css = ''
        if cssclass:
            css = ' class="'+cssclass+'"'
        return self._menu(self._root, lang, page, css)

class Sitemap():
    """Class keeping the internal site structure"""
    def __init__(self):
        self._file = 'sitemap.txt'
        self._tree = Trie()

    def add_link(self, link):
        tokens = filter(None,re.split(r'(^/\w*/|\w*/)',link))
        self._tree.add(tokens,Link(link))

    def write_map(self):
        f = open(self._file,'w')
        f.write('\n'.join(link.link() for link in self._tree))
        f.close()

    def read_map(self):
        try:
            f = open(self._file)
            sml = f.read().split()
            f.close()
            for line in sml:
                self.add_link(line)
        except IOError, what_error:
            print 'INFO: Could not read sitemap.txt - one will be created'

    def set(self):
        return set(link.link() for link in self._tree)

    def process(self):
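        """Run the publish stages for every link: prepare, collect languages, render, template, then build a sitemap page per language"""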
        t1 = time.time()
        for link in self._tree:
            link.prepare()
        t2 = time.time()
        print "Prepare  [%5.2f s]" % (round(t2-t1,2))
        sitelang = set()
        for link in self._tree:
            sitelang = sitelang.union(set(link.languages()))
        t3 = time.time()
        print "Language [%5.2f s]" % (round(t3-t2,2))
        for link in self._tree:
            link.render()
        t4 = time.time()
        print "Render   [%5.2f s]" % (round(t4-t3,2))
        for link in self._tree:
            link.template(self)
        t5 = time.time()
        print "Template [%5.2f s]" % (round(t5-t4,2))
        sm = {}
        for l in sitelang:
            sm[l] = Page((l,'/sitemap'))
            sm[l].set_article(self.gen_menu(l,None,"tree sitemap"))
            sm[l].template(self)
        t6 = time.time()
        print "Sitemap  [%5.2f s]" % (round(t6-t5,2))

    def graph(self):
        self._tree.graph()

    def gen_menu(self,lang,page,cssclass):
        return self._tree.menu(lang,page,cssclass)

    def publish(self):
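        """Rsync the rendered tree plus the style css and images to the output directory"""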
        publish(tmptarget, args.output)
        publish(args.style+"css", args.output)
        publish(args.style+"images",args.output)

def generateSitemap():
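    """Legacy pass: walk the tree for DocBook articles, merge them with sitemap.txt, rewrite the file and return the page list"""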
    sitemap = []
    try:
        sfile = open('sitemap.txt')
        flist = sfile.read().split()
        sfile.close()
        for f in flist:
            sitemap.append(dict(link=f))
    except IOError, what_error:
        print 'Sitemap missing - generating one.'

    for dirname, dirnames, filenames in os.walk('.'):
        for filename in filenames:
            if fnmatch.fnmatch(filename, '*.xml'):
                xfile = os.path.join(dirname,filename)
                doc = bindery.parse(xfile,
                                    prefixes={u'db': u'http://docbook.org/ns/docbook',
                                              u'xi': u'http://www.w3.org/2001/XInclude',
                                              u'xl': u'http://www.w3.org/1999/xlink'})
                title = doc.xml_select(u'/db:article/db:info/db:title')
                menu  = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
                code  = doc.xml_select(u"//xi:include[@parse='text']")
                resource = doc.xml_select(u"//db:link[@xl:href]")
                image = doc.xml_select(u"//db:imagedata[@fileref]")
                exe = 0
                for c in code:
                    (p, ext) = os.path.splitext(c.href)
                    if ext in valid_scripts:
                        exe = 1

                if title and menu:
                    found = 0
                    base = xfile.split('.')[1]
                    link = base.replace('index','')
                    level = len(filter(None,re.split(r'(^/\w*/|\w*/)',link)))
                    res = []
                    for r in resource:
                        rf = os.path.join(dirname,r.href)
                        if os.path.isfile(rf):
                            res.append(rf)
                    for i in image:
                        im = os.path.join(dirname,i.fileref)
                        if os.path.isfile(im):
                            res.append(im)
                    page = dict(title=unicode(doc.article.info.title),
                                menu=unicode(doc.article.info.titleabbrev),
                                output=os.path.join(dirname,
                                                    filename.replace('xml','html')),
                                exe=exe,
                                file=xfile,
                                res=res,
                                level=level)
                    for l in sitemap:
                        if l['link'] == link:
                            found = 1
                            l.update(page)
                    if not found:
                        print "adding "+link+" to sitemap"
                        dd = dict(link=link)
                        dd.update(page)
                        sitemap.append(dd)
    sfile = open('sitemap.txt','w')
    for l in sitemap:
        sfile.write(l['link']+'\n')
    sfile.close()
    return sitemap

def expandXincludeTxt(page):
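    """Replace xi:include parse='text' elements that point at a script with the XML the script prints on stdout"""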
    doc = bindery.parse(page['file'],
                        prefixes={u'db': u'http://docbook.org/ns/docbook',
                                  u'xi': u'http://www.w3.org/2001/XInclude'})
    if page['exe']:
        code  = doc.xml_select(u"//xi:include[@parse='text']")
        for c in code:
            (p, ext) = os.path.splitext(c.href)
            if ext in valid_scripts:
                exe = os.path.join(os.path.abspath(c.href))
                xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
                xstr = bindery.parse(str(xml.stdout.read()))
                id = c.xml_index_on_parent
                for x in xstr.xml_children:
                    c.xml_parent.xml_insert(id,x)
                c.xml_parent.xml_remove(c)
    return doc

def xsltConvert(doc, page):
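    """Run xsltproc with the DocBook stylesheet on doc from the page's directory and return the generated HTML"""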
    #  amara can not handle the docbook stylesheets
    #  xmlarticle = transform(doc,style_xslt)
    cwd = os.getcwd()
    rundir = os.path.dirname(page['file'])
    os.chdir(rundir)
    infile  = os.path.basename(tempfile.mktemp())
    outfile = tempfile.mktemp()
    tfi = open(infile,'w')
    tfi.write(doc.xml_encode())
    tfi.close()
    #  cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
    cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
    retcode = subprocess.call(cmd)
    if retcode:
        print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
    tfo = open(outfile,'r')
    result = tfo.read()
    tfo.close()
    os.remove(infile)
    os.remove(outfile)
    os.chdir(cwd)
    return result

def genMenu(page,sitemap,slevel,elevel):
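    """Build nested <ul> menu HTML for pages between slevel and elevel; returns (html, title)"""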
    title = None
    sm = []
    if elevel == MAXLEVEL or elevel == 1 or page == None:
        html = '<ul>\n'
        sm = sitemap
    else:
        html = '<ul class="tree">\n'
        idx = sitemap.index(page)
        while (sitemap[idx]['level'] == page['level']):
            idx = idx-1
        title = sitemap[idx]['menu']
        idx = idx+1
        while (idx < len(sitemap) and sitemap[idx]['level'] == page['level']):
            sm.append(sitemap[idx])
            idx = idx+1
    oldlevel = slevel

    for p in sm:
        if slevel > p['level'] or elevel < p['level']:
            continue
        if not title and p['link'] == '/':
            title = p['menu']

        if oldlevel < p['level']:
            html+='<ul>\n'
        elif oldlevel > p['level']:
            if p['link'][-1] == '/':
                html+='</li>\n'
            html+='</ul>\n</li>\n'
        if page != None and page == p:
            html+='<li class="selected"><a href="%s">%s</a>' % (p['link'],p['menu'])
        else:
            html+='<li><a href="%s">%s</a>' % (p['link'],p['menu'])
        if p['link'][-1] != '/' or p['link'] == '/':
            html+='</li>\n'
        oldlevel = p['level']
    html+='</ul>\n'
    return (html,title)

def writeToTemplate(page,doc,sitemap):
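    """Fill the Cheetah template with the article and menus, write it under tmptarget and copy the page's resources"""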
    (menu,menuname) = genMenu(page,sitemap,1,MAXLEVEL)
    (levelmenu,levelname) = genMenu(page,sitemap,page['level'],page['level'])
    template = Template(file=style_tmpl,
                        searchList=[{'title':page['title']},
                                    {'menu':menu},
                                    {'article':doc},
                                    {'levelmenu':levelmenu},
                                    {'levelname':levelname}])
    outfile = tmptarget+page['output']
    mkdir_p(os.path.dirname(outfile))
    out = open(outfile, 'w')
    out.write(str(template))
    out.close()
    for r in page['res']:
        mkdir_p(os.path.dirname(tmptarget+r))
        shutil.copyfile(r, tmptarget+r)

def createSitemap(sitemap):
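    """Render sitemap.en.html from the full menu using the same template"""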
    (menu,menuname) = genMenu(None,sitemap,1,MAXLEVEL)
    template = Template(file=style_tmpl,
                        searchList=[
            {'title':'Sitemap'},
            {'menu':menu},
            {'article':menu},
            {'levelmenu':''},
            {'levelname':''}])
    outfile = tmptarget+'sitemap.en.html'
    mkdir_p(os.path.dirname(outfile))
    out = open(outfile, 'w')
    out.write(str(template))
    out.close()

dir_ = Directory()
sitemap = Sitemap()

dir_.scan()
sitemap.read_map()

missing = dir_.set() - sitemap.set()
removed = sitemap.set() - dir_.set()
for page in removed:
    print 'page '+page+' missing!!'
for page in missing:
    print 'adding missing page '+page
    sitemap.add_link(page)
if len(missing | removed) != 0:
    print 'writing new sitemap - please adjust if needed'
    sitemap.write_map()
sitemap.graph()

sitemap.process()

t1 = time.time()
sitemap.publish()
t2 = time.time()
print "Publish  [%5.2f s]" % (round(t2-t1,2))

# Old script's stages, still run as-is; the Sitemap.process()/publish() calls above mimic them.
sitemap = generateSitemap()
tmptarget = tempfile.mkdtemp()+'/'
for page in sitemap:
    t1 = time.time()
    print "Page : %-30s %30s" % (page['link'],
                        time.ctime(os.stat(page['file']).st_mtime)),
    doc = expandXincludeTxt(page)
    pubdoc = xsltConvert(doc, page)
    writeToTemplate(page,pubdoc,sitemap)
    t2 = time.time()
    print "[%5.2f s]" % (round(t2-t1,2))

createSitemap(sitemap)
publish(tmptarget, args.output)
publish(args.style+"css", args.output)
publish(args.style+"images",args.output)