Add functionality to publish linked documents: local files referenced via link xlink:href and imagedata fileref are now copied alongside each page.
[treecutter.git] / src / tree-cutter.py
#!/usr/bin/python
import os
import fnmatch
import subprocess
import amara
import re
import tempfile
import errno
import time
import argparse
import shutil
from amara import bindery
from amara.xslt import transform
from Cheetah.Template import Template

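# Command-line interface.  --style points at a directory holding the
# DocBook stylesheet (docbook.xsl) and the Cheetah page template;
# --output is the directory the finished site is rsync'ed to.  The
# defaults assume sibling style/default/ and htdocs/ directories.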
parser = argparse.ArgumentParser(description='Process docbook article tree.')
parser.add_argument('--style', nargs='?',
                    default=os.path.dirname(os.getcwd())+'/style/default/')
parser.add_argument('--output', nargs='?',
                    default=os.path.dirname(os.getcwd())+'/htdocs/')
args = parser.parse_args()

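# Stylesheet, page template and output directory derived from the options.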
style_xslt = args.style+"docbook.xsl"
style_tmpl = args.style+"index.en.html.tmpl"
outputdir = args.output

valid_scripts = ['.py','.pl']
MAXLEVEL = 10000

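# Create a directory and any missing parents, ignoring "already exists"
# errors -- the equivalent of mkdir -p.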
def mkdir_p(path):
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        if exc.errno != errno.EEXIST:
            raise

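# Mirror src into target with rsync (-a keeps permissions and timestamps).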
def publish(src,target):
    cmd = ["rsync","-a",src,target]
    retcode = subprocess.call(cmd)
    if retcode:
        print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'

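# Walk the tree for DocBook articles and build the sitemap.  Each page
# records its title, menu entry, level, embedded executable scripts
# (XInclude with parse='text') and linked resources: local files referenced
# with link xlink:href or imagedata fileref, e.g. (a minimal sketch, not
# taken from this repository):
#
#   <link xlink:href="slides.pdf">slides</link>
#   <imagedata fileref="images/diagram.png"/>
#
# An existing sitemap.txt fixes the order of known pages; new pages are
# appended and the file is rewritten.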
def generateSitemap():
  sitemap = []
  try:
    sfile = open('sitemap.txt')
    flist = sfile.read().split()
    sfile.close()
    for f in flist:
      sitemap.append(dict(link=f))
  except IOError:
    print 'Sitemap missing - generating one.'
  for dirname, dirnames, filenames in os.walk('.'):
    for filename in filenames:
      if fnmatch.fnmatch(filename, '*.xml'):
        xfile = os.path.join(dirname,filename)
        doc = bindery.parse(xfile,
                            prefixes={u'db': u'http://docbook.org/ns/docbook',
                                      u'xi': u'http://www.w3.org/2001/XInclude',
                                      u'xl': u'http://www.w3.org/1999/xlink'})
        title = doc.xml_select(u'/db:article/db:info/db:title')
        menu  = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
        code  = doc.xml_select(u"//xi:include[@parse='text']")
        resource = doc.xml_select(u"//db:link[@xl:href]")
        image = doc.xml_select(u"//db:imagedata[@fileref]")
        exe = 0
        for c in code:
          (p, ext) = os.path.splitext(c.href)
          if ext in valid_scripts:
            exe = 1

        if title and menu:
          found = 0
          base = xfile.split('.')[1]
          link = base.replace('index','')
          level = len(filter(None,re.split(r'(/\w*/)',link)))
          res = []
          for r in resource:
            rf = os.path.join(dirname,r.href)
            if os.path.isfile(rf):
              res.append(rf)
          for i in image:
            im = os.path.join(dirname,i.fileref)
            if os.path.isfile(im):
              res.append(im)
          page = dict(title=unicode(doc.article.info.title),
                      menu=unicode(doc.article.info.titleabbrev),
                      output=os.path.join(dirname,
                                          filename.replace('xml','html')),
                      exe=exe,
                      file=xfile,
                      res=res,
                      level=level)
          for l in sitemap:
            if l['link'] == link:
              found = 1
              l.update(page)
          if not found:
            print "adding "+link+" to sitemap"
            dd = dict(link=link)
            dd.update(page)
            sitemap.append(dd)
  sfile = open('sitemap.txt','w')
  for l in sitemap:
    sfile.write(l['link']+'\n')
  sfile.close()
  return sitemap

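# Re-parse the article and replace each XInclude of an executable script
# (parse='text', extension in valid_scripts) with the XML the script
# prints on stdout, so generated content is embedded before rendering.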
def expandXincludeTxt(page):
  doc = bindery.parse(page['file'],
                      prefixes={u'db': u'http://docbook.org/ns/docbook',
                                u'xi': u'http://www.w3.org/2001/XInclude'})
  if page['exe']:
    code  = doc.xml_select(u"//xi:include[@parse='text']")
    for c in code:
      (p, ext) = os.path.splitext(c.href)
      if ext in valid_scripts:
        exe = os.path.abspath(c.href)
        # communicate() drains stdout and waits for the script to exit
        out = subprocess.Popen([exe],stdout=subprocess.PIPE).communicate()[0]
        xstr = bindery.parse(out)
        # insert the generated nodes, in document order, where the include was
        idx = c.xml_index_on_parent
        for x in list(xstr.xml_children):
          c.xml_parent.xml_insert(idx,x)
          idx += 1
        c.xml_parent.xml_remove(c)
  return doc

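# Render the article to HTML by running the DocBook stylesheet through
# xsltproc.  The transform runs inside the article's directory so that
# relative XIncludes and file references resolve correctly.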
def xsltConvert(doc, page):
#  amara can not handle the docbook stylesheets
#  xmlarticle = transform(doc,style_xslt)
  cwd = os.getcwd()
  rundir = os.path.dirname(page['file'])
  os.chdir(rundir)
  infile  = os.path.basename(tempfile.mktemp())
  outfile = tempfile.mktemp()
  tfi = open(infile,'w')
  tfi.write(doc.xml_encode())
  tfi.close()
#  cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
  cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
  retcode = subprocess.call(cmd)
  if retcode:
    print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
  tfo = open(outfile,'r')
  result = tfo.read()
  tfo.close()
  os.remove(infile)
  os.remove(outfile)
  os.chdir(cwd)
  return result

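# Build a nested <ul> menu from the sitemap.  slevel and elevel bound the
# levels included: genMenu(page, sitemap, 1, MAXLEVEL) yields the full
# tree, genMenu(page, sitemap, n, n) only the pages on the current level.
# Returns the HTML and the title of the enclosing menu section.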
def genMenu(page,sitemap,slevel,elevel):
  title = None
  sm = []
  if elevel == MAXLEVEL or elevel == 1:
    sm = sitemap
  else:
    idx = sitemap.index(page)
    while sitemap[idx]['level'] == page['level']:
      idx = idx-1
    title = sitemap[idx]['menu']
    idx = idx+1
    while idx < len(sitemap) and sitemap[idx]['level'] == page['level']:
      sm.append(sitemap[idx])
      idx = idx+1
  oldlevel = slevel
  html = '<ul>\n'
  for p in sm:
    if slevel > p['level'] or elevel < p['level']:
      continue
    if not title and p['link'] == '/':
      title = p['menu']
    if oldlevel < p['level']:
      html+='<ul>\n'
    elif oldlevel > p['level']:
      if p['link'][-1] == '/':
        html+='</li>\n'
      html+='</ul>\n</li>\n'
    if page == p:
      html+='<li><a href="%s">[%s]</a>' % (p['link'],p['menu'])
    else:
      html+='<li><a href="%s">%s</a>' % (p['link'],p['menu'])
    if p['link'][-1] != '/' or p['link'] == '/':
      html+='</li>\n'
    oldlevel = p['level']
  html+='</ul>\n'
  return (html,title)

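# Render one page: fill the Cheetah template with the full menu, the
# per-level menu and the transformed article, write the result under the
# temporary target tree, and copy the page's linked resources next to it.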
def writeToTemplate(page,doc,sitemap):
  (menu,menuname) = genMenu(page,sitemap,1,MAXLEVEL)
  (levelmenu,levelname) = genMenu(page,sitemap,page['level'],page['level'])
  template = Template(file=style_tmpl,
                      searchList=[{'title':page['title']},
                                  {'menu':menu},
                                  {'article':doc},
                                  {'levelmenu':levelmenu},
                                  {'levelname':levelname}])
  outfile = tmptarget+page['output']
  mkdir_p(os.path.dirname(outfile))
  out = open(outfile, 'w')
  out.write(str(template))
  out.close()
  for r in page['res']:
    mkdir_p(os.path.dirname(tmptarget+r))
    shutil.copyfile(r, tmptarget+r)

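# Main driver: build the sitemap, render every page into a temporary
# directory, then rsync the pages and the style assets to the output
# directory in one go.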
sitemap = generateSitemap()
tmptarget = tempfile.mkdtemp()+'/'
for page in sitemap:
  t1 = time.time()
  print "Page : %-30s %30s" % (page['link'],
                      time.ctime(os.stat(page['file']).st_mtime)),
  doc = expandXincludeTxt(page)
  pubdoc = xsltConvert(doc, page)
  writeToTemplate(page,pubdoc,sitemap)
  t2 = time.time()
  print "[%5.2f s]" % (round(t2-t1,2))
publish(tmptarget, args.output)
publish(args.style+"css", args.output)
publish(args.style+"images",args.output)