Add functionality to publish linked documents: local files referenced via link xlink:href and imagedata fileref are now copied alongside each page.
[treecutter.git] / src / tree-cutter.py
#!/usr/bin/python
import os
import fnmatch
import subprocess
import amara
import re
import tempfile
import errno
import time
import argparse
import shutil
from amara import bindery
from amara.xslt import transform
from Cheetah.Template import Template

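# Command-line interface.  --style points at a directory holding the
# DocBook stylesheet (docbook.xsl) and the Cheetah page template;
# --output is the directory the finished site is rsync'ed to.  The
# defaults assume sibling style/default/ and htdocs/ directories.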
parser = argparse.ArgumentParser(description='Process docbook article tree.')
parser.add_argument('--style', nargs='?',
                    default=os.path.dirname(os.getcwd())+'/style/default/')
parser.add_argument('--output', nargs='?',
                    default=os.path.dirname(os.getcwd())+'/htdocs/')
args = parser.parse_args()

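# Stylesheet, page template and output directory derived from the options.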
style_xslt = args.style+"docbook.xsl"
style_tmpl = args.style+"index.en.html.tmpl"
outputdir = args.output

valid_scripts = ['.py','.pl']
MAXLEVEL = 10000

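# Create a directory and any missing parents, ignoring "already exists"
# errors -- the equivalent of mkdir -p.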
def mkdir_p(path):
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        if exc.errno != errno.EEXIST:
            raise

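# Mirror src into target with rsync (-a keeps permissions and timestamps).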
def publish(src,target):
    cmd = ["rsync","-a",src,target]
    retcode = subprocess.call(cmd)
    if retcode:
        print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'

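# Walk the tree for DocBook articles and build the sitemap.  Each page
# records its title, menu entry, level, embedded executable scripts
# (XInclude with parse='text') and linked resources: local files referenced
# with link xlink:href or imagedata fileref, e.g. (a minimal sketch, not
# taken from this repository):
#
#   <link xlink:href="slides.pdf">slides</link>
#   <imagedata fileref="images/diagram.png"/>
#
# An existing sitemap.txt fixes the order of known pages; new pages are
# appended and the file is rewritten.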
def generateSitemap():
  sitemap = []
  try:
    sfile = open('sitemap.txt')
    flist = sfile.read().split()
    sfile.close()
    for f in flist:
      sitemap.append(dict(link=f))
  except IOError:
    print 'Sitemap missing - generating one.'
  for dirname, dirnames, filenames in os.walk('.'):
    for filename in filenames:
      if fnmatch.fnmatch(filename, '*.xml'):
        xfile = os.path.join(dirname,filename)
        doc = bindery.parse(xfile,
                            prefixes={u'db': u'http://docbook.org/ns/docbook',
                                      u'xi': u'http://www.w3.org/2001/XInclude',
                                      u'xl': u'http://www.w3.org/1999/xlink'})
        title = doc.xml_select(u'/db:article/db:info/db:title')
        menu  = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
        code  = doc.xml_select(u"//xi:include[@parse='text']")
        resource = doc.xml_select(u"//db:link[@xl:href]")
        image = doc.xml_select(u"//db:imagedata[@fileref]")
        exe = 0
        for c in code:
          (p, ext) = os.path.splitext(c.href)
          if ext in valid_scripts:
            exe = 1

        if title and menu:
          found = 0
          base = xfile.split('.')[1]
          link = base.replace('index','')
          level = len(filter(None,re.split(r'(/\w*/)',link)))
          res = []
          for r in resource:
            rf = os.path.join(dirname,r.href)
            if os.path.isfile(rf):
              res.append(rf)
          for i in image:
            im = os.path.join(dirname,i.fileref)
            if os.path.isfile(im):
              res.append(im)
          page = dict(title=unicode(doc.article.info.title),
                      menu=unicode(doc.article.info.titleabbrev),
                      output=os.path.join(dirname,
                                          filename.replace('xml','html')),
                      exe=exe,
                      file=xfile,
                      res=res,
                      level=level)
          for l in sitemap:
            if l['link'] == link:
              found = 1
              l.update(page)
          if not found:
            print "adding "+link+" to sitemap"
            dd = dict(link=link)
            dd.update(page)
            sitemap.append(dd)
  sfile = open('sitemap.txt','w')
  for l in sitemap:
    sfile.write(l['link']+'\n')
  sfile.close()
  return sitemap

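# Re-parse the article and replace each XInclude of an executable script
# (parse='text', extension in valid_scripts) with the XML the script
# prints on stdout, so generated content is embedded before rendering.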
def expandXincludeTxt(page):
  doc = bindery.parse(page['file'],
                      prefixes={u'db': u'http://docbook.org/ns/docbook',
                                u'xi': u'http://www.w3.org/2001/XInclude'})
  if page['exe']:
    code  = doc.xml_select(u"//xi:include[@parse='text']")
    for c in code:
      (p, ext) = os.path.splitext(c.href)
      if ext in valid_scripts:
        exe = os.path.abspath(c.href)
        # communicate() drains stdout and waits for the script to exit
        out = subprocess.Popen([exe],stdout=subprocess.PIPE).communicate()[0]
        xstr = bindery.parse(out)
        # insert the generated nodes, in document order, where the include was
        idx = c.xml_index_on_parent
        for x in list(xstr.xml_children):
          c.xml_parent.xml_insert(idx,x)
          idx += 1
        c.xml_parent.xml_remove(c)
  return doc

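# Render the article to HTML by running the DocBook stylesheet through
# xsltproc.  The transform runs inside the article's directory so that
# relative XIncludes and file references resolve correctly.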
def xsltConvert(doc, page):
#  amara can not handle the docbook stylesheets
#  xmlarticle = transform(doc,style_xslt)
  cwd = os.getcwd()
  rundir = os.path.dirname(page['file'])
  os.chdir(rundir)
  infile  = os.path.basename(tempfile.mktemp())
  outfile = tempfile.mktemp()
  tfi = open(infile,'w')
  tfi.write(doc.xml_encode())
  tfi.close()
#  cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
  cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
  retcode = subprocess.call(cmd)
  if retcode:
    print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
  tfo = open(outfile,'r')
  result = tfo.read()
  tfo.close()
  os.remove(infile)
  os.remove(outfile)
  os.chdir(cwd)
  return result

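# Build a nested <ul> menu from the sitemap.  slevel and elevel bound the
# levels included: genMenu(page, sitemap, 1, MAXLEVEL) yields the full
# tree, genMenu(page, sitemap, n, n) only the pages on the current level.
# Returns the HTML and the title of the enclosing menu section.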
def genMenu(page,sitemap,slevel,elevel):
  title = None
  sm = []
  if elevel == MAXLEVEL or elevel == 1:
    sm = sitemap
  else:
    idx = sitemap.index(page)
    while sitemap[idx]['level'] == page['level']:
      idx = idx-1
    title = sitemap[idx]['menu']
    idx = idx+1
    while idx < len(sitemap) and sitemap[idx]['level'] == page['level']:
      sm.append(sitemap[idx])
      idx = idx+1
  oldlevel = slevel
  html = '<ul>\n'
  for p in sm:
    if slevel > p['level'] or elevel < p['level']:
      continue
    if not title and p['link'] == '/':
      title = p['menu']
    if oldlevel < p['level']:
      html+='<ul>\n'
    elif oldlevel > p['level']:
      if p['link'][-1] == '/':
        html+='</li>\n'
      html+='</ul>\n</li>\n'
    if page == p:
      html+='<li><a href="%s">[%s]</a>' % (p['link'],p['menu'])
    else:
      html+='<li><a href="%s">%s</a>' % (p['link'],p['menu'])
    if p['link'][-1] != '/' or p['link'] == '/':
      html+='</li>\n'
    oldlevel = p['level']
  html+='</ul>\n'
  return (html,title)

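# Render one page: fill the Cheetah template with the full menu, the
# per-level menu and the transformed article, write the result under the
# temporary target tree, and copy the page's linked resources next to it.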
def writeToTemplate(page,doc,sitemap):
  (menu,menuname) = genMenu(page,sitemap,1,MAXLEVEL)
  (levelmenu,levelname) = genMenu(page,sitemap,page['level'],page['level'])
  template = Template(file=style_tmpl,
                      searchList=[{'title':page['title']},
                                  {'menu':menu},
                                  {'article':doc},
                                  {'levelmenu':levelmenu},
                                  {'levelname':levelname}])
  outfile = tmptarget+page['output']
  mkdir_p(os.path.dirname(outfile))
  out = open(outfile, 'w')
  out.write(str(template))
  out.close()
  for r in page['res']:
    mkdir_p(os.path.dirname(tmptarget+r))
    shutil.copyfile(r, tmptarget+r)

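# Main driver: build the sitemap, render every page into a temporary
# directory, then rsync the pages and the style assets to the output
# directory in one go.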
sitemap = generateSitemap()
tmptarget = tempfile.mkdtemp()+'/'
for page in sitemap:
  t1 = time.time()
  print "Page : %-30s %30s" % (page['link'],
                      time.ctime(os.stat(page['file']).st_mtime)),
  doc = expandXincludeTxt(page)
  pubdoc = xsltConvert(doc, page)
  writeToTemplate(page,pubdoc,sitemap)
  t2 = time.time()
  print "[%5.2f s]" % (round(t2-t1,2))
publish(tmptarget, args.output)
publish(args.style+"css", args.output)
publish(args.style+"images",args.output)