Properly indenting the code, using /usr/share/doc/python2.6/examples/Tools/scripts...
[treecutter.git] / src / tree-cutter.py
#!/usr/bin/python
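# tree-cutter.py: walk a tree of DocBook articles, run any executable
# XIncludes, transform each article to HTML with xsltproc and a Cheetah
# template, and rsync the result (plus the style's css/ and images/) to the
# output directory.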
import os
import fnmatch
import subprocess
import amara
import re
import tempfile
import errno
import time
import argparse
import shutil
from amara import bindery
from amara.xslt import transform
from Cheetah.Template import Template

parser = argparse.ArgumentParser(description='Process docbook article tree.')
parser.add_argument('--style', nargs='?',
                    default=os.path.dirname(os.getcwd())+'/style/default/')
parser.add_argument('--output', nargs='?',
                    default=os.path.dirname(os.getcwd())+'/htdocs/')
args = parser.parse_args()

style_xslt = args.style+"docbook.xsl"
style_tmpl = args.style+"index.en.html.tmpl"
outputdir = args.output

valid_scripts = ['.py','.pl']
MAXLEVEL = 10000

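# Create a directory and any missing parents, ignoring "already exists"
# errors (the equivalent of `mkdir -p`).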
def mkdir_p(path):
    try:
        os.makedirs(path)
    except OSError as exc: # Python >2.5
        if exc.errno == errno.EEXIST:
            pass
        else: raise

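# Mirror src into target with rsync; --delete prunes files that no longer
# exist in src.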
def publish(src,target):
    cmd = ["rsync","-a","--delete",src,target]
    retcode = subprocess.call(cmd)
    if retcode:
        print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'

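# Build the list of pages to publish. Links already recorded in sitemap.txt
# keep their order; the tree is then walked for *.xml DocBook articles, and
# every article with a title and titleabbrev is added (or updated) with its
# menu text, output path, nesting level and on-disk resources. The merged
# list is written back to sitemap.txt.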
def generateSitemap():
    sitemap = []
    try:
        sfile = open('sitemap.txt')
        flist = sfile.read().split()
        sfile.close()
        for f in flist:
            sitemap.append(dict(link=f))
    except IOError:
        print 'Sitemap missing - generating one.'

    for dirname, dirnames, filenames in os.walk('.'):
        for filename in filenames:
            if fnmatch.fnmatch(filename, '*.xml'):
                xfile = os.path.join(dirname,filename)
                doc = bindery.parse(xfile,
                                    prefixes={u'db': u'http://docbook.org/ns/docbook',
                                              u'xi': u'http://www.w3.org/2001/XInclude',
                                              u'xl': u'http://www.w3.org/1999/xlink'})
                title = doc.xml_select(u'/db:article/db:info/db:title')
                menu  = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
                code  = doc.xml_select(u"//xi:include[@parse='text']")
                resource = doc.xml_select(u"//db:link[@xl:href]")
                image = doc.xml_select(u"//db:imagedata[@fileref]")
                exe = 0
                for c in code:
                    (p, ext) = os.path.splitext(c.href)
                    if ext in valid_scripts:
                        exe = 1

                if title and menu:
                    found = 0
                    base = xfile.split('.')[1]
                    link = base.replace('index','')
                    level = len(filter(None,re.split(r'(^/\w*/|\w*/)',link)))
                    res = []
                    for r in resource:
                        rf = os.path.join(dirname,r.href)
                        if os.path.isfile(rf):
                            res.append(rf)
                    for i in image:
                        im = os.path.join(dirname,i.fileref)
                        if os.path.isfile(im):
                            res.append(im)
                    page = dict(title=unicode(doc.article.info.title),
                                menu=unicode(doc.article.info.titleabbrev),
                                output=os.path.join(dirname,
                                                    filename.replace('xml','html')),
                                exe=exe,
                                file=xfile,
                                res=res,
                                level=level)
                    for l in sitemap:
                        if l['link'] == link:
                            found = 1
                            l.update(page)
                    if not found:
                        print "adding "+link+" to sitemap"
                        dd = dict(link=link)
                        dd.update(page)
                        sitemap.append(dd)
    sfile = open('sitemap.txt','w')
    for l in sitemap:
        sfile.write(l['link']+'\n')
    sfile.close()
    return sitemap

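# Re-parse the article and replace each <xi:include parse="text"> that points
# at an executable script (.py/.pl) with the XML the script writes to stdout,
# so generated content ends up inline in the document.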
def expandXincludeTxt(page):
    doc = bindery.parse(page['file'],
                        prefixes={u'db': u'http://docbook.org/ns/docbook',
                                  u'xi': u'http://www.w3.org/2001/XInclude'})
    if page['exe']:
        code  = doc.xml_select(u"//xi:include[@parse='text']")
        for c in code:
            (p, ext) = os.path.splitext(c.href)
            if ext in valid_scripts:
                exe = os.path.abspath(c.href)
                xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
                xstr = bindery.parse(str(xml.stdout.read()))
                pos = c.xml_index_on_parent
                for x in xstr.xml_children:
                    c.xml_parent.xml_insert(pos,x)
                c.xml_parent.xml_remove(c)
    return doc

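# Transform the expanded article to HTML by shelling out to xsltproc with the
# DocBook stylesheet. The work happens in the article's own directory through
# temporary files, so relative paths in the document still resolve.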
def xsltConvert(doc, page):
#  amara can not handle the docbook stylesheets
#  xmlarticle = transform(doc,style_xslt)
    cwd = os.getcwd()
    rundir = os.path.dirname(page['file'])
    os.chdir(rundir)
    infile  = os.path.basename(tempfile.mktemp())
    outfile = tempfile.mktemp()
    tfi = open(infile,'w')
    tfi.write(doc.xml_encode())
    tfi.close()
#  cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
    cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
    retcode = subprocess.call(cmd)
    if retcode:
        print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
    tfo = open(outfile,'r')
    result = tfo.read()
    tfo.close()
    os.remove(infile)
    os.remove(outfile)
    os.chdir(cwd)
    return result

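# Render a nested <ul> menu from the sitemap. With elevel of MAXLEVEL or 1
# (or no current page) every page between slevel and elevel is listed;
# otherwise only the pages sharing the current page's level are listed and
# the menu text of the entry just above that block is returned as the title.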
def genMenu(page,sitemap,slevel,elevel):
    title = None
    sm = []
    if elevel == MAXLEVEL or elevel == 1 or page is None:
        html = '<ul>\n'
        sm = sitemap
    else:
        html = '<ul class="tree">\n'
        idx = sitemap.index(page)
        while (sitemap[idx]['level'] == page['level']):
            idx = idx-1
        title = sitemap[idx]['menu']
        idx = idx+1
        while (idx < len(sitemap) and sitemap[idx]['level'] == page['level']):
            sm.append(sitemap[idx])
            idx = idx+1
    oldlevel = slevel

    for p in sm:
        if slevel > p['level'] or elevel < p['level']:
            continue
        if not title and p['link'] == '/':
            title = p['menu']

        if oldlevel < p['level']:
            html+='<ul>\n'
        elif oldlevel > p['level']:
            if p['link'][-1] == '/':
                html+='</li>\n'
            html+='</ul>\n</li>\n'
        if page is not None and page == p:
            html+='<li class="selected"><a href="%s">%s</a>' % (p['link'],p['menu'])
        else:
            html+='<li><a href="%s">%s</a>' % (p['link'],p['menu'])
        if p['link'][-1] != '/' or p['link'] == '/':
            html+='</li>\n'
        oldlevel = p['level']
    html+='</ul>\n'
    return (html,title)

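# Fill the Cheetah template with the page title, the full site menu, the
# transformed article and the per-level menu, write the HTML below the
# temporary target directory, and copy the page's resources next to it.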
def writeToTemplate(page,doc,sitemap):
    (menu,menuname) = genMenu(page,sitemap,1,MAXLEVEL)
    (levelmenu,levelname) = genMenu(page,sitemap,page['level'],page['level'])
    template = Template(file=style_tmpl,
                        searchList=[{'title':page['title']},
                                    {'menu':menu},
                                    {'article':doc},
                                    {'levelmenu':levelmenu},
                                    {'levelname':levelname}])
    outfile = tmptarget+page['output']
    mkdir_p(os.path.dirname(outfile))
    out = open(outfile, 'w')
    out.write(str(template))
    out.close()
    for r in page['res']:
        mkdir_p(os.path.dirname(tmptarget+r))
        shutil.copyfile(r, tmptarget+r)

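# Write a standalone sitemap.en.html page, reusing the full menu markup as
# the article body.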
def createSitemap(sitemap):
    (menu,menuname) = genMenu(None,sitemap,1,MAXLEVEL)
    template = Template(file=style_tmpl,
                        searchList=[
            {'title':'Sitemap'},
            {'menu':menu},
            {'article':menu},
            {'levelmenu':''},
            {'levelname':''}])
    outfile = tmptarget+'sitemap.en.html'
    mkdir_p(os.path.dirname(outfile))
    out = open(outfile, 'w')
    out.write(str(template))
    out.close()


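# Main driver: build the sitemap, render every page into a temporary
# directory while printing per-page timing, then publish the pages together
# with the style's css/ and images/ directories.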
sitemap = generateSitemap()
tmptarget = tempfile.mkdtemp()+'/'
for page in sitemap:
    t1 = time.time()
    print "Page : %-30s %30s" % (page['link'],
                        time.ctime(os.stat(page['file']).st_mtime)),
    doc = expandXincludeTxt(page)
    pubdoc = xsltConvert(doc, page)
    writeToTemplate(page,pubdoc,sitemap)
    t2 = time.time()
    print "[%5.2f s]" % (round(t2-t1,2))

createSitemap(sitemap)
publish(tmptarget, args.output)
publish(args.style+"css", args.output)
publish(args.style+"images",args.output)
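# Example invocation (hypothetical layout: run from the directory that holds
# the DocBook .xml tree; the argparse defaults expect style/default/ and
# htdocs/ in the parent directory):
#
#   python tree-cutter.py --style ../style/default/ --output ../htdocs/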