Initial version of the tree-cutter.py script
author Fredrik Unger <fred@tree.se>
Tue, 15 Feb 2011 16:48:38 +0000 (17:48 +0100)
committer Fredrik Unger <fred@tree.se>
Tue, 15 Feb 2011 16:48:38 +0000 (17:48 +0100)
Working with a test structure of articles, using docbook and a
changed bluenightsky template. Language support missing.
Menu generation can be improved.

src/tree-cutter.py [new file with mode: 0755]

diff --git a/src/tree-cutter.py b/src/tree-cutter.py
new file mode 100755 (executable)
index 0000000..782f58a
--- /dev/null
@@ -0,0 +1,188 @@
+#!/usr/bin/python
+import os
+import fnmatch
+import subprocess
+import amara
+import re
+import tempfile
+import errno
+import time
+from amara import bindery
+from amara.xslt import transform
+from Cheetah.Template import Template
+
+# Base directory that the style and output paths below are built from.
+dist = "."
+# Name of the style directory (under dist/style/) to use.
+style = "default"
+# XSLT stylesheet handed to the external saxon call in xsltConvert().
+style_xslt = dist+"/style/"+style+"/docbook.xsl"
+# Cheetah template each generated page is rendered with.
+style_tmpl = dist+"/style/"+style+"/index.html.tmpl"
+# Prefix prepended to each page's 'output' path when writing the result.
+outputdir = dist+"/htdocs/"
+
+# File extensions of xi:include hrefs that are treated as scripts to execute.
+valid_scripts = ['.py','.pl']
+# End level passed to genMenu() to request a menu over the whole sitemap.
+MAXLEVEL = 10000
+
+def mkdir_p(path):
+    """Create directory `path` including missing parents, like `mkdir -p`.
+
+    An already existing directory is not an error.  Every other OSError is
+    re-raised, including EEXIST when `path` exists but is not a directory.
+    """
+    try:
+        os.makedirs(path)
+    except OSError as exc: # Python >2.5
+        # Only ignore "already exists" when it really is a directory; a
+        # regular file in the way must still be reported to the caller.
+        if exc.errno != errno.EEXIST or not os.path.isdir(path):
+            raise
+
+def generateSitemap():
+  """Build the ordered list of pages that make up the site.
+
+  Reads the list of links from 'sitemap.txt' (when it exists), then walks
+  the current directory for '*.xml' files.  Every document that has both a
+  db:title and a db:titleabbrev under /db:article/db:info becomes a page
+  dict with the keys link, title, menu, output, exe, file and level.  A page
+  whose link is already listed keeps its position; new pages are appended.
+  The complete link list is written back to 'sitemap.txt'.
+
+  Returns the list of page dicts.  Links from 'sitemap.txt' for which no
+  document was found stay in the file but are left out of the returned list,
+  because they lack the keys ('file', 'level', ...) the callers read.
+  """
+  prefixes = {u'db': u'http://docbook.org/ns/docbook',
+              u'xi': u'http://www.w3.org/2001/XInclude'}
+  sitemap = []
+  try:
+    with open('sitemap.txt') as sfile:
+      for f in sfile.read().split():
+        sitemap.append(dict(link=f))
+  except IOError:
+    print 'Sitemap missing - generating one.'
+  for dirname, dirnames, filenames in os.walk('.'):
+    for filename in fnmatch.filter(filenames, '*.xml'):
+      xfile = os.path.join(dirname,filename)
+      doc = bindery.parse(xfile, prefixes=prefixes)
+      title = doc.xml_select(u'/db:article/db:info/db:title')
+      menu  = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
+      code  = doc.xml_select(u"//xi:include[@parse='text']")
+      # exe flags documents that include the output of at least one script
+      exe = 0
+      for c in code:
+        if os.path.splitext(c.href)[1] in valid_scripts:
+          exe = 1
+          break
+
+      if not (title and menu):
+        continue
+      base = os.path.splitext(xfile)[0]
+      # './dir/index' -> '/dir/': drop the dots and the 'index' page name
+      link = base.translate(None,'.').replace('index','')
+      level = len(filter(None,re.split(r'(/\w*/)',link)))
+      page = dict(title=unicode(doc.article.info.title),
+                  menu=unicode(doc.article.info.titleabbrev),
+                  # swap only the extension; a plain replace('xml','html')
+                  # would also rewrite 'xml' inside the file name itself
+                  output=base+'.html',
+                  exe=exe,
+                  file=xfile,
+                  level=level)
+      found = 0
+      for l in sitemap:
+        if l['link'] == link:
+          found = 1
+          l.update(page)
+      if not found:
+        print "adding "+link+" to sitemap"
+        dd = dict(link=link)
+        dd.update(page)
+        sitemap.append(dd)
+  with open('sitemap.txt','w') as sfile:
+    for l in sitemap:
+      sfile.write(l['link']+'\n')
+  # Entries that only came from sitemap.txt carry nothing but 'link'.
+  pages = []
+  for l in sitemap:
+    if 'file' in l:
+      pages.append(l)
+    else:
+      print "skipping "+l['link']+" - no matching document found"
+  return pages
+
+def expandXincludeTxt(page):
+  """Parse a page's source document and expand its script includes.
+
+  page -- page dict; 'file' is the path of the document and 'exe' tells
+          whether any script includes have to be expanded.
+
+  When page['exe'] is set, every xi:include element with parse='text' whose
+  href has an extension listed in valid_scripts is handled by running that
+  file, parsing its standard output as XML and inserting the resulting
+  top-level nodes in place of the xi:include element.
+
+  Returns the parsed (and possibly modified) document.
+  """
+  doc = bindery.parse(page['file'],prefixes={u'db': u'http://docbook.org/ns/docbook',
+                                             u'xi': u'http://www.w3.org/2001/XInclude'})
+  if page['exe']:
+    code  = doc.xml_select(u"//xi:include[@parse='text']")
+    for c in code:
+      ext = os.path.splitext(c.href)[1]
+      if ext in valid_scripts:
+        # NOTE(review): the href is resolved against the current working
+        # directory, not against the directory of page['file'] - confirm
+        # that this is intended for documents in subdirectories.
+        exe = os.path.abspath(c.href)
+        proc = subprocess.Popen([exe],stdout=subprocess.PIPE)
+        # communicate() reads everything, closes the pipe and waits for the
+        # child, so no unreaped process is left behind
+        output = proc.communicate()[0]
+        xstr = bindery.parse(output)
+        pos = c.xml_index_on_parent
+        # NOTE(review): every node is inserted at the same index; with more
+        # than one top-level node the resulting order should be verified.
+        for x in xstr.xml_children:
+          c.xml_parent.xml_insert(pos,x)
+        c.xml_parent.xml_remove(c)
+  return doc
+
+def xsltConvert(doc):
+  """Transform `doc` with the docbook stylesheet and return the result.
+
+  doc -- document object providing xml_encode()
+
+  The serialized document is written to a temporary file next to the page's
+  source file and the external 'saxon-xslt-xinclude' command is run from
+  that directory.  A non-zero return code is reported on stdout; whatever
+  the command wrote to its output file is returned as a string.
+
+  NOTE: the source directory is taken from the module-level name `page`
+  (set by the main loop of this script), not from a parameter.
+  """
+  # amara can not handle the docbook stylesheets, hence the external
+  # saxon call instead of amara's own transform()
+  cwd = os.getcwd()
+  # resolve the stylesheet before changing directory, otherwise a relative
+  # style_xslt would be looked up below the page's directory
+  stylesheet = os.path.abspath(style_xslt)
+  rundir = os.path.dirname(page['file']) or '.'
+  # mkstemp creates the files atomically, unlike the race-prone mktemp
+  (ifd, infile) = tempfile.mkstemp(dir=rundir)
+  infile = os.path.abspath(infile)
+  (ofd, outfile) = tempfile.mkstemp()
+  os.close(ofd)
+  try:
+    tfi = os.fdopen(ifd,'w')
+    try:
+      tfi.write(doc.xml_encode())
+    finally:
+      tfi.close()
+    os.chdir(rundir)
+    cmd = ["saxon-xslt-xinclude","-o",outfile,os.path.basename(infile),stylesheet]
+    retcode = subprocess.call(cmd)
+    if retcode:
+      print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+    with open(outfile,'r') as tfo:
+      result = tfo.read()
+  finally:
+    # always restore the working directory and clean up, even on errors
+    os.chdir(cwd)
+    os.remove(infile)
+    os.remove(outfile)
+  return result
+
+def genMenu(page,sitemap,slevel,elevel):
+  """Render an HTML <ul> menu for `page`.
+
+  page    -- page dict being rendered; its own entry is shown in [brackets]
+  sitemap -- ordered list of all page dicts
+  slevel  -- entries with a level below this are skipped
+  elevel  -- entries with a level above this are skipped
+
+  With elevel == MAXLEVEL or elevel == 1 the whole sitemap is considered.
+  Otherwise only the run of consecutive sitemap entries around `page` that
+  share its level is used, and the title is the 'menu' text of the entry
+  directly before that run.
+
+  Returns (html, title).  title is None when no entry precedes the run and
+  no entry with link '/' is part of the menu.
+  """
+  title = None
+  sm = []
+  if elevel == MAXLEVEL or elevel == 1:
+    sm = sitemap
+  else:
+    level = page['level']
+    last = len(sitemap)
+    idx = sitemap.index(page)
+    # walk back to the entry in front of the run; stop at the list start
+    # so the index can not wrap around to the end of the list
+    while idx >= 0 and sitemap[idx]['level'] == level:
+      idx = idx-1
+    if idx >= 0:
+      title = sitemap[idx]['menu']
+    idx = idx+1
+    # collect the run; stop at the list end instead of raising IndexError
+    while idx < last and sitemap[idx]['level'] == level:
+      sm.append(sitemap[idx])
+      idx = idx+1
+  oldlevel = slevel
+
+  parts = ['<ul>\n']
+  for p in sm:
+    if slevel > p['level'] or elevel < p['level']:
+      continue
+    if not title and p['link'] == '/':
+      title = p['menu']
+
+    # links ending in '/' are directory pages whose <li> stays open so that
+    # the entries below them can be nested inside it
+    isdir = p['link'].endswith('/')
+    if oldlevel < p['level']:
+      parts.append('<ul>\n')
+    elif oldlevel > p['level']:
+      if isdir:
+        parts.append('</li>\n')
+      parts.append('</ul>\n</li>\n')
+    if page == p:
+      parts.append('<li><a href="%s">[%s]</a>' % (p['link'],p['menu']))
+    else:
+      parts.append('<li><a href="%s">%s</a>' % (p['link'],p['menu']))
+    if not isdir or p['link'] == '/':
+      parts.append('</li>\n')
+    oldlevel = p['level']
+  parts.append('</ul>\n')
+  return (''.join(parts),title)
+
+def writeToTemplate(page,doc,sitemap):
+  """Render `page` through the Cheetah template and write it to disk.
+
+  page    -- page dict; 'level' selects the level menu and 'output' is the
+             path of the result below outputdir
+  doc     -- converted article, made available to the template as 'article'
+  sitemap -- list of all page dicts, used to build the menus
+
+  The template also receives 'menu' (whole sitemap), 'levelmenu' and
+  'levelname' (entries on the page's own level).  Missing output
+  directories are created.
+  """
+  (menu,menuname) = genMenu(page,sitemap,1,MAXLEVEL)
+  (levelmenu,levelname) = genMenu(page,sitemap,page['level'],page['level'])
+  template = Template(file=style_tmpl,
+                      searchList=[{'menu':menu},
+                                  {'article':doc},
+                                  {'levelmenu':levelmenu},
+                                  {'levelname':levelname}])
+  outfile = outputdir+page['output']
+  d = os.path.split(outfile)[0]
+  if d != '':
+    mkdir_p(d)
+  # the with block makes sure the page is flushed and closed
+  with open(outfile, 'w') as out:
+    out.write(str(template))
+
+# Build the site: expand, convert and write every page of the sitemap,
+# printing the time spent on each one.
+# NOTE: `page` has to stay a module-level name - xsltConvert() reads it as a
+# global to find the directory of the page being converted.
+sitemap = generateSitemap()
+for page in sitemap:
+  t1 = time.time()
+  # trailing comma: stay on the same line so the timing is appended to it
+  print "Page : "+page['link'],
+  doc = expandXincludeTxt(page)
+  pubdoc = xsltConvert(doc)
+  writeToTemplate(page,pubdoc,sitemap)
+#  publishResources()
+  t2 = time.time()
+  print "["+str(round(t2-t1,2))+"]  done."
+
+