From: Fredrik Unger Date: Tue, 15 Feb 2011 16:48:38 +0000 (+0100) Subject: Initial version of the tree-cutter.py script X-Git-Tag: v1.0~54 X-Git-Url: https://source.tree.se/git?p=treecutter.git;a=commitdiff_plain;h=837b43478217cfe46c8068c92ffcbfd5d5e521f5 Initial version of the tree-cutter.py script Working with a test structure of articles, using docbook and a changed bluenightsky template. Language support missing. Menu generation can be improved. --- 837b43478217cfe46c8068c92ffcbfd5d5e521f5 diff --git a/src/tree-cutter.py b/src/tree-cutter.py new file mode 100755 index 0000000..782f58a --- /dev/null +++ b/src/tree-cutter.py @@ -0,0 +1,188 @@ +#!/usr/bin/python +import os +import fnmatch +import subprocess +import amara +import re +import tempfile +import errno +import time +from amara import bindery +from amara.xslt import transform +from Cheetah.Template import Template + +dist = "." +style = "default" +style_xslt = dist+"/style/"+style+"/docbook.xsl" +style_tmpl = dist+"/style/"+style+"/index.html.tmpl" +outputdir = dist+"/htdocs/" + +valid_scripts = ['.py','.pl'] +MAXLEVEL = 10000 + +def mkdir_p(path): + try: + os.makedirs(path) + except OSError as exc: # Python >2.5 + if exc.errno == errno.EEXIST: + pass + else: raise + +def generateSitemap(): + sitemap = [] + try: + sfile = open('sitemap.txt') + flist = sfile.read().split() + sfile.close() + for f in flist: + sitemap.append(dict(link=f)) + except IOError, what_error: + print 'Sitemap missing - generating one.' + for dirname, dirnames, filenames in os.walk('.'): + for filename in filenames: + if fnmatch.fnmatch(filename, '*.xml'): + xfile = os.path.join(dirname,filename) + doc = bindery.parse(xfile, + prefixes={u'db': u'http://docbook.org/ns/docbook', + u'xi': u'http://www.w3.org/2001/XInclude'}) + title = doc.xml_select(u'/db:article/db:info/db:title') + menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev') + code = doc.xml_select(u"//xi:include[@parse='text']") + exe = 0 + for c in code: + (p, ext) = os.path.splitext(c.href) + if ext in valid_scripts: + exe = 1 + + if title and menu: + found = 0 + base = os.path.splitext(xfile)[0] + link = base.translate(None,'.').replace('index','') + level = len(filter(None,re.split(r'(/\w*/)',link))) + page = dict(title=unicode(doc.article.info.title), + menu=unicode(doc.article.info.titleabbrev), + output=os.path.join(dirname,filename.replace('xml','html')), + exe=exe, + file=xfile, + level=level) + for l in sitemap: + if l['link'] == link: + found = 1 + l.update(page) + if not found: + print "adding "+link+" to sitemap" + dd = dict(link=link) + dd.update(page) + sitemap.append(dd) + sfile = open('sitemap.txt','w') + for l in sitemap: + sfile.write(l['link']+'\n') + sfile.close() + return sitemap + +def expandXincludeTxt(page): + doc = bindery.parse(page['file'],prefixes={u'db': u'http://docbook.org/ns/docbook', + u'xi': u'http://www.w3.org/2001/XInclude'}) + if page['exe']: + code = doc.xml_select(u"//xi:include[@parse='text']") + for c in code: + (p, ext) = os.path.splitext(c.href) + if ext in valid_scripts: + exe = os.path.join(os.path.abspath(c.href)) + xml = subprocess.Popen([exe],stdout=subprocess.PIPE) + xstr = bindery.parse(str(xml.stdout.read())) + id = c.xml_index_on_parent + for x in xstr.xml_children: + c.xml_parent.xml_insert(id,x) + c.xml_parent.xml_remove(c) + return doc + +def xsltConvert(doc): +# amara can not handle the docbook stylesheets +# xmlarticle = transform(doc,style_xslt) + cwd = os.getcwd() + rundir = os.path.dirname(page['file']) + os.chdir(rundir) + infile = os.path.basename(tempfile.mktemp()) + outfile = tempfile.mktemp() + tfi = open(infile,'w') + tfi.write(doc.xml_encode()) + tfi.close() + cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt] + retcode = subprocess.call(cmd) + if retcode: + print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' + tfo = open(outfile,'r') + result = tfo.read() + tfo.close() + os.remove(infile) + os.remove(outfile) + os.chdir(cwd) + return result + +def genMenu(page,sitemap,slevel,elevel): + title = None + sm = [] + if elevel == MAXLEVEL or elevel == 1: + sm = sitemap + else: + idx = sitemap.index(page) + while (sitemap[idx]['level'] == page['level']): + idx = idx-1 + title = sitemap[idx]['menu'] + idx = idx+1 + while (sitemap[idx]['level'] == page['level']): + sm.append(sitemap[idx]) + idx = idx+1 + oldlevel = slevel + + html = '\n' + return (html,title) + +def writeToTemplate(page,doc,sitemap): + (menu,menuname) = genMenu(page,sitemap,1,MAXLEVEL) + (levelmenu,levelname) = genMenu(page,sitemap,page['level'],page['level']) + template = Template(file=style_tmpl, + searchList=[{'menu':menu}, + {'article':doc}, + {'levelmenu':levelmenu}, + {'levelname':levelname}]) + outfile = outputdir+page['output'] + d = os.path.split(outfile)[0] + if d != '': + mkdir_p(d) + out = open(outfile, 'w') + out.write(str(template)) + +sitemap = generateSitemap() +for page in sitemap: + t1 = time.time() + print "Page : "+page['link'], + doc = expandXincludeTxt(page) + pubdoc = xsltConvert(doc) + writeToTemplate(page,pubdoc,sitemap) +# publishResources() + t2 = time.time() + print "["+str(round(t2-t1,2))+"] done." + +