Initial version of the tree-cutter.py script
[treecutter.git] / src / tree-cutter.py
1 #!/usr/bin/python
2 import os
3 import fnmatch
4 import subprocess
5 import amara
6 import re
7 import tempfile
8 import errno
9 import time
10 from amara import bindery
11 from amara.xslt import transform
12 from Cheetah.Template import Template
13
# Root of the site tree; every path below is derived from it.
dist = "."
# Name of the style directory used under <dist>/style/.
style = "default"
# Stylesheet handed to the external XSLT processor by xsltConvert().
style_xslt = "%s/style/%s/docbook.xsl" % (dist, style)
# Cheetah template that writeToTemplate() fills in for every page.
style_tmpl = "%s/style/%s/index.html.tmpl" % (dist, style)
# Prefix prepended to each page's output path.
outputdir = "%s/htdocs/" % dist

# Extensions of text XIncludes that are executed rather than included as-is.
valid_scripts = ['.py', '.pl']
# Sentinel end level that makes genMenu() render the whole sitemap.
MAXLEVEL = 10000
22
def mkdir_p(path):
    """Create directory *path* and any missing parents, like ``mkdir -p``.

    An already existing directory is not an error.  Any other failure,
    including *path* existing as something that is not a directory, is
    raised as OSError.
    """
    try:
        os.makedirs(path)
    except OSError as exc: # Python >2.5
        # EEXIST is only harmless when what exists really is a directory;
        # a regular file in the way must not be silently accepted.
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise
30
def generateSitemap():
    """Build the ordered list of page dicts and refresh ``sitemap.txt``.

    Links already listed in ``sitemap.txt`` (whitespace separated, read from
    the current directory) come first, in file order.  The tree below the
    current directory is then walked and every ``*.xml`` file whose DocBook
    article has both a title and a titleabbrev is merged in: entries with a
    matching link are updated in place, unknown links are appended.  Finally
    ``sitemap.txt`` is rewritten with the resulting link order.

    Returns:
        A list of dicts.  Entries matched to an XML file carry the keys
        ``link``, ``title``, ``menu``, ``output``, ``exe``, ``file`` and
        ``level``; entries found only in ``sitemap.txt`` carry ``link`` alone.
    """
    prefixes = {u'db': u'http://docbook.org/ns/docbook',
                u'xi': u'http://www.w3.org/2001/XInclude'}
    sitemap = []
    try:
        with open('sitemap.txt') as sfile:
            for f in sfile.read().split():
                sitemap.append(dict(link=f))
    except IOError:
        print('Sitemap missing - generating one.')

    for dirname, dirnames, filenames in os.walk('.'):
        for filename in fnmatch.filter(filenames, '*.xml'):
            xfile = os.path.join(dirname, filename)
            doc = bindery.parse(xfile, prefixes=prefixes)
            title = doc.xml_select(u'/db:article/db:info/db:title')
            menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
            if not (title and menu):
                # Not a page: only titled articles go into the sitemap.
                continue

            # A page is "executable" when any text XInclude points at a
            # script with one of the recognised extensions.
            code = doc.xml_select(u"//xi:include[@parse='text']")
            exe = any(os.path.splitext(c.href)[1] in valid_scripts
                      for c in code)

            # NOTE(review): every '.' and every 'index' substring is dropped
            # from the path to form the link, not just the leading './' and a
            # trailing 'index'.  Kept as-is so existing sitemap.txt entries
            # continue to match.
            base = os.path.splitext(xfile)[0]
            link = base.replace('.', '').replace('index', '')
            level = len([part for part in re.split(r'(/\w*/)', link) if part])
            # Swap only the extension; a blanket replace('xml', 'html') would
            # also rewrite "xml" occurring elsewhere in the file name.
            output = os.path.join(dirname,
                                  os.path.splitext(filename)[0] + '.html')
            page = dict(title=unicode(doc.article.info.title),
                        menu=unicode(doc.article.info.titleabbrev),
                        output=output,
                        exe=exe,
                        file=xfile,
                        level=level)

            known = [entry for entry in sitemap if entry['link'] == link]
            for entry in known:
                entry.update(page)
            if not known:
                print("adding "+link+" to sitemap")
                dd = dict(link=link)
                dd.update(page)
                sitemap.append(dd)

    with open('sitemap.txt', 'w') as sfile:
        for entry in sitemap:
            sfile.write(entry['link']+'\n')
    return sitemap
82
def expandXincludeTxt(page):
    """Parse a page's XML and inline the output of its script includes.

    For pages flagged with ``page['exe']``, every
    ``xi:include[@parse='text']`` whose href has an extension listed in
    ``valid_scripts`` is executed; its standard output is parsed as XML and
    the resulting nodes replace the include element.  Other includes are left
    untouched.

    Args:
        page: sitemap entry; the keys ``file`` and ``exe`` are read.

    Returns:
        The parsed (and possibly expanded) document.
    """
    doc = bindery.parse(page['file'],
                        prefixes={u'db': u'http://docbook.org/ns/docbook',
                                  u'xi': u'http://www.w3.org/2001/XInclude'})
    if not page['exe']:
        return doc

    # XInclude hrefs are relative to the including document, not to the
    # directory the generator happens to be started from.
    docdir = os.path.dirname(os.path.abspath(page['file']))
    for c in doc.xml_select(u"//xi:include[@parse='text']"):
        ext = os.path.splitext(c.href)[1]
        if ext not in valid_scripts:
            continue
        exe = os.path.join(docdir, c.href)
        proc = subprocess.Popen([exe], stdout=subprocess.PIPE)
        # communicate() drains the pipe and waits for the child to exit.
        output = proc.communicate()[0]
        fragment = bindery.parse(output)
        parent = c.xml_parent
        index = c.xml_index_on_parent
        # Advance the insertion point so several generated nodes keep their
        # order; iterate over a copy in case insertion detaches the nodes
        # from the parsed fragment.
        for offset, node in enumerate(list(fragment.xml_children)):
            parent.xml_insert(index + offset, node)
        parent.xml_remove(c)
    return doc
99
def xsltConvert(doc):
    """Transform *doc* with the DocBook stylesheet and return the output text.

    amara can not handle the docbook stylesheets, so the document is written
    to a temporary file next to the page source and converted by the external
    ``saxon-xslt-xinclude`` command, run with the page's directory as its
    working directory.

    NOTE: the page directory is taken from the module-level ``page`` variable
    set by the main loop, not from an argument.

    Args:
        doc: parsed document; must provide ``xml_encode()``.

    Returns:
        The content of the file produced by the XSLT processor.  A non-zero
        exit status is reported on stdout and whatever was written (possibly
        nothing) is still returned.
    """
    rundir = os.path.dirname(page['file']) or os.curdir
    # Resolve the stylesheet before the child switches directory; the
    # configured path is relative to where the generator was started.
    stylesheet = os.path.abspath(style_xslt)
    # mkstemp creates the files atomically, unlike the racy mktemp.
    (infd, infile) = tempfile.mkstemp(dir=rundir)
    try:
        with os.fdopen(infd, 'w') as tfi:
            tfi.write(doc.xml_encode())
        (outfd, outfile) = tempfile.mkstemp()
        os.close(outfd)
        try:
            cmd = ["saxon-xslt-xinclude", "-o", outfile, infile, stylesheet]
            # cwd= only affects the child, so this process never has to
            # chdir and restore.
            retcode = subprocess.call(cmd, cwd=rundir)
            if retcode:
                print('Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']')
            with open(outfile, 'r') as tfo:
                return tfo.read()
        finally:
            os.remove(outfile)
    finally:
        os.remove(infile)
122
def genMenu(page,sitemap,slevel,elevel):
    """Render a nested ``<ul>`` menu for *page* from the sitemap.

    When *elevel* is ``MAXLEVEL`` or 1 the whole sitemap is considered.
    Otherwise only the run of entries at the page's own level around *page*
    is used, and the menu title is the ``menu`` text of the entry preceding
    that run.

    Args:
        page: the sitemap entry being rendered; its link is shown in brackets.
        sitemap: ordered list of page dicts (``link``, ``menu``, ``level``).
        slevel: lowest level (inclusive) of entries to render.
        elevel: highest level (inclusive) of entries to render.

    Returns:
        A ``(html, title)`` tuple; *title* is None when none was found.
    """
    title = None
    if elevel == MAXLEVEL or elevel == 1:
        sm = sitemap
    else:
        sm = []
        idx = sitemap.index(page)
        # Walk back to the entry in front of this run of same-level pages.
        # Stop at the start of the list instead of wrapping around through
        # negative indices.
        while idx >= 0 and sitemap[idx]['level'] == page['level']:
            idx = idx-1
        if idx >= 0:
            title = sitemap[idx]['menu']
        idx = idx+1
        # Collect the run; it may extend to the very end of the sitemap.
        while idx < len(sitemap) and sitemap[idx]['level'] == page['level']:
            sm.append(sitemap[idx])
            idx = idx+1

    oldlevel = slevel
    parts = ['<ul>\n']
    for p in sm:
        if not (slevel <= p['level'] <= elevel):
            continue
        if not title and p['link'] == '/':
            title = p['menu']

        # Links ending in '/' are directory pages whose <li> stays open so
        # that the following deeper entries nest inside it.
        is_dir = p['link'].endswith('/')
        # NOTE(review): a level decrease closes exactly one nested list no
        # matter how many levels were dropped, and only one </ul> is emitted
        # at the end; output is kept identical to the previous behaviour.
        if oldlevel < p['level']:
            parts.append('<ul>\n')
        elif oldlevel > p['level']:
            if is_dir:
                parts.append('</li>\n')
            parts.append('</ul>\n</li>\n')

        label = '[%s]' % p['menu'] if page == p else p['menu']
        parts.append('<li><a href="%s">%s</a>' % (p['link'], label))
        if not is_dir or p['link'] == '/':
            parts.append('</li>\n')
        oldlevel = p['level']
    parts.append('</ul>\n')
    return (''.join(parts), title)
161
def writeToTemplate(page,doc,sitemap):
    """Fill the site template for *page* and write it below ``outputdir``.

    The template receives the full menu, the converted article and the menu
    and title for the page's own level.  Missing output directories are
    created first.

    Args:
        page: sitemap entry; the keys ``level`` and ``output`` are read.
        doc: converted article text placed in the template as ``article``.
        sitemap: ordered list of page dicts used to build the menus.
    """
    # Only the markup of the full menu is used; its title is not needed.
    menu = genMenu(page,sitemap,1,MAXLEVEL)[0]
    (levelmenu,levelname) = genMenu(page,sitemap,page['level'],page['level'])
    template = Template(file=style_tmpl,
                        searchList=[{'menu':menu},
                                    {'article':doc},
                                    {'levelmenu':levelmenu},
                                    {'levelname':levelname}])
    outfile = outputdir+page['output']
    d = os.path.dirname(outfile)
    if d != '':
        mkdir_p(d)
    # Close the file deterministically so the page is flushed to disk
    # before the next one is processed.
    with open(outfile, 'w') as out:
        out.write(str(template))
176
177 sitemap = generateSitemap()
178 for page in sitemap:
179   t1 = time.time()
180   print "Page : "+page['link'],
181   doc = expandXincludeTxt(page)
182   pubdoc = xsltConvert(doc)
183   writeToTemplate(page,pubdoc,sitemap)
184 #  publishResources()
185   t2 = time.time()
186   print "["+str(round(t2-t1,2))+"]  done."
187
188