import tempfile
import errno
import time
+import argparse
+import shutil
+import pygraphviz as pgv
+import glob
from amara import bindery
from amara.xslt import transform
from Cheetah.Template import Template
-dist = os.path.dirname(os.getcwd())
-style = "default"
-style_xslt = dist+"/style/"+style+"/docbook.xsl"
-style_tmpl = dist+"/style/"+style+"/index.html.tmpl"
-outputdir = dist+"/htdocs/"
+parser = argparse.ArgumentParser(description='Process docbook article tree.')
+parser.add_argument('--style', nargs='?',
+                    default=os.path.dirname(os.getcwd())+'/style/default/')
+parser.add_argument('--output', nargs='?',
+                    default=os.path.dirname(os.getcwd())+'/htdocs/')
+args = parser.parse_args()
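+# NOTE: both paths are string-concatenated with filenames below rather
+# than os.path.join'ed, so they must end with a trailing '/'.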
+
+style_xslt = args.style+"docbook.xsl"
+style_tmpl = args.style+"index.en.html.tmpl"
+outputdir = args.output
+
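+# Render into a scratch directory first; publish() rsyncs it into place
+# at the end, so the live site is only touched once the build succeeds.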
+tmptarget = tempfile.mkdtemp()+'/'
valid_scripts = ['.py','.pl']
MAXLEVEL = 10000

def mkdir_p(path):
    try:
        os.makedirs(path)
    except OSError, exc:
        if exc.errno == errno.EEXIST:
            pass
        else: raise
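+# Mirror a directory into the target; rsync -a preserves attributes and
+# --delete prunes files that no longer exist in the source.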
+def publish(src,target):
+    cmd = ["rsync","-a","--delete",src,target]
+    retcode = subprocess.call(cmd)
+    if retcode:
+        print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+
+
+PREFIXES={u'db': u'http://docbook.org/ns/docbook',
+          u'xi': u'http://www.w3.org/2001/XInclude',
+          u'xl': u'http://www.w3.org/1999/xlink'}
+
+class Directory():
+    """Class containing the state of the directory with articles"""
+    def __init__(self):
+        self._cwd = '.'
+        self._tree = []
+
+    def scan(self):
+        for dirname, dirnames, filenames in os.walk(self._cwd):
+            for filename in filenames:
+                if fnmatch.fnmatch(filename, '*.xml'):
+                    file_ = os.path.join(dirname,filename)
+                    doc = bindery.parse(file_, prefixes=PREFIXES)
+                    title = doc.xml_select(u'/db:article/db:info/db:title')
+                    menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
+                    if title and menu:
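+                        # e.g. './about/index.en.xml'.split('.')[1] gives
+                        # '/about/index', so the link becomes '/about/'.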
+                        base = file_.split('.')[1]
+                        link = base.replace('index','')
+                        self._tree.append(link)
+
+    def set(self):
+        return set(self._tree)
+
+class Page():
+    """Class representing a version of a webpage"""
+    def __init__(self,page):
+        self._file = page[1]
+        self._lang = page[0]
+        self._doc = None
+        self._resources = []
+        self._title = None
+        self._menu = None
+        self._rendered_article = None
+
+    def language(self):
+        return self._lang
+
+    def menu(self):
+        return self._menu
+
+    def set_article(self,art):
+        self._rendered_article = art
+
+    def prepare(self):
+        self._doc = bindery.parse(self._file, prefixes=PREFIXES)
+        if self._doc.xml_select(u'/db:article/db:info/db:title'):
+            self._title = unicode(self._doc.article.info.title)
+        if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
+            self._menu = unicode(self._doc.article.info.titleabbrev)
+
+        dirname = os.path.dirname(self._file)
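+        # xi:include elements with parse='text' that point at executable
+        # scripts are run; their stdout is parsed as XML and spliced into
+        # the document in place of the include element.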
+        code = self._doc.xml_select(u"//xi:include[@parse='text']")
+        if code:
+            for c in code:
+                (p, ext) = os.path.splitext(c.href)
+                if ext in valid_scripts:
+                    exe = os.path.abspath(os.path.join(dirname,c.href))
+                    xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
+                    xstr = bindery.parse(str(xml.stdout.read()))
+                    idp = c.xml_index_on_parent
+                    for x in xstr.xml_children:
+                        c.xml_parent.xml_insert(idp,x)
+                    c.xml_parent.xml_remove(c)
+
+        for r in self._doc.xml_select(u"//db:link[@xl:href]"):
+            rf = os.path.join(dirname,r.href)
+            if os.path.isfile(rf):
+                self._resources.append(rf)
+        for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
+            im = os.path.join(dirname,i.fileref)
+            if os.path.isfile(im):
+                self._resources.append(im)
+
+    def render(self):
+        # amara can not handle the docbook stylesheets
+        # xmlarticle = transform(doc,style_xslt)
+        cwd = os.getcwd()
+        dirname = os.path.dirname(self._file)
+        os.chdir(dirname)
+        infile = os.path.basename(tempfile.mktemp())
+        outfile = tempfile.mktemp()
+        tfi = open(infile,'w')
+        tfi.write(self._doc.xml_encode())
+        tfi.close()
+#        cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
+        cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
+        retcode = subprocess.call(cmd)
+        if retcode:
+            print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+        tfo = open(outfile,'r')
+        self._rendered_article = tfo.read()
+        tfo.close()
+        os.remove(infile)
+        os.remove(outfile)
+        os.chdir(cwd)
+
+    def template(self,sitemap):
+        htmlmenu = sitemap.gen_menu(self._lang,None,None)
+        levelmenu = sitemap.gen_menu(self._lang,self,"tree")
+        template = Template(file=style_tmpl,
+                            searchList=[{'title':self._title},
+                                        {'menu':htmlmenu},
+                                        {'article':self._rendered_article},
+                                        {'levelmenu':levelmenu},
+                                        {'levelname':'Menu'}])
+        outfile = tmptarget+'html'.join(self._file.rsplit('xml',1))
+        mkdir_p(os.path.dirname(outfile))
+        out = open(outfile, 'w')
+        out.write(str(template))
+        out.close()
+
+
+class Link():
+    """Class representing a webpage on the site"""
+    def __init__(self,link):
+        self._link = link
+        # find the representations of the link.
+        self._pages = []
+        path = link
+        if self._link[-1] == '/':
+            path = path+'index'
+        lang = self._scan_languages(path)
+        for l in lang:
+            self._pages.append(Page(l))
+
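+    # Page files are named '<name>.<lang>.xml'; globbing './<path>*' and
+    # splitting on '.' gives e.g. ['', '/about/index', 'en', 'xml'] for
+    # './about/index.en.xml', i.e. the pair ('en', <file>).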
+    def _scan_languages(self,path):
+        lang = []
+        for l in glob.glob('.'+path+'*'):
+            ls = l.split('.')
+            if len(ls) > 3 and ls[3] == 'xml':
+                lang.append((ls[2],l))
+        return lang
+
+    def link(self):
+        return self._link
+
+    def prepare(self):
+        for page in self._pages:
+            page.prepare()
+
+    def languages(self):
+        p = []
+        for page in self._pages:
+            p.append(page.language())
+        return p
+
+    def render(self):
+        for page in self._pages:
+            page.render()
+
+    def template(self,sitemap):
+        for page in self._pages:
+            page.template(sitemap)
+
+    def page(self,lang):
+        for page in self._pages:
+            if page.language()==lang:
+                return page
+
+class Node():
+    def __init__(self,token,value):
+        self._token = token
+        self._value = value
+        self._children = []
+
+    def token(self):
+        return self._token
+
+    def value(self):
+        return self._value
+
+    def children(self):
+        return self._children
+
+class Trie():
+    def __init__(self):
+        self._root = []
+
+    def __iter__(self):
+        return self.inorder(self._root)
+
+    def inorder(self,t):
+        for l in t:
+            yield l.value()
+            for x in self.inorder(l.children()):
+                yield x
+
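+    # A key is a list of path tokens (see Sitemap.add_link). Parents must
+    # be added before their children: if no node matches an intermediate
+    # token, the content is dropped silently. Also note that key.pop(0)
+    # mutates the caller's list.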
+    def _add(self,trie, key, content):
+        # is the key a leaf
+        k = key.pop(0)
+        if key == []:
+            node = Node(k,content)
+            trie.append(node)
+        else:
+            for ch in trie:
+                if ch.token() == k:
+                    self._add(ch.children(), key, content)
+
+    def add(self,key, content):
+        self._add(self._root, key, content)
+
+    def _graph(self, trie, G):
+        for l in trie:
+            G.add_node(l.token())
+            for ch in l.children():
+                G.add_edge(l.token(),ch.token())
+            self._graph(l.children(), G)
+
+    def graph(self):
+        G = pgv.AGraph(directed=True)
+        G.add_node("sitemap")
+        for ch in self._root:
+            G.add_edge("sitemap",ch.token())
+        self._graph(self._root, G)
+#        G.layout('dot')
+#        G.draw('g.png')
+#        print G.string()
+
+    def _menu(self, trie, lang, page, css):
+        html = "<ul%s>\n" % css
+        for l in trie:
+            sel = ''
+            if l.value().page(lang) == page:
+                sel = ' class="selected"'
+            html += '<li%s><a href="%s">%s</a>\n' \
+                % (sel,l.value().link(),l.value().page(lang).menu())
+            html += self._menu(l.children(), lang, page, "")
+        html += "</ul>\n"
+        return html
+
+    def menu(self,lang,page,cssclass):
+        css = ''
+        if cssclass:
+            css = ' class="'+cssclass+'"'
+        return self._menu(self._root, lang, page, css)
+
+class Sitemap():
+    """Class keeping the internal site structure"""
+    def __init__(self):
+        self._file = 'sitemap.txt'
+        self._tree = Trie()
+
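+    # The capturing re.split keeps one token per path level, e.g.
+    # '/about/history' -> ['/about/', 'history'] and '/' -> ['/'].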
+    def add_link(self, link):
+        tokens = filter(None,re.split(r'(^/\w*/|\w*/)',link))
+        self._tree.add(tokens,Link(link))
+
+    def read_map(self):
+        try:
+            f = open(self._file)
+            sml = f.read().split()
+            f.close()
+            for line in sml:
+                self.add_link(line)
+        except IOError, what_error:
+            print 'INFO: Could not read sitemap.txt - one will be created'
+
+    def set(self):
+        return set(link.link() for link in self._tree)
+
+    def graph(self):
+        self._tree.graph()
+
+    def gen_menu(self,lang,page,cssclass):
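+        # Still a stub; presumably it will delegate to the Trie once
+        # wired up (assumption): return self._tree.menu(lang, page, cssclass)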
+        return 'Generate menu from sitemap - To be implemented'
+
def generateSitemap():
-  sitemap = []
-  try:
-    sfile = open('sitemap.txt')
-    flist = sfile.read().split()
+    sitemap = []
+    try:
+        sfile = open('sitemap.txt')
+        flist = sfile.read().split()
+        sfile.close()
+        for f in flist:
+            sitemap.append(dict(link=f))
+    except IOError, what_error:
+        print 'Sitemap missing - generating one.'
+
+    for dirname, dirnames, filenames in os.walk('.'):
+        for filename in filenames:
+            if fnmatch.fnmatch(filename, '*.xml'):
+                xfile = os.path.join(dirname,filename)
+                doc = bindery.parse(xfile, prefixes=PREFIXES)
+                title = doc.xml_select(u'/db:article/db:info/db:title')
+                menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
+                code = doc.xml_select(u"//xi:include[@parse='text']")
+                resource = doc.xml_select(u"//db:link[@xl:href]")
+                image = doc.xml_select(u"//db:imagedata[@fileref]")
+                exe = 0
+                for c in code:
+                    (p, ext) = os.path.splitext(c.href)
+                    if ext in valid_scripts:
+                        exe = 1
+
+                if title and menu:
+                    found = 0
+                    base = xfile.split('.')[1]
+                    link = base.replace('index','')
+                    level = len(filter(None,re.split(r'(^/\w*/|\w*/)',link)))
+                    res = []
+                    for r in resource:
+                        rf = os.path.join(dirname,r.href)
+                        if os.path.isfile(rf):
+                            res.append(rf)
+                    for i in image:
+                        im = os.path.join(dirname,i.fileref)
+                        if os.path.isfile(im):
+                            res.append(im)
+                    page = dict(title=unicode(doc.article.info.title),
+                                menu=unicode(doc.article.info.titleabbrev),
+                                output=os.path.join(dirname,
+                                    filename.replace('xml','html')),
+                                exe=exe,
+                                file=xfile,
+                                res=res,
+                                level=level)
+                    for l in sitemap:
+                        if l['link'] == link:
+                            found = 1
+                            l.update(page)
+                    if not found:
+                        print "adding "+link+" to sitemap"
+                        dd = dict(link=link)
+                        dd.update(page)
+                        sitemap.append(dd)
+    sfile = open('sitemap.txt','w')
+    for l in sitemap:
+        sfile.write(l['link']+'\n')
    sfile.close()
-    for f in flist:
-      sitemap.append(dict(link=f))
-  except IOError, what_error:
-    print 'Sitemap missing - generating one.'
-  for dirname, dirnames, filenames in os.walk('.'):
-    for filename in filenames:
-      if fnmatch.fnmatch(filename, '*.xml'):
-        xfile = os.path.join(dirname,filename)
-        doc = bindery.parse(xfile,
-            prefixes={u'db': u'http://docbook.org/ns/docbook',
-                      u'xi': u'http://www.w3.org/2001/XInclude'})
-        title = doc.xml_select(u'/db:article/db:info/db:title')
-        menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
-        code = doc.xml_select(u"//xi:include[@parse='text']")
-        exe = 0
-        for c in code:
-          (p, ext) = os.path.splitext(c.href)
-          if ext in valid_scripts:
-            exe = 1
-
-        if title and menu:
-          found = 0
-          base = os.path.splitext(xfile)[0]
-          link = base.translate(None,'.').replace('index','')
-          level = len(filter(None,re.split(r'(/\w*/)',link)))
-          page = dict(title=unicode(doc.article.info.title),
-                      menu=unicode(doc.article.info.titleabbrev),
-                      output=os.path.join(dirname,filename.replace('xml','html')),
-                      exe=exe,
-                      file=xfile,
-                      level=level)
-          for l in sitemap:
-            if l['link'] == link:
-              found = 1
-              l.update(page)
-          if not found:
-            print "adding "+link+" to sitemap"
-            dd = dict(link=link)
-            dd.update(page)
-            sitemap.append(dd)
-  sfile = open('sitemap.txt','w')
-  for l in sitemap:
-    sfile.write(l['link']+'\n')
-  sfile.close()
-  return sitemap
+    return sitemap
def expandXincludeTxt(page):
-  doc = bindery.parse(page['file'],prefixes={u'db': u'http://docbook.org/ns/docbook',
-                                             u'xi': u'http://www.w3.org/2001/XInclude'})
-  if page['exe']:
-    code = doc.xml_select(u"//xi:include[@parse='text']")
-    for c in code:
-      (p, ext) = os.path.splitext(c.href)
-      if ext in valid_scripts:
-        exe = os.path.join(os.path.abspath(c.href))
-        xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
-        xstr = bindery.parse(str(xml.stdout.read()))
-        id = c.xml_index_on_parent
-        for x in xstr.xml_children:
-          c.xml_parent.xml_insert(id,x)
-        c.xml_parent.xml_remove(c)
-  return doc
+    doc = bindery.parse(page['file'],
+                        prefixes={u'db': u'http://docbook.org/ns/docbook',
+                                  u'xi': u'http://www.w3.org/2001/XInclude'})
+    if page['exe']:
+        code = doc.xml_select(u"//xi:include[@parse='text']")
+        for c in code:
+            (p, ext) = os.path.splitext(c.href)
+            if ext in valid_scripts:
+                exe = os.path.abspath(c.href)
+                xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
+                xstr = bindery.parse(str(xml.stdout.read()))
+                id = c.xml_index_on_parent
+                for x in xstr.xml_children:
+                    c.xml_parent.xml_insert(id,x)
+                c.xml_parent.xml_remove(c)
+    return doc
def xsltConvert(doc):
# amara can not handle the docbook stylesheets
# xmlarticle = transform(doc,style_xslt)
-  cwd = os.getcwd()
-  rundir = os.path.dirname(page['file'])
-  os.chdir(rundir)
-  infile = os.path.basename(tempfile.mktemp())
-  outfile = tempfile.mktemp()
-  tfi = open(infile,'w')
-  tfi.write(doc.xml_encode())
-  tfi.close()
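+    # NOTE: 'page' below is the global loop variable from the main loop;
+    # xsltConvert() is never passed it explicitly.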
+    cwd = os.getcwd()
+    rundir = os.path.dirname(page['file'])
+    os.chdir(rundir)
+    infile = os.path.basename(tempfile.mktemp())
+    outfile = tempfile.mktemp()
+    tfi = open(infile,'w')
+    tfi.write(doc.xml_encode())
+    tfi.close()
# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
-  cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
-  retcode = subprocess.call(cmd)
-  if retcode:
-    print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
-  tfo = open(outfile,'r')
-  result = tfo.read()
-  tfo.close()
-  os.remove(infile)
-  os.remove(outfile)
-  os.chdir(cwd)
-  return result
+    cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
+    retcode = subprocess.call(cmd)
+    if retcode:
+        print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+    tfo = open(outfile,'r')
+    result = tfo.read()
+    tfo.close()
+    os.remove(infile)
+    os.remove(outfile)
+    os.chdir(cwd)
+    return result
def genMenu(page,sitemap,slevel,elevel):
-  title = None
-  sm = []
-  if elevel == MAXLEVEL or elevel == 1:
-    sm = sitemap
-  else:
-    idx = sitemap.index(page)
-    while (sitemap[idx]['level'] == page['level']):
-      idx = idx-1
-    title = sitemap[idx]['menu']
-    idx = idx+1
-    while (sitemap[idx]['level'] == page['level']):
-      sm.append(sitemap[idx])
-      idx = idx+1
-  oldlevel = slevel
-
-  html = '<ul>\n'
-  for p in sm:
-    if slevel > p['level'] or elevel < p['level']:
-      continue
-    if not title and p['link'] == '/':
-      title = p['menu']
-
-    if oldlevel < p['level']:
-      html+='<ul>\n'
-    elif oldlevel > p['level']:
-      if p['link'][-1] == '/':
-        html+='</li>\n'
-      html+='</ul>\n</li>\n'
-    if page == p:
-      html+='<li><a href="%s">[%s]</a>' % (p['link'],p['menu'])
+    title = None
+    sm = []
+    if elevel == MAXLEVEL or elevel == 1 or page == None:
+        html = '<ul>\n'
+        sm = sitemap
    else:
-      html+='<li><a href="%s">%s</a>' % (p['link'],p['menu'])
-    if p['link'][-1] != '/' or p['link'] == '/':
-      html+='</li>\n'
-    oldlevel = p['level']
-  html+='</ul>\n'
-  return (html,title)
+        html = '<ul class="tree">\n'
+        idx = sitemap.index(page)
+        while (sitemap[idx]['level'] == page['level']):
+            idx = idx-1
+        title = sitemap[idx]['menu']
+        idx = idx+1
+        while (idx < len(sitemap) and sitemap[idx]['level'] == page['level']):
+            sm.append(sitemap[idx])
+            idx = idx+1
+    oldlevel = slevel
+
+    for p in sm:
+        if slevel > p['level'] or elevel < p['level']:
+            continue
+        if not title and p['link'] == '/':
+            title = p['menu']
+
+        if oldlevel < p['level']:
+            html+='<ul>\n'
+        elif oldlevel > p['level']:
+            if p['link'][-1] == '/':
+                html+='</li>\n'
+            html+='</ul>\n</li>\n'
+        if page != None and page == p:
+            html+='<li class="selected"><a href="%s">%s</a>' % (p['link'],p['menu'])
+        else:
+            html+='<li><a href="%s">%s</a>' % (p['link'],p['menu'])
+        if p['link'][-1] != '/' or p['link'] == '/':
+            html+='</li>\n'
+        oldlevel = p['level']
+    html+='</ul>\n'
+    return (html,title)
def writeToTemplate(page,doc,sitemap):
-  (menu,menuname) = genMenu(page,sitemap,1,MAXLEVEL)
-  (levelmenu,levelname) = genMenu(page,sitemap,page['level'],page['level'])
-  template = Template(file=style_tmpl,
-                      searchList=[{'menu':menu},
-                                  {'article':doc},
-                                  {'levelmenu':levelmenu},
-                                  {'levelname':levelname}])
-  outfile = outputdir+page['output']
-  d = os.path.split(outfile)[0]
-  if d != '':
-    mkdir_p(d)
-  out = open(outfile, 'w')
-  out.write(str(template))
+    (menu,menuname) = genMenu(page,sitemap,1,MAXLEVEL)
+    (levelmenu,levelname) = genMenu(page,sitemap,page['level'],page['level'])
+    template = Template(file=style_tmpl,
+                        searchList=[{'title':page['title']},
+                                    {'menu':menu},
+                                    {'article':doc},
+                                    {'levelmenu':levelmenu},
+                                    {'levelname':levelname}])
+    outfile = tmptarget+page['output']
+    mkdir_p(os.path.dirname(outfile))
+    out = open(outfile, 'w')
+    out.write(str(template))
+    out.close()
+    for r in page['res']:
+        mkdir_p(os.path.dirname(tmptarget+r))
+        shutil.copyfile(r, tmptarget+r)
+
+def createSitemap(sitemap):
+    (menu,menuname) = genMenu(None,sitemap,1,MAXLEVEL)
+    template = Template(file=style_tmpl,
+                        searchList=[
+                            {'title':'Sitemap'},
+                            {'menu':menu},
+                            {'article':menu},
+                            {'levelmenu':''},
+                            {'levelname':''}])
+    outfile = tmptarget+'sitemap.en.html'
+    mkdir_p(os.path.dirname(outfile))
+    out = open(outfile, 'w')
+    out.write(str(template))
+    out.close()
+
+dir_ = Directory()
+sitemap = Sitemap()
+
+dir_.scan()
+sitemap.read_map()
+
+missing = dir_.set() - sitemap.set()
+removed = sitemap.set() - dir_.set()
+for page in removed:
+    print 'Page '+page+' is in the sitemap but missing on disk!'
+
+for page in missing:
+    print 'adding missing page '+page
+    sitemap.add_link(page)
+
+sitemap.graph()
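+# The class-based pipeline above is still a work in progress (gen_menu is
+# stubbed out); the legacy path below does the actual rendering for now.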
+
sitemap = generateSitemap()
-tmptarget = tempfile.mkdtemp()+'/'
for page in sitemap:
-  t1 = time.time()
-  print "Page : "+page['link'],
-  doc = expandXincludeTxt(page)
-  pubdoc = xsltConvert(doc)
-  writeToTemplate(page,pubdoc,sitemap)
-# publishResources()
-  t2 = time.time()
-  print "["+str(round(t2-t1,2))+"] done."
-
+    t1 = time.time()
+    print "Page : %-30s %30s" % (page['link'],
+                                 time.ctime(os.stat(page['file']).st_mtime)),
+    doc = expandXincludeTxt(page)
+    pubdoc = xsltConvert(doc)
+    writeToTemplate(page,pubdoc,sitemap)
+    t2 = time.time()
+    print "[%5.2f s]" % (round(t2-t1,2))
+createSitemap(sitemap)
+publish(tmptarget, args.output)
+publish(args.style+"css", args.output)
+publish(args.style+"images",args.output)