import argparse
import fnmatch
import glob
import os
import re
import shutil
import subprocess
import tempfile
import time

import pygraphviz as pgv

from amara import bindery
from amara.xslt import transform
from Cheetah.Template import Template
# NOTE(review): diff residue — `args` is referenced here but no
# argparse.ArgumentParser()/parse_args() call is visible in this chunk;
# presumably it is defined in an elided section.  Confirm before running.
style_tmpl = args.style+"index.en.html.tmpl"
outputdir = args.output
# Scratch directory that rendered pages are written into (see Page.template).
tmptarget = tempfile.mkdtemp()+'/'

# File extensions that an <xi:include parse="text"> may execute to produce
# inline XML (see Page.prepare).
valid_scripts = ['.py','.pl']
MAXLEVEL = 10000
# NOTE(review): stray fragment — `retcode` and `cmd` are undefined at module
# level; this looks like diff context that belongs inside a function
# (cf. the identical lines in Page.render).  Verify against the full file.
if retcode:
    print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'

# XML namespace prefixes used by every XPath query against the DocBook
# article sources.
PREFIXES={u'db': u'http://docbook.org/ns/docbook',
          u'xi': u'http://www.w3.org/2001/XInclude',
          u'xl': u'http://www.w3.org/1999/xlink'}
+
class Directory():
    """Tracks the article XML files found beneath the working directory."""

    def __init__(self):
        self._cwd = '.'
        self._tree = []

    def scan(self):
        """Walk the tree and record a site link for every article that has
        both a title and a menu entry (titleabbrev) in its info element."""
        for dirname, dirnames, filenames in os.walk(self._cwd):
            for filename in filenames:
                if not fnmatch.fnmatch(filename, '*.xml'):
                    continue
                path = os.path.join(dirname, filename)
                doc = bindery.parse(path, prefixes=PREFIXES)
                has_title = doc.xml_select(u'/db:article/db:info/db:title')
                has_menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
                if has_title and has_menu:
                    # './sub/index.en.xml' -> '/sub/index' -> '/sub/'
                    base = path.split('.')[1]
                    self._tree.append(base.replace('index', ''))

    def set(self):
        """Return the collected links as a set (duplicates removed)."""
        return set(self._tree)
+
class Page():
    """One language-specific version of a webpage.

    Wraps a single DocBook XML source file: parses it, expands
    executable-script XIncludes, collects referenced resource files,
    renders it to HTML via xsltproc and finally wraps the result in the
    Cheetah page template.
    """
    def __init__(self,page):
        # `page` is a (language, filename) tuple, as produced by
        # Link._scan_languages().
        self._file = page[1]
        self._lang = page[0]
        self._doc = None
        self._resources = []
        self._title = None
        self._menu = None
        self._rendered_article = None

    def language(self):
        return self._lang

    def menu(self):
        # Menu label (titleabbrev text); None until prepare() has run.
        return self._menu

    def set_article(self,art):
        self._rendered_article = art

    def prepare(self):
        """Parse the XML source, extract title/menu text, execute script
        XIncludes in place and collect referenced resource files."""
        self._doc = bindery.parse(self._file, prefixes=PREFIXES)
        if self._doc.xml_select(u'/db:article/db:info/db:title'):
            self._title = unicode(self._doc.article.info.title)
        if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
            self._menu = unicode(self._doc.article.info.titleabbrev)

        dirname = os.path.dirname(self._file)
        # An <xi:include parse="text" href="..."> pointing at a .py/.pl file
        # is treated as "run this script and splice its XML output here".
        code = self._doc.xml_select(u"//xi:include[@parse='text']")
        if code:
            for c in code:
                (p, ext) = os.path.splitext(c.href)
                if ext in valid_scripts:
                    # NOTE(review): `dirname+c.href` concatenates with no
                    # path separator — this only works if href begins with
                    # '/'; confirm against the article sources.  The outer
                    # one-argument os.path.join is a no-op.
                    exe = os.path.join(os.path.abspath(dirname+c.href))
                    xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
                    xstr = bindery.parse(str(xml.stdout.read()))
                    idp = c.xml_index_on_parent
                    # Replace the xi:include element with the children of
                    # the parsed script output, preserving its position.
                    for x in xstr.xml_children:
                        c.xml_parent.xml_insert(idp,x)
                    c.xml_parent.xml_remove(c)

        # Remember local files referenced by links and images so they can
        # accompany the rendered page.
        for r in self._doc.xml_select(u"//db:link[@xl:href]"):
            rf = os.path.join(dirname,r.href)
            if os.path.isfile(rf):
                self._resources.append(rf)
        for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
            im = os.path.join(dirname,i.fileref)
            if os.path.isfile(im):
                self._resources.append(im)

    def render(self):
        """Render the prepared document to HTML with xsltproc, working in
        the article's own directory via temporary files."""
        # amara can not handle the docbook stylesheets
        # xmlarticle = transform(doc,style_xslt)
        cwd = os.getcwd()
        dirname = os.path.dirname(self._file)
        os.chdir(dirname)
        # NOTE(review): tempfile.mktemp() is deprecated and race-prone;
        # mkstemp() would be safer.  `style_xslt` is not defined anywhere in
        # this chunk — presumably a module-level constant in an elided part.
        infile = os.path.basename(tempfile.mktemp())
        outfile = tempfile.mktemp()
        tfi = open(infile,'w')
        tfi.write(self._doc.xml_encode())
        tfi.close()
# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
        cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
        retcode = subprocess.call(cmd)
        if retcode:
            print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
        tfo = open(outfile,'r')
        self._rendered_article = tfo.read()
        tfo.close()
        os.remove(infile)
        os.remove(outfile)
        os.chdir(cwd)

    def template(self,sitemap):
        """Fill the Cheetah page template with the rendered article plus the
        site-wide and per-level menus, writing the page under tmptarget."""
        htmlmenu = sitemap.gen_menu(self._lang,None,None)
        levelmenu = sitemap.gen_menu(self._lang,self,"tree")
        template = Template(file=style_tmpl,
                            searchList=[{'title':self._title},
                                        {'menu':htmlmenu},
                                        {'article':self._rendered_article},
                                        {'levelmenu':levelmenu},
                                        {'levelname':'Menu'}])
        # Mirror the source path, swapping the trailing 'xml' for 'html'.
        outfile = tmptarget+'html'.join(self._file.rsplit('xml',1))
        # mkdir_p is defined elsewhere in the file (not visible in this chunk).
        mkdir_p(os.path.dirname(outfile))
        out = open(outfile, 'w')
        out.write(str(template))
        out.close()
+
+
class Link():
    """A site link and the per-language Page objects that back it."""

    def __init__(self, link):
        self._link = link
        # A trailing slash denotes a directory; its page source is 'index'.
        path = link + 'index' if link[-1] == '/' else link
        self._pages = [Page(entry) for entry in self._scan_languages(path)]

    def _scan_languages(self, path):
        """Return (language, filename) tuples for every translation of
        `path`, matching names of the form ./path.<lang>.xml."""
        found = []
        for candidate in glob.glob('.' + path + '*'):
            parts = candidate.split('.')
            if len(parts) > 3 and parts[3] == 'xml':
                found.append((parts[2], candidate))
        return found

    def link(self):
        return self._link

    def prepare(self):
        for page in self._pages:
            page.prepare()

    def languages(self):
        return [page.language() for page in self._pages]

    def render(self):
        for page in self._pages:
            page.render()

    def template(self, sitemap):
        for page in self._pages:
            page.template(sitemap)

    def page(self, lang):
        """Return the Page for `lang`, or None when no such translation."""
        for page in self._pages:
            if page.language() == lang:
                return page
+
class Node():
    """One trie entry: a path token, its payload, and any child nodes."""

    def __init__(self, token, value):
        self._token, self._value = token, value
        # Children are appended in place via the list handed out by
        # children(), so the same list object must persist.
        self._children = []

    def token(self):
        """Return the path component this node represents."""
        return self._token

    def value(self):
        """Return the payload stored at this node."""
        return self._value

    def children(self):
        """Return the live (mutable) list of child nodes."""
        return self._children
+
class Trie():
    """Prefix tree keyed on tokenised link paths, holding Link payloads."""

    def __init__(self):
        self._root = []

    def __iter__(self):
        # Iterating the trie yields every stored value, depth-first.
        return self.inorder(self._root)

    def inorder(self, nodes):
        """Pre-order generator over the values below `nodes`."""
        for node in nodes:
            yield node.value()
            for value in self.inorder(node.children()):
                yield value

    def _add(self, nodes, key, content):
        # Consume the leading token: a one-token key becomes a leaf here,
        # otherwise descend into the matching child.  NOTE(review): `key`
        # is mutated in place, and an entry whose parent token is absent
        # is silently dropped — behavior kept as in the original.
        head = key.pop(0)
        if not key:
            nodes.append(Node(head, content))
        else:
            for child in nodes:
                if child.token() == head:
                    self._add(child.children(), key, content)

    def add(self, key, content):
        """Insert `content` under the token list `key`."""
        self._add(self._root, key, content)

    def _graph(self, nodes, G):
        for node in nodes:
            G.add_node(node.token())
            for child in node.children():
                G.add_edge(node.token(), child.token())
            self._graph(node.children(), G)

    def graph(self):
        """Build a pygraphviz digraph of the trie rooted at "sitemap"."""
        G = pgv.AGraph(directed=True)
        G.add_node("sitemap")
        for child in self._root:
            G.add_edge("sitemap", child.token())
        self._graph(self._root, G)

    def _menu(self, nodes, lang, page, css):
        html = "<ul%s>\n" % css
        for node in nodes:
            # Highlight the entry whose page is the one being rendered.
            sel = ' class="selected"' if node.value().page(lang) == page else ''
            html += '<li%s><a href="%s">%s</a>\n' \
                % (sel, node.value().link(), node.value().page(lang).menu())
            html += self._menu(node.children(), lang, page, "")
        html += "</ul>\n"
        return html

    def menu(self, lang, page, cssclass):
        """Render the trie as nested <ul> menus; `cssclass` styles the
        outermost list only."""
        css = ' class="%s"' % cssclass if cssclass else ''
        return self._menu(self._root, lang, page, css)
+
+class Sitemap():
+ """Class keeping the internal site structure"""
+ def __init__(self):
+ self._file = 'sitemap.txt'
+ self._tree = Trie()
+
+ def add_link(self, link):
+ tokens = filter(None,re.split(r'(^/\w*/|\w*/)',link))
+ self._tree.add(tokens,Link(link))
+
+ def write_map(self):
+ f = open(self._file,'w')
+ f.write('\n'.join(link.link() for link in self._tree))
+ f.close()
+
+ def read_map(self):
+ try:
+ f = open(self._file)
+ sml = f.read().split()
+ f.close()
+ for line in sml:
+ self.add_link(line)
+ except IOError, what_error:
+ print 'INFO: Could not read sitemap.txt - one will be created'
+
+ def set(self):
+ return set(link.link() for link in self._tree)
+
+ def graph(self):
+ self._tree.graph()
+
+ def gen_menu(self,lang,page,cssclass):
+ return self._tree.menu(lang,page,cssclass)
+
def generateSitemap():
sitemap = []
try:
out.write(str(template))
out.close()
+dir_ = Directory()
+sitemap = Sitemap()
+
+dir_.scan()
+sitemap.read_map()
+
+missing = dir_.set() - sitemap.set()
+removed = sitemap.set() - dir_.set()
+for page in removed:
+ print removed+' pages missing!!'
+for page in missing:
+ print 'adding missing page '+page
+ sitemap.add_link(page)
+if len(missing & removed) != 0:
+ print 'writing new sitemap - please adjust if needed'
+ sitemap.write_map()
+sitemap.graph()
sitemap = generateSitemap()