#!/usr/bin/python
import os
import fnmatch
import subprocess
import amara
import re
import tempfile
import errno
import time
import argparse
import shutil
import pygraphviz as pgv
import glob
import gettext
from amara import bindery
from amara.xslt import transform
from Cheetah.Template import Template

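# Pipeline: scan the article tree, reconcile it against sitemap.txt, render
# each DocBook article to HTML with xsltproc, wrap the result in a Cheetah
# template, and rsync everything to the output target.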
parser = argparse.ArgumentParser(description='Process docbook article tree.')
parser.add_argument('--style', nargs='?',
                    default=os.path.dirname(os.getcwd())+'/style/default/')
parser.add_argument('--output', nargs='?',
                    default=os.path.dirname(os.getcwd())+'/htdocs/')
args = parser.parse_args()

style_xslt = args.style+"docbook.xsl"
outputdir = args.output

tmptarget = tempfile.mkdtemp()+'/'

# Extensions of xi:include'd scripts that are executed, with their output
# spliced into the article (see Page.prepare).
valid_scripts = ['.py','.pl']
MAXLEVEL = 10000

def mkdir_p(path):
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        if exc.errno == errno.EEXIST:
            pass
        else:
            raise

def publish(src,target):
    cmd = ["rsync","-a","--delete",src,target]
    retcode = subprocess.call(cmd)
    if retcode:
        print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'

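# Run a two-word command (e.g. "mkdir -p") on the remote side of an
# rsync-style "host:path" target; the path becomes the command's argument.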
def ssh_cmd(target, command):
    t = target.split(":")
    c = command.split()
    cmd = ["ssh",t[0],c[0],c[1],t[1]]
    retcode = subprocess.call(cmd)
    if retcode:
        print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'

PREFIXES={u'db': u'http://docbook.org/ns/docbook',
          u'xi': u'http://www.w3.org/2001/XInclude',
          u'xl': u'http://www.w3.org/1999/xlink',
          u'html' : u'http://www.w3.org/1999/xhtml'}

class Directory():
    """Class containing the state of the directory with articles"""
    def __init__(self):
        self._cwd = '.'
        self._tree = []

    def scan(self):
        for dirname, dirnames, filenames in os.walk(self._cwd):
            for filename in filenames:
                if fnmatch.fnmatch(filename, '*.xml'):
                    file_ = os.path.join(dirname,filename)
                    doc = bindery.parse(file_, prefixes=PREFIXES)
                    title = doc.xml_select(u'/db:article/db:info/db:title')
                    menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
                    if title and menu:
                        # file_ is './path/name.lang.xml', so the second
                        # '.'-separated field is the site-relative path.
                        base = file_.split('.')[1]
                        link = base.replace('index','')
                        self._tree.append(link)

    def set(self):
        return set(self._tree)

class Page():
    """Class representing a version of a webpage"""
    def __init__(self,link,page):
        self._link = link
        self._file = page[1]
        self._lang = page[0]
        self._doc = None
        self._resources = []
        self._title = None
        self._menu = None
        self._rendered_article = None

    def language(self):
        return self._lang

    def resources(self):
        return set(self._resources)

    def menu(self):
        return self._menu

    def set_article(self,art):
        self._rendered_article = art

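    # Parse the DocBook source, pick up the title and menu entry, execute
    # any xi:include'd scripts and splice their output into the document,
    # and collect linked files, images and form handlers as resources.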
    def prepare(self):
        self._doc = bindery.parse(self._file, prefixes=PREFIXES)
        if self._doc.xml_select(u'/db:article/db:info/db:title'):
            self._title = unicode(self._doc.article.info.title)
        if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
            self._menu = unicode(self._doc.article.info.titleabbrev)

        dirname = os.path.dirname(self._file)
        code = self._doc.xml_select(u"//xi:include[@parse='text']")
        if code:
            for c in code:
                (p, ext) = os.path.splitext(c.href)
                if ext in valid_scripts:
                    exe = []
                    exe.append(os.path.join(os.path.abspath(dirname),c.href))
                    if c.xml_select(u"//xi:include[@accept-language]"):
                        alang = c.xml_attributes[None, "accept-language"]
                        exe.append("lang="+alang)
                    if c.xml_select(u"//xi:include[@xpointer]"):
                        exe.append("xptr="+c.xpointer)
                    xml = subprocess.Popen(exe,stdout=subprocess.PIPE)
                    xstr = bindery.parse(str(xml.stdout.read()))
                    idp = c.xml_index_on_parent
                    for x in xstr.xml_children:
                        c.xml_parent.xml_insert(idp,x)
                    c.xml_parent.xml_remove(c)

        for r in self._doc.xml_select(u"//db:link[@xl:href]"):
            rf = os.path.join(dirname,r.href)
            if os.path.isfile(rf):
                self._resources.append(rf)
        for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
            im = os.path.join(dirname,i.fileref)
            if os.path.isfile(im):
                self._resources.append(im)
        for i in self._doc.xml_select(u"//html:form[@action]"):
            pyscript = re.split(r'\.py',i.action,1)[0]+'.py'
            im = os.path.join(dirname,pyscript)
            if os.path.isfile(im):
                self._resources.append(im)

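    # Run xsltproc from the article's directory so relative XIncludes
    # resolve; amara's own XSLT engine can not handle the DocBook
    # stylesheets (see the commented-out transform below).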
    def render(self):
        # amara can not handle the docbook stylesheets
        # xmlarticle = transform(doc,style_xslt)
        cwd = os.getcwd()
        dirname = os.path.dirname(self._file)
        os.chdir(dirname)
        infile = os.path.basename(tempfile.mktemp())
        outfile = tempfile.mktemp()
        tfi = open(infile,'w')
        tfi.write(self._doc.xml_encode(omit_xml_declaration=True))
        tfi.close()
#        cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
        cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
        retcode = subprocess.call(cmd)
        if retcode:
            print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
        tfo = open(outfile,'r')
        self._rendered_article = tfo.read()
        tfo.close()
        os.remove(infile)
        os.remove(outfile)
        os.chdir(cwd)

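    # Fill the per-language Cheetah template with the rendered article and
    # the generated menus, and write the HTML page into the staging tree.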
    def template(self,sitemap):
        htmlmenu = sitemap.gen_menu(self._lang,None,"menu")
        levelmenu = sitemap.gen_menu(self._lang,self,"tree")
        langmenu = sitemap.lang_menu(self._lang,self._link)
        template = Template(file=args.style+'index.'+self._lang+'.html.tmpl',
                            searchList=[{'title':self._title},
                                        {'menu':htmlmenu},
                                        {'article':self._rendered_article},
                                        {'levelmenu':levelmenu},
                                        {'langmenu':langmenu}])
        outfile = tmptarget+'html'.join(self._file.rsplit('xml',1))
        mkdir_p(os.path.dirname(outfile))
        out = open(outfile, 'w')
        out.write(str(template))
        out.close()


class Link():
    """Class representing a webpage on the site"""
    def __init__(self,link):
        self._link = link
        # find the representations of the link.
        self._pages = []
        path = link
        if self._link[-1] == '/':
            path = path+'index'
        lang = self._scan_languages(path)
        for l in lang:
            self._pages.append(Page(self,l))

    def add_page(self,l):
        self._pages.append(Page(self,l))

    def _scan_languages(self,path):
        lang = []
        for l in glob.glob('.'+path+'*'):
            ls = l.split('.')
            if len(ls) > 3 and ls[3] == 'xml':
                lang.append((ls[2],l))
        return lang

    def link(self):
        return self._link

    def prepare(self):
        for page in self._pages:
            page.prepare()

    def languages(self):
        p = []
        for page in self._pages:
            p.append(page.language())
        return p

    def render(self):
        for page in self._pages:
            page.render()

    def template(self,sitemap):
        for page in self._pages:
            page.template(sitemap)

    def page(self,lang):
        for page in self._pages:
            if page.language()==lang:
                return page
        return None

    def resources(self):
        res = set()
        for page in self._pages:
            res = res.union(page.resources())
        return res


class Node():
    def __init__(self,token,value):
        self._token = token
        self._value = value
        self._children = []

    def token(self):
        return self._token

    def value(self):
        return self._value

    def children(self):
        return self._children

class Trie():
    def __init__(self):
        self._root = []

    def __iter__(self):
        return self.inorder(self._root)

    def inorder(self,t):
        for l in t:
            yield l.value()
            for x in self.inorder(l.children()):
                yield x

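    # Descend one token per level; the final token becomes a new leaf.
    # Note: ancestors must be added before their descendants - a key whose
    # prefix is not in the trie yet is dropped silently.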
    def _add(self,trie, key, content):
        # is the key a leaf
        k = key.pop(0)
        if key == []:
            node = Node(k,content)
            trie.append(node)
        else:
            for ch in trie:
                if ch.token() == k:
                    self._add(ch.children(), key, content)

    def add(self,key, content):
        self._add(self._root, key, content)

    def _graph(self, trie, G):
        for l in trie:
            G.add_node(l.token())
            for ch in l.children():
                G.add_edge(l.token(),ch.token())
            self._graph(l.children(), G)

    def graph(self):
        G = pgv.AGraph(directed=True)
        G.add_node("sitemap")
        for ch in self._root:
            G.add_edge("sitemap",ch.token())
        self._graph(self._root, G)
#        G.layout('dot')
#        G.draw('g.png')
#        print G.string()

    def _menu(self, trie, lang, page, css):
        html = "<ul%s>\n" % css
        for l in trie:
            sel = ''
            p = l.value().page(lang)
            if p == page:
                sel = ' class="selected"'
            if p is not None:
                html += '<li%s><a href="%s">%s</a>\n' \
                        % (sel,l.value().link(),p.menu())
            else:
                html += '<li%s><a href="%s.en" hreflang="en">%s</a>*\n' \
                        % (sel,l.value().link(), l.value().page('en').menu())
            if l.children():
                html += self._menu(l.children(), lang, page, "")
        html += "</ul>\n"
        return html

    def menu(self,lang,page,cssclass):
        css = ''
        if cssclass:
            css = ' class="'+cssclass+'"'
        return self._menu(self._root, lang, page, css)

class Sitemap():
    """Class keeping the internal site structure"""
    def __init__(self):
        self._file = 'sitemap.txt'
        self._tree = Trie()
        self._sitelang = set()
        self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
        self._tranlang = {}

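    # Split '/foo/bar/baz' into the trie key ['/foo/','bar/','baz'] so each
    # path component becomes one level of the site tree.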
    def add_link(self, link):
        tokens = filter(None,re.split(r'(^/[\w-]*/|[\w-]*/)',link))
        self._tree.add(tokens,Link(link))

    def write_map(self):
        f = open(self._file,'w')
        f.write('\n'.join(link.link() for link in self._tree))
        f.close()

    def read_map(self):
        try:
            f = open(self._file)
            sml = f.read().split()
            f.close()
            for line in sml:
                self.add_link(line)
        except IOError:
            print 'INFO: Could not read sitemap.txt - one will be created'

    def set(self):
        return set(link.link() for link in self._tree)

    def process(self):
        t1 = time.time()
        for link in self._tree:
            link.prepare()
        t2 = time.time()
        print "Prepare  [%5.2f s]" % (round(t2-t1,2))
        for link in self._tree:
            self._sitelang = self._sitelang.union(set(link.languages()))
        for tran in self._sitelang:
            if tran != 'en':
                self._tranlang[tran] = gettext.translation('iso_639_3',
                                                           languages=[tran])
        t3 = time.time()
        print "Language [%5.2f s]" % (round(t3-t2,2))
        for link in self._tree:
            link.render()
        t4 = time.time()
        print "Render   [%5.2f s]" % (round(t4-t3,2))
        for link in self._tree:
            link.template(self)
        t5 = time.time()
        print "Template [%5.2f s]" % (round(t5-t4,2))
        res = set()
        for link in self._tree:
            res = res.union(link.resources())
        for f in res:
            outfile = tmptarget+f
            mkdir_p(os.path.dirname(outfile))
            shutil.copyfile(f,outfile)
        t6 = time.time()
        print "Resources[%5.2f s]" % (round(t6-t5,2))
        sitemaplink = Link('/sitemap')
        for l in self._sitelang:
            sitemaplink.add_page((l,'/sitemap.'+l+'.xml'))
        for l in self._sitelang:
            sitemaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap"))
            sitemaplink.page(l).template(self)
        t7 = time.time()
        print "Sitemap  [%5.2f s]" % (round(t7-t6,2))

    def graph(self):
        self._tree.graph()

    def gen_menu(self,lang,page,cssclass):
        return self._tree.menu(lang,page,cssclass)

    def lang_menu(self,lang,link):
        html = "<ul>"
        for l in link.languages():
            isoxml = u"//iso_639_3_entry[@*='"+l+"']"
            ln = self._isocode.xml_select(isoxml)[0].name
            if lang != 'en':
                ln = self._tranlang[lang].gettext(ln)
            p = link.link()
            if p[-1] == '/':
                p = p+'index'
            p = p+'.'+l
            html += '<li><a href="%s" hreflang="%s">%s</a></li>' % (p, l, ln)
        html += "</ul>"
        return html

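    # Create the target directory on the remote host, rsync the staged
    # site and the style assets over, and open up read permissions.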
    def publish(self):
        ssh_cmd(args.output,"mkdir -p")
        publish(tmptarget, args.output)
        for res in ["css","images","js","favicon.ico"]:
            if os.path.exists(args.style+res):
                publish(args.style+res, args.output)
        ssh_cmd(args.output,"chmod a+rx")

ts = time.time()
dir_ = Directory()
sitemap = Sitemap()

dir_.scan()
sitemap.read_map()

missing = dir_.set() - sitemap.set()
removed = sitemap.set() - dir_.set()
for page in removed:
    print 'Warning: '+page+' is listed in the sitemap but has no source file'
for page in missing:
    print 'adding missing page '+page
    sitemap.add_link(page)
if len(missing)+len(removed) != 0:
    print 'writing new sitemap - please adjust if needed'
    sitemap.write_map()
sitemap.graph()

sitemap.process()

t1 = time.time()
sitemap.publish()
t2 = time.time()
print "Publish  [%5.2f s]" % (round(t2-t1,2))
print "Total    [%5.2f s]" % (round(t2-ts,2))
--- /dev/null
+#!/usr/bin/python
+import os
+import fnmatch
+import subprocess
+import amara
+import re
+import tempfile
+import errno
+import time
+import argparse
+import shutil
+import pygraphviz as pgv
+import glob
+import gettext
+import shutil
+from amara import bindery
+from amara.xslt import transform
+from Cheetah.Template import Template
+
+parser = argparse.ArgumentParser(description='Process docbook article tree.')
+parser.add_argument('--style', nargs='?',
+ default=os.path.dirname(os.getcwd())+'/style/default/')
+parser.add_argument('--output', nargs='?',
+ default=os.path.dirname(os.getcwd())+'/htdocs/')
+args = parser.parse_args()
+
+style_xslt = args.style+"docbook.xsl"
+outputdir = args.output
+
+tmptarget = tempfile.mkdtemp()+'/'
+
+valid_scripts = ['.py','.pl']
+MAXLEVEL = 10000
+
+def mkdir_p(path):
+ try:
+ os.makedirs(path)
+ except OSError as exc: # Python >2.5
+ if exc.errno == errno.EEXIST:
+ pass
+ else: raise
+
+def publish(src,target):
+ cmd = ["rsync","-a","--delete",src,target]
+ retcode = subprocess.call(cmd)
+ if retcode:
+ print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+
+def ssh_cmd(target, command):
+ t = target.split(":")
+ c = command.split()
+ cmd = ["ssh",t[0],c[0],c[1],t[1]]
+ retcode = subprocess.call(cmd)
+ if retcode:
+ print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+
+PREFIXES={u'db': u'http://docbook.org/ns/docbook',
+ u'xi': u'http://www.w3.org/2001/XInclude',
+ u'xl': u'http://www.w3.org/1999/xlink',
+ u'html' : u'http://www.w3.org/1999/xhtml'}
+
+class Directory():
+ """Class containing the state of the directory with articles"""
+ def __init__(self):
+ self._cwd = '.'
+ self._tree = []
+
+ def scan(self):
+ for dirname, dirnames, filenames in os.walk(self._cwd):
+ for filename in filenames:
+ if fnmatch.fnmatch(filename, '*.xml'):
+ file_ = os.path.join(dirname,filename)
+ doc = bindery.parse(file_, prefixes=PREFIXES)
+ title = doc.xml_select(u'/db:article/db:info/db:title')
+ menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
+ if title and menu:
+ base = file_.split('.')[1]
+ link = base.replace('index','')
+ self._tree.append(link)
+
+ def set(self):
+ return set(self._tree)
+
+class Page():
+ """Class representing a version of a webpage"""
+ def __init__(self,link,page):
+ self._link = link
+ self._file = page[1]
+ self._lang = page[0]
+ self._doc = None
+ self._resources = []
+ self._title = None
+ self._menu = None
+ self._rendered_article = None
+
+ def language(self):
+ return self._lang
+
+ def resources(self):
+ return set(self._resources)
+
+ def menu(self):
+ return self._menu
+
+ def set_article(self,art):
+ self._rendered_article = art
+
+ def prepare(self):
+ self._doc = bindery.parse(self._file, prefixes=PREFIXES)
+ if self._doc.xml_select(u'/db:article/db:info/db:title'):
+ self._title = unicode(self._doc.article.info.title)
+ if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
+ self._menu = unicode(self._doc.article.info.titleabbrev)
+
+ dirname = os.path.dirname(self._file)
+ code = self._doc.xml_select(u"//xi:include[@parse='text']")
+ if code:
+ for c in code:
+ (p, ext) = os.path.splitext(c.href)
+ if ext in valid_scripts:
+ exe = []
+ exe.append(os.path.join(os.path.abspath(dirname)+'/'+c.href))
+ if c.xml_select(u"//xi:include[@accept-language]"):
+ alang = c.xml_attributes[None, "accept-language"]
+ exe.append("lang="+alang)
+ if c.xml_select(u"//xi:include[@xpointer]"):
+ exe.append("xptr="+c.xpointer)
+ xml = subprocess.Popen(exe,stdout=subprocess.PIPE)
+ xstr = bindery.parse(str(xml.stdout.read()))
+ idp = c.xml_index_on_parent
+ for x in xstr.xml_children:
+ c.xml_parent.xml_insert(idp,x)
+ c.xml_parent.xml_remove(c)
+
+ for r in self._doc.xml_select(u"//db:link[@xl:href]"):
+ rf = os.path.join(dirname,r.href)
+ if os.path.isfile(rf):
+ self._resources.append(rf)
+ for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
+ im = os.path.join(dirname,i.fileref)
+ if os.path.isfile(im):
+ self._resources.append(im)
+ for i in self._doc.xml_select(u"//html:form[@action]"):
+ pyscript = re.split('\.py',i.action,1)[0]+'.py'
+ im = os.path.join(dirname,pyscript)
+ if os.path.isfile(im):
+ self._resources.append(im)
+
+ def render(self):
+ # amara can not handle the docbook stylesheets
+ # xmlarticle = transform(doc,style_xslt)
+ cwd = os.getcwd()
+ dirname = os.path.dirname(self._file)
+ os.chdir(dirname)
+ infile = os.path.basename(tempfile.mktemp())
+ outfile = tempfile.mktemp()
+ tfi = open(infile,'w')
+ tfi.write(self._doc.xml_encode(omit_xml_declaration=True))
+ tfi.close()
+# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
+ cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
+ retcode = subprocess.call(cmd)
+ if retcode:
+ print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+ tfo = open(outfile,'r')
+ self._rendered_article = tfo.read()
+ tfo.close()
+ os.remove(infile)
+ os.remove(outfile)
+ os.chdir(cwd)
+
+ def template(self,sitemap):
+ htmlmenu = sitemap.gen_menu(self._lang,None,"menu")
+ levelmenu = sitemap.gen_menu(self._lang,self,"tree")
+ langmenu = sitemap.lang_menu(self._lang,self._link)
+ template = Template(file=args.style+'index.'+self._lang+'.html.tmpl',
+ searchList=[{'title':self._title},
+ {'menu':htmlmenu},
+ {'article':self._rendered_article},
+ {'levelmenu':levelmenu},
+ {'langmenu':langmenu}])
+ outfile = tmptarget+'html'.join(self._file.rsplit('xml',1))
+ mkdir_p(os.path.dirname(outfile))
+ out = open(outfile, 'w')
+ out.write(str(template))
+ out.close()
+
+
+class Link():
+ """Class representing a webpage on the site"""
+ def __init__(self,link):
+ self._link = link
+ # find the representations of the link.
+ self._pages = []
+ path = link
+ if self._link[-1] == '/':
+ path = path+'index'
+ lang = self._scan_languages(path)
+ for l in lang:
+ self._pages.append(Page(self,l))
+
+ def add_page(self,l):
+ self._pages.append(Page(self,l))
+
+ def _scan_languages(self,path):
+ lang = []
+ for l in glob.glob('.'+path+'*'):
+ ls = l.split('.')
+ if len(ls) > 3 and ls[3] == 'xml':
+ lang.append((ls[2],l))
+ return lang
+
+ def link(self):
+ return self._link
+
+ def prepare(self):
+ for page in self._pages:
+ page.prepare()
+
+ def languages(self):
+ p = []
+ for page in self._pages:
+ p.append(page.language())
+ return p
+
+ def render(self):
+ for page in self._pages:
+ page.render()
+
+ def template(self,sitemap):
+ for page in self._pages:
+ page.template(sitemap)
+
+ def page(self,lang):
+ for page in self._pages:
+ if page.language()==lang:
+ return page
+ return None
+
+ def resources(self):
+ res = set()
+ for page in self._pages:
+ res = res.union(page.resources())
+ return res
+
+
+class Node():
+ def __init__(self,token,value):
+ self._token = token
+ self._value = value
+ self._children = []
+
+ def token(self):
+ return self._token
+
+ def value(self):
+ return self._value
+
+ def children(self):
+ return self._children
+
+class Trie():
+ def __init__(self):
+ self._root = []
+
+ def __iter__(self):
+ return self.inorder(self._root)
+
+ def inorder(self,t):
+ for l in t:
+ yield l.value()
+ for x in self.inorder(l.children()):
+ yield x
+
+ def _add(self,trie, key, content):
+ # is the key a leaf
+ k = key.pop(0)
+ if key == []:
+ node = Node(k,content)
+ trie.append(node)
+ else:
+ for ch in trie:
+ if ch.token() == k:
+ self._add(ch.children(), key, content)
+
+ def add(self,key, content):
+ self._add(self._root, key, content)
+
+ def _graph(self, trie, G):
+ for l in trie:
+ G.add_node(l.token())
+ for ch in l.children():
+ G.add_edge(l.token(),ch.token())
+ self._graph(l.children(), G)
+
+ def graph(self):
+ G = pgv.AGraph(directed=True)
+ G.add_node("sitemap")
+ for ch in self._root:
+ G.add_edge("sitemap",ch.token())
+ self._graph(self._root, G)
+# G.layout('dot')
+# G.draw('g.png')
+# print G.string()
+
+ def _menu(self, trie, lang, page, css):
+ html = "<ul%s>\n" % css
+ for l in trie:
+ sel = ''
+ p = l.value().page(lang)
+ if p == page:
+ sel = ' class="selected"'
+ if p != None:
+ html += '<li%s><a href="%s">%s</a>\n' \
+ % (sel,l.value().link(),p.menu())
+ else:
+ html += '<li%s><a href="%s.en" hreflang="en">%s</a>*\n' \
+ % (sel,l.value().link(), l.value().page('en').menu())
+ if l.children():
+ html += self._menu(l.children(), lang, page, "")
+ html += "</ul>\n"
+ return html
+
+ def menu(self,lang,page,cssclass):
+ css = ''
+ if cssclass:
+ css = ' class="'+cssclass+'"'
+ return self._menu(self._root, lang, page, css)
+
+class Sitemap():
+ """Class keeping the internal site structure"""
+ def __init__(self):
+ self._file = 'sitemap.txt'
+ self._tree = Trie()
+ self._sitelang = set()
+ self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
+ self._tranlang = {}
+
+ def add_link(self, link):
+ tokens = filter(None,re.split(r'(^/[\w-]*/|[\w-]*/)',link))
+ self._tree.add(tokens,Link(link))
+
+ def write_map(self):
+ f = open(self._file,'w')
+ f.write('\n'.join(link.link() for link in self._tree))
+ f.close()
+
+ def read_map(self):
+ try:
+ f = open(self._file)
+ sml = f.read().split()
+ f.close()
+ for line in sml:
+ self.add_link(line)
+ except IOError, what_error:
+ print 'INFO: Could not read sitemap.txt - one will be created'
+
+ def set(self):
+ return set(link.link() for link in self._tree)
+
+ def process(self):
+ t1 = time.time()
+ for link in self._tree:
+ link.prepare()
+ t2 = time.time()
+ print "Prepare [%5.2f s]" % (round(t2-t1,2))
+ for link in self._tree:
+ self._sitelang = self._sitelang.union(set(link.languages()))
+ for tran in self._sitelang:
+ if tran != 'en':
+ self._tranlang[tran] = gettext.translation('iso_639_3',
+ languages=[tran])
+ t3 = time.time()
+ print "Language [%5.2f s]" % (round(t3-t2,2))
+ for link in self._tree:
+ link.render()
+ t4 = time.time()
+ print "Render [%5.2f s]" % (round(t4-t3,2))
+ for link in self._tree:
+ link.template(self)
+ t5 = time.time()
+ print "Template [%5.2f s]" % (round(t5-t4,2))
+ t6 = time.time()
+ res = set()
+ cwd = os.getcwd()
+ for link in self._tree:
+ res = res.union(link.resources())
+ for f in res:
+ outfile = tmptarget+f
+ mkdir_p(os.path.dirname(outfile))
+ shutil.copyfile(f,outfile)
+ print "Resources[%5.2f s]" % (round(t6-t5,2))
+ sitmaplink = Link('/sitemap')
+ for l in self._sitelang:
+ sitmaplink.add_page((l,'/sitemap.'+l+'.xml'))
+ for l in self._sitelang:
+ sitmaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap"))
+ sitmaplink.page(l).template(self)
+ t7 = time.time()
+ print "Sitemap [%5.2f s]" % (round(t7-t6,2))
+
+ def graph(self):
+ self._tree.graph()
+
+ def gen_menu(self,lang,page,cssclass):
+ return self._tree.menu(lang,page,cssclass)
+
+ def lang_menu(self,lang,link):
+ html = "<ul>"
+ for l in link.languages():
+ isoxml = u"//iso_639_3_entry[@*='"+l+"']"
+ ln = self._isocode.xml_select(isoxml)[0].name
+ if lang != 'en':
+ ln = self._tranlang[lang].gettext(ln)
+ p = link.link()
+ if p[-1] == '/':
+ p = p +'index'
+ p = p+'.'+l
+ html += '<li><a href="%s" hreflang="%s">%s</a></li>' % (p, l, ln)
+ html += "</ul>"
+ return html
+
+ def publish(self):
+ ssh_cmd(args.output,"mkdir -p")
+ publish(tmptarget, args.output)
+ for res in ["css","images","js","favicon.ico"]:
+ if (os.path.exists(args.style+res)):
+ publish(args.style+res, args.output)
+ ssh_cmd(args.output,"chmod a+rx")
+
+ts = time.time()
+dir_ = Directory()
+sitemap = Sitemap()
+
+dir_.scan()
+sitemap.read_map()
+
+missing = dir_.set() - sitemap.set()
+removed = sitemap.set() - dir_.set()
+for page in removed:
+ print page+' pages missing!!'
+for page in missing:
+ print 'adding missing page '+page
+ sitemap.add_link(page)
+if len(missing)+len(removed) != 0:
+ print 'writing new sitemap - please adjust if needed'
+ sitemap.write_map()
+sitemap.graph()
+
+sitemap.process()
+
+t1 = time.time()
+sitemap.publish()
+t2 = time.time()
+print "Publish [%5.2f s]" % (round(t2-t1,2))
+print "Total [%5.2f s]" % (round(t2-ts,2))
--- /dev/null
+#!/usr/bin/python
+import os
+import fnmatch
+import subprocess
+import amara
+import re
+import tempfile
+import errno
+import time
+import argparse
+import shutil
+import pygraphviz as pgv
+import glob
+import gettext
+import shutil
+from amara import bindery
+from amara.xslt import transform
+from Cheetah.Template import Template
+
+parser = argparse.ArgumentParser(description='Process docbook article tree.')
+parser.add_argument('--style', nargs='?',
+ default=os.path.dirname(os.getcwd())+'/style/default/')
+parser.add_argument('--output', nargs='?',
+ default=os.path.dirname(os.getcwd())+'/htdocs/')
+args = parser.parse_args()
+
+style_xslt = args.style+"docbook.xsl"
+outputdir = args.output
+
+tmptarget = tempfile.mkdtemp()+'/'
+
+valid_scripts = ['.py','.pl']
+MAXLEVEL = 10000
+
+def mkdir_p(path):
+ try:
+ os.makedirs(path)
+ except OSError as exc: # Python >2.5
+ if exc.errno == errno.EEXIST:
+ pass
+ else: raise
+
+def publish(src,target):
+ cmd = ["rsync","-a","--delete",src,target]
+ retcode = subprocess.call(cmd)
+ if retcode:
+ print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+
+def ssh_cmd(target, command):
+ t = target.split(":")
+ c = command.split()
+ cmd = ["ssh",t[0],c[0],c[1],t[1]]
+ retcode = subprocess.call(cmd)
+ if retcode:
+ print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+
+PREFIXES={u'db': u'http://docbook.org/ns/docbook',
+ u'xi': u'http://www.w3.org/2001/XInclude',
+ u'xl': u'http://www.w3.org/1999/xlink',
+ u'html' : u'http://www.w3.org/1999/xhtml'}
+
+class Directory():
+ """Class containing the state of the directory with articles"""
+ def __init__(self):
+ self._cwd = '.'
+ self._tree = []
+
+ def scan(self):
+ for dirname, dirnames, filenames in os.walk(self._cwd):
+ for filename in filenames:
+ if fnmatch.fnmatch(filename, '*.xml'):
+ file_ = os.path.join(dirname,filename)
+ doc = bindery.parse(file_, prefixes=PREFIXES)
+ title = doc.xml_select(u'/db:article/db:info/db:title')
+ menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
+ if title and menu:
+ base = file_.split('.')[1]
+ link = base.replace('index','')
+ self._tree.append(link)
+
+ def set(self):
+ return set(self._tree)
+
+class Page():
+ """Class representing a version of a webpage"""
+ def __init__(self,link,page):
+ self._link = link
+ self._file = page[1]
+ self._lang = page[0]
+ self._doc = None
+ self._resources = []
+ self._title = None
+ self._menu = None
+ self._rendered_article = None
+
+ def language(self):
+ return self._lang
+
+ def resources(self):
+ return set(self._resources)
+
+ def menu(self):
+ return self._menu
+
+ def set_article(self,art):
+ self._rendered_article = art
+
+ def prepare(self):
+ self._doc = bindery.parse(self._file, prefixes=PREFIXES)
+ if self._doc.xml_select(u'/db:article/db:info/db:title'):
+ self._title = unicode(self._doc.article.info.title)
+ if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
+ self._menu = unicode(self._doc.article.info.titleabbrev)
+
+ dirname = os.path.dirname(self._file)
+ code = self._doc.xml_select(u"//xi:include[@parse='text']")
+ if code:
+ for c in code:
+ (p, ext) = os.path.splitext(c.href)
+ if ext in valid_scripts:
+ exe = []
+ exe.append(os.path.join(os.path.abspath(dirname)+'/'+c.href))
+ if c.xml_select(u"//xi:include[@accept-language]"):
+ alang = c.xml_attributes[None, "accept-language"]
+ exe.append("lang="+alang)
+ if c.xml_select(u"//xi:include[@xpointer]"):
+ exe.append("xptr="+c.xpointer)
+ xml = subprocess.Popen(exe,stdout=subprocess.PIPE)
+ xstr = bindery.parse(str(xml.stdout.read()))
+ idp = c.xml_index_on_parent
+ for x in xstr.xml_children:
+ c.xml_parent.xml_insert(idp,x)
+ c.xml_parent.xml_remove(c)
+
+ for r in self._doc.xml_select(u"//db:link[@xl:href]"):
+ rf = os.path.join(dirname,r.href)
+ if os.path.isfile(rf):
+ self._resources.append(rf)
+ for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
+ im = os.path.join(dirname,i.fileref)
+ if os.path.isfile(im):
+ self._resources.append(im)
+ for i in self._doc.xml_select(u"//html:form[@action]"):
+ pyscript = re.split('\.py',i.action,1)[0]+'.py'
+ im = os.path.join(dirname,pyscript)
+ if os.path.isfile(im):
+ self._resources.append(im)
+
+ def render(self):
+ # amara can not handle the docbook stylesheets
+ # xmlarticle = transform(doc,style_xslt)
+ cwd = os.getcwd()
+ dirname = os.path.dirname(self._file)
+ os.chdir(dirname)
+ infile = os.path.basename(tempfile.mktemp())
+ outfile = tempfile.mktemp()
+ tfi = open(infile,'w')
+ tfi.write(self._doc.xml_encode(omit_xml_declaration=True))
+ tfi.close()
+# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
+ cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
+ retcode = subprocess.call(cmd)
+ if retcode:
+ print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+ tfo = open(outfile,'r')
+ self._rendered_article = tfo.read()
+ tfo.close()
+ os.remove(infile)
+ os.remove(outfile)
+ os.chdir(cwd)
+
+ def template(self,sitemap):
+ htmlmenu = sitemap.gen_menu(self._lang,None,"menu")
+ levelmenu = sitemap.gen_menu(self._lang,self,"tree")
+ langmenu = sitemap.lang_menu(self._lang,self._link)
+ template = Template(file=args.style+'index.'+self._lang+'.html.tmpl',
+ searchList=[{'title':self._title},
+ {'menu':htmlmenu},
+ {'article':self._rendered_article},
+ {'levelmenu':levelmenu},
+ {'langmenu':langmenu}])
+ outfile = tmptarget+'html'.join(self._file.rsplit('xml',1))
+ mkdir_p(os.path.dirname(outfile))
+ out = open(outfile, 'w')
+ out.write(str(template))
+ out.close()
+
+
+class Link():
+ """Class representing a webpage on the site"""
+ def __init__(self,link):
+ self._link = link
+ # find the representations of the link.
+ self._pages = []
+ path = link
+ if self._link[-1] == '/':
+ path = path+'index'
+ lang = self._scan_languages(path)
+ for l in lang:
+ self._pages.append(Page(self,l))
+
+ def add_page(self,l):
+ self._pages.append(Page(self,l))
+
+ def _scan_languages(self,path):
+ lang = []
+ for l in glob.glob('.'+path+'*'):
+ ls = l.split('.')
+ if len(ls) > 3 and ls[3] == 'xml':
+ lang.append((ls[2],l))
+ return lang
+
+ def link(self):
+ return self._link
+
+ def prepare(self):
+ for page in self._pages:
+ page.prepare()
+
+ def languages(self):
+ p = []
+ for page in self._pages:
+ p.append(page.language())
+ return p
+
+ def render(self):
+ for page in self._pages:
+ page.render()
+
+ def template(self,sitemap):
+ for page in self._pages:
+ page.template(sitemap)
+
+ def page(self,lang):
+ for page in self._pages:
+ if page.language()==lang:
+ return page
+ return None
+
+ def resources(self):
+ res = set()
+ for page in self._pages:
+ res = res.union(page.resources())
+ return res
+
+
+class Node():
+ def __init__(self,token,value):
+ self._token = token
+ self._value = value
+ self._children = []
+
+ def token(self):
+ return self._token
+
+ def value(self):
+ return self._value
+
+ def children(self):
+ return self._children
+
+class Trie():
+ def __init__(self):
+ self._root = []
+
+ def __iter__(self):
+ return self.inorder(self._root)
+
+ def inorder(self,t):
+ for l in t:
+ yield l.value()
+ for x in self.inorder(l.children()):
+ yield x
+
+ def _add(self,trie, key, content):
+ # is the key a leaf
+ k = key.pop(0)
+ if key == []:
+ node = Node(k,content)
+ trie.append(node)
+ else:
+ for ch in trie:
+ if ch.token() == k:
+ self._add(ch.children(), key, content)
+
+ def add(self,key, content):
+ self._add(self._root, key, content)
+
+ def _graph(self, trie, G):
+ for l in trie:
+ G.add_node(l.token())
+ for ch in l.children():
+ G.add_edge(l.token(),ch.token())
+ self._graph(l.children(), G)
+
+ def graph(self):
+ G = pgv.AGraph(directed=True)
+ G.add_node("sitemap")
+ for ch in self._root:
+ G.add_edge("sitemap",ch.token())
+ self._graph(self._root, G)
+# G.layout('dot')
+# G.draw('g.png')
+# print G.string()
+
+ def _menu(self, trie, lang, page, css):
+ html = "<ul%s>\n" % css
+ for l in trie:
+ sel = ''
+ p = l.value().page(lang)
+ if p == page:
+ sel = ' class="selected"'
+ if p != None:
+ html += '<li%s><a href="%s">%s</a>\n' \
+ % (sel,l.value().link(),p.menu())
+ else:
+ html += '<li%s><a href="%s.en" hreflang="en">%s</a>*\n' \
+ % (sel,l.value().link(), l.value().page('en').menu())
+ if l.children():
+ html += self._menu(l.children(), lang, page, "")
+ html += "</ul>\n"
+ return html
+
+ def menu(self,lang,page,cssclass):
+ css = ''
+ if cssclass:
+ css = ' class="'+cssclass+'"'
+ return self._menu(self._root, lang, page, css)
+
+class Sitemap():
+ """Class keeping the internal site structure"""
+ def __init__(self):
+ self._file = 'sitemap.txt'
+ self._tree = Trie()
+ self._sitelang = set()
+ self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
+ self._tranlang = {}
+
+ def add_link(self, link):
+ tokens = filter(None,re.split(r'(^/[\w-]*/|[\w-]*/)',link))
+ self._tree.add(tokens,Link(link))
+
+ def write_map(self):
+ f = open(self._file,'w')
+ f.write('\n'.join(link.link() for link in self._tree))
+ f.close()
+
+ def read_map(self):
+ try:
+ f = open(self._file)
+ sml = f.read().split()
+ f.close()
+ for line in sml:
+ self.add_link(line)
+ except IOError, what_error:
+ print 'INFO: Could not read sitemap.txt - one will be created'
+
+ def set(self):
+ return set(link.link() for link in self._tree)
+
+ def process(self):
+ t1 = time.time()
+ for link in self._tree:
+ link.prepare()
+ t2 = time.time()
+ print "Prepare [%5.2f s]" % (round(t2-t1,2))
+ for link in self._tree:
+ self._sitelang = self._sitelang.union(set(link.languages()))
+ for tran in self._sitelang:
+ if tran != 'en':
+ self._tranlang[tran] = gettext.translation('iso_639_3',
+ languages=[tran])
+ t3 = time.time()
+ print "Language [%5.2f s]" % (round(t3-t2,2))
+ for link in self._tree:
+ link.render()
+ t4 = time.time()
+ print "Render [%5.2f s]" % (round(t4-t3,2))
+ for link in self._tree:
+ link.template(self)
+ t5 = time.time()
+ print "Template [%5.2f s]" % (round(t5-t4,2))
+ t6 = time.time()
+ res = set()
+ cwd = os.getcwd()
+ for link in self._tree:
+ res = res.union(link.resources())
+ for f in res:
+ outfile = tmptarget+f
+ mkdir_p(os.path.dirname(outfile))
+ shutil.copyfile(f,outfile)
+ print "Resources[%5.2f s]" % (round(t6-t5,2))
+ sitmaplink = Link('/sitemap')
+ for l in self._sitelang:
+ sitmaplink.add_page((l,'/sitemap.'+l+'.xml'))
+ for l in self._sitelang:
+ sitmaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap"))
+ sitmaplink.page(l).template(self)
+ t7 = time.time()
+ print "Sitemap [%5.2f s]" % (round(t7-t6,2))
+
+ def graph(self):
+ self._tree.graph()
+
+ def gen_menu(self,lang,page,cssclass):
+ return self._tree.menu(lang,page,cssclass)
+
+ def lang_menu(self,lang,link):
+ html = "<ul>"
+ for l in link.languages():
+ isoxml = u"//iso_639_3_entry[@*='"+l+"']"
+ ln = self._isocode.xml_select(isoxml)[0].name
+ if lang != 'en':
+ ln = self._tranlang[lang].gettext(ln)
+ p = link.link()
+ if p[-1] == '/':
+ p = p +'index'
+ p = p+'.'+l
+ html += '<li><a href="%s" hreflang="%s">%s</a></li>' % (p, l, ln)
+ html += "</ul>"
+ return html
+
+ def publish(self):
+ ssh_cmd(args.output,"mkdir -p")
+ publish(tmptarget, args.output)
+ for res in ["css","images","js","favicon.ico"]:
+ if (os.path.exists(args.style+res)):
+ publish(args.style+res, args.output)
+ ssh_cmd(args.output,"chmod a+rx")
+
+ts = time.time()
+dir_ = Directory()
+sitemap = Sitemap()
+
+dir_.scan()
+sitemap.read_map()
+
+missing = dir_.set() - sitemap.set()
+removed = sitemap.set() - dir_.set()
+for page in removed:
+ print page+' pages missing!!'
+for page in missing:
+ print 'adding missing page '+page
+ sitemap.add_link(page)
+if len(missing)+len(removed) != 0:
+ print 'writing new sitemap - please adjust if needed'
+ sitemap.write_map()
+sitemap.graph()
+
+sitemap.process()
+
+t1 = time.time()
+sitemap.publish()
+t2 = time.time()
+print "Publish [%5.2f s]" % (round(t2-t1,2))
+print "Total [%5.2f s]" % (round(t2-ts,2))
--- /dev/null
+#!/usr/bin/python
+import os
+import fnmatch
+import subprocess
+import amara
+import re
+import tempfile
+import errno
+import time
+import argparse
+import shutil
+import pygraphviz as pgv
+import glob
+import gettext
+import shutil
+from amara import bindery
+from amara.xslt import transform
+from Cheetah.Template import Template
+
+parser = argparse.ArgumentParser(description='Process docbook article tree.')
+parser.add_argument('--style', nargs='?',
+ default=os.path.dirname(os.getcwd())+'/style/default/')
+parser.add_argument('--output', nargs='?',
+ default=os.path.dirname(os.getcwd())+'/htdocs/')
+args = parser.parse_args()
+
+style_xslt = args.style+"docbook.xsl"
+outputdir = args.output
+
+tmptarget = tempfile.mkdtemp()+'/'
+
+valid_scripts = ['.py','.pl']
+MAXLEVEL = 10000
+
+def mkdir_p(path):
+ try:
+ os.makedirs(path)
+ except OSError as exc: # Python >2.5
+ if exc.errno == errno.EEXIST:
+ pass
+ else: raise
+
+def publish(src,target):
+ cmd = ["rsync","-a","--delete",src,target]
+ retcode = subprocess.call(cmd)
+ if retcode:
+ print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+
+def ssh_cmd(target, command):
+ t = target.split(":")
+ c = command.split()
+ cmd = ["ssh",t[0],c[0],c[1],t[1]]
+ retcode = subprocess.call(cmd)
+ if retcode:
+ print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+
+PREFIXES={u'db': u'http://docbook.org/ns/docbook',
+ u'xi': u'http://www.w3.org/2001/XInclude',
+ u'xl': u'http://www.w3.org/1999/xlink',
+ u'html' : u'http://www.w3.org/1999/xhtml'}
+
+class Directory():
+ """Class containing the state of the directory with articles"""
+ def __init__(self):
+ self._cwd = '.'
+ self._tree = []
+
+ def scan(self):
+ for dirname, dirnames, filenames in os.walk(self._cwd):
+ for filename in filenames:
+ if fnmatch.fnmatch(filename, '*.xml'):
+ file_ = os.path.join(dirname,filename)
+ doc = bindery.parse(file_, prefixes=PREFIXES)
+ title = doc.xml_select(u'/db:article/db:info/db:title')
+ menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
+ if title and menu:
+ base = file_.split('.')[1]
+ link = base.replace('index','')
+ self._tree.append(link)
+
+ def set(self):
+ return set(self._tree)
+
+class Page():
+ """Class representing a version of a webpage"""
+ def __init__(self,link,page):
+ self._link = link
+ self._file = page[1]
+ self._lang = page[0]
+ self._doc = None
+ self._resources = []
+ self._title = None
+ self._menu = None
+ self._rendered_article = None
+
+ def language(self):
+ return self._lang
+
+ def resources(self):
+ return set(self._resources)
+
+ def menu(self):
+ return self._menu
+
+ def set_article(self,art):
+ self._rendered_article = art
+
+ def prepare(self):
+ self._doc = bindery.parse(self._file, prefixes=PREFIXES)
+ if self._doc.xml_select(u'/db:article/db:info/db:title'):
+ self._title = unicode(self._doc.article.info.title)
+ if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
+ self._menu = unicode(self._doc.article.info.titleabbrev)
+
+ dirname = os.path.dirname(self._file)
+ code = self._doc.xml_select(u"//xi:include[@parse='text']")
+ if code:
+ for c in code:
+ (p, ext) = os.path.splitext(c.href)
+ if ext in valid_scripts:
+ exe = []
+ exe.append(os.path.join(os.path.abspath(dirname)+'/'+c.href))
+ if c.xml_select(u"//xi:include[@accept-language]"):
+ alang = c.xml_attributes[None, "accept-language"]
+ exe.append("lang="+alang)
+ if c.xml_select(u"//xi:include[@xpointer]"):
+ exe.append("xptr="+c.xpointer)
+ xml = subprocess.Popen(exe,stdout=subprocess.PIPE)
+ xstr = bindery.parse(str(xml.stdout.read()))
+ idp = c.xml_index_on_parent
+ for x in xstr.xml_children:
+ c.xml_parent.xml_insert(idp,x)
+ c.xml_parent.xml_remove(c)
+
+ for r in self._doc.xml_select(u"//db:link[@xl:href]"):
+ rf = os.path.join(dirname,r.href)
+ if os.path.isfile(rf):
+ self._resources.append(rf)
+ for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
+ im = os.path.join(dirname,i.fileref)
+ if os.path.isfile(im):
+ self._resources.append(im)
+ for i in self._doc.xml_select(u"//html:form[@action]"):
+ pyscript = re.split('\.py',i.action,1)[0]+'.py'
+ im = os.path.join(dirname,pyscript)
+ if os.path.isfile(im):
+ self._resources.append(im)
+
+ def render(self):
+ # amara can not handle the docbook stylesheets
+ # xmlarticle = transform(doc,style_xslt)
+ cwd = os.getcwd()
+ dirname = os.path.dirname(self._file)
+ os.chdir(dirname)
+ infile = os.path.basename(tempfile.mktemp())
+ outfile = tempfile.mktemp()
+ tfi = open(infile,'w')
+ tfi.write(self._doc.xml_encode(omit_xml_declaration=True))
+ tfi.close()
+# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
+ cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
+ retcode = subprocess.call(cmd)
+ if retcode:
+ print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+ tfo = open(outfile,'r')
+ self._rendered_article = tfo.read()
+ tfo.close()
+ os.remove(infile)
+ os.remove(outfile)
+ os.chdir(cwd)
+
+ def template(self,sitemap):
+ htmlmenu = sitemap.gen_menu(self._lang,None,"menu")
+ levelmenu = sitemap.gen_menu(self._lang,self,"tree")
+ langmenu = sitemap.lang_menu(self._lang,self._link)
+ template = Template(file=args.style+'index.'+self._lang+'.html.tmpl',
+ searchList=[{'title':self._title},
+ {'menu':htmlmenu},
+ {'article':self._rendered_article},
+ {'levelmenu':levelmenu},
+ {'langmenu':langmenu}])
+ outfile = tmptarget+'html'.join(self._file.rsplit('xml',1))
+ mkdir_p(os.path.dirname(outfile))
+ out = open(outfile, 'w')
+ out.write(str(template))
+ out.close()
+
+
+class Link():
+ """Class representing a webpage on the site"""
+ def __init__(self,link):
+ self._link = link
+ # find the representations of the link.
+ self._pages = []
+ path = link
+ if self._link[-1] == '/':
+ path = path+'index'
+ lang = self._scan_languages(path)
+ for l in lang:
+ self._pages.append(Page(self,l))
+
+ def add_page(self,l):
+ self._pages.append(Page(self,l))
+
+ def _scan_languages(self,path):
+ lang = []
+ for l in glob.glob('.'+path+'*'):
+ ls = l.split('.')
+ if len(ls) > 3 and ls[3] == 'xml':
+ lang.append((ls[2],l))
+ return lang
+
+ def link(self):
+ return self._link
+
+ def prepare(self):
+ for page in self._pages:
+ page.prepare()
+
+ def languages(self):
+ p = []
+ for page in self._pages:
+ p.append(page.language())
+ return p
+
+ def render(self):
+ for page in self._pages:
+ page.render()
+
+ def template(self,sitemap):
+ for page in self._pages:
+ page.template(sitemap)
+
+ def page(self,lang):
+ for page in self._pages:
+ if page.language()==lang:
+ return page
+ return None
+
+ def resources(self):
+ res = set()
+ for page in self._pages:
+ res = res.union(page.resources())
+ return res
+
+
+class Node():
+ def __init__(self,token,value):
+ self._token = token
+ self._value = value
+ self._children = []
+
+ def token(self):
+ return self._token
+
+ def value(self):
+ return self._value
+
+ def children(self):
+ return self._children
+
+class Trie():
+ def __init__(self):
+ self._root = []
+
+ def __iter__(self):
+ return self.inorder(self._root)
+
+ def inorder(self,t):
+ for l in t:
+ yield l.value()
+ for x in self.inorder(l.children()):
+ yield x
+
+ def _add(self,trie, key, content):
+ # is the key a leaf
+ k = key.pop(0)
+ if key == []:
+ node = Node(k,content)
+ trie.append(node)
+ else:
+ for ch in trie:
+ if ch.token() == k:
+ self._add(ch.children(), key, content)
+
+ def add(self,key, content):
+ self._add(self._root, key, content)
+
+ def _graph(self, trie, G):
+ for l in trie:
+ G.add_node(l.token())
+ for ch in l.children():
+ G.add_edge(l.token(),ch.token())
+ self._graph(l.children(), G)
+
+ def graph(self):
+ G = pgv.AGraph(directed=True)
+ G.add_node("sitemap")
+ for ch in self._root:
+ G.add_edge("sitemap",ch.token())
+ self._graph(self._root, G)
+# G.layout('dot')
+# G.draw('g.png')
+# print G.string()
+
+ def _menu(self, trie, lang, page, css):
+ html = "<ul%s>\n" % css
+ for l in trie:
+ sel = ''
+ p = l.value().page(lang)
+ if p == page:
+ sel = ' class="selected"'
+ if p != None:
+ html += '<li%s><a href="%s">%s</a>\n' \
+ % (sel,l.value().link(),p.menu())
+ else:
+ html += '<li%s><a href="%s.en" hreflang="en">%s</a>*\n' \
+ % (sel,l.value().link(), l.value().page('en').menu())
+ if l.children():
+ html += self._menu(l.children(), lang, page, "")
+ html += "</ul>\n"
+ return html
+
+ def menu(self,lang,page,cssclass):
+ css = ''
+ if cssclass:
+ css = ' class="'+cssclass+'"'
+ return self._menu(self._root, lang, page, css)
+
+class Sitemap():
+ """Class keeping the internal site structure"""
+ def __init__(self):
+ self._file = 'sitemap.txt'
+ self._tree = Trie()
+ self._sitelang = set()
+ self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
+ self._tranlang = {}
+
+ def add_link(self, link):
+ tokens = filter(None,re.split(r'(^/[\w-]*/|[\w-]*/)',link))
+ self._tree.add(tokens,Link(link))
+
+ def write_map(self):
+ f = open(self._file,'w')
+ f.write('\n'.join(link.link() for link in self._tree))
+ f.close()
+
+ def read_map(self):
+ try:
+ f = open(self._file)
+ sml = f.read().split()
+ f.close()
+ for line in sml:
+ self.add_link(line)
+ except IOError, what_error:
+ print 'INFO: Could not read sitemap.txt - one will be created'
+
+ def set(self):
+ return set(link.link() for link in self._tree)
+
+ def process(self):
+ t1 = time.time()
+ for link in self._tree:
+ link.prepare()
+ t2 = time.time()
+ print "Prepare [%5.2f s]" % (round(t2-t1,2))
+ for link in self._tree:
+ self._sitelang = self._sitelang.union(set(link.languages()))
+ for tran in self._sitelang:
+ if tran != 'en':
+ self._tranlang[tran] = gettext.translation('iso_639_3',
+ languages=[tran])
+ t3 = time.time()
+ print "Language [%5.2f s]" % (round(t3-t2,2))
+ for link in self._tree:
+ link.render()
+ t4 = time.time()
+ print "Render [%5.2f s]" % (round(t4-t3,2))
+ for link in self._tree:
+ link.template(self)
+ t5 = time.time()
+ print "Template [%5.2f s]" % (round(t5-t4,2))
+ t6 = time.time()
+ res = set()
+ cwd = os.getcwd()
+ for link in self._tree:
+ res = res.union(link.resources())
+ for f in res:
+ outfile = tmptarget+f
+ mkdir_p(os.path.dirname(outfile))
+ shutil.copyfile(f,outfile)
+ print "Resources[%5.2f s]" % (round(t6-t5,2))
+ sitmaplink = Link('/sitemap')
+ for l in self._sitelang:
+ sitmaplink.add_page((l,'/sitemap.'+l+'.xml'))
+ for l in self._sitelang:
+ sitmaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap"))
+ sitmaplink.page(l).template(self)
+ t7 = time.time()
+ print "Sitemap [%5.2f s]" % (round(t7-t6,2))
+
+ def graph(self):
+ self._tree.graph()
+
+ def gen_menu(self,lang,page,cssclass):
+ return self._tree.menu(lang,page,cssclass)
+
+ def lang_menu(self,lang,link):
+ html = "<ul>"
+ for l in link.languages():
+ isoxml = u"//iso_639_3_entry[@*='"+l+"']"
+ ln = self._isocode.xml_select(isoxml)[0].name
+ if lang != 'en':
+ ln = self._tranlang[lang].gettext(ln)
+ p = link.link()
+ if p[-1] == '/':
+ p = p +'index'
+ p = p+'.'+l
+ html += '<li><a href="%s" hreflang="%s">%s</a></li>' % (p, l, ln)
+ html += "</ul>"
+ return html
+
+ def publish(self):
+ ssh_cmd(args.output,"mkdir -p")
+ publish(tmptarget, args.output)
+ for res in ["css","images","js","favicon.ico"]:
+ if (os.path.exists(args.style+res)):
+ publish(args.style+res, args.output)
+ ssh_cmd(args.output,"chmod a+rx")
+
+ts = time.time()
+dir_ = Directory()
+sitemap = Sitemap()
+
+dir_.scan()
+sitemap.read_map()
+
+missing = dir_.set() - sitemap.set()
+removed = sitemap.set() - dir_.set()
+for page in removed:
+ print page+' pages missing!!'
+for page in missing:
+ print 'adding missing page '+page
+ sitemap.add_link(page)
+if len(missing)+len(removed) != 0:
+ print 'writing new sitemap - please adjust if needed'
+ sitemap.write_map()
+sitemap.graph()
+
+sitemap.process()
+
+t1 = time.time()
+sitemap.publish()
+t2 = time.time()
+print "Publish [%5.2f s]" % (round(t2-t1,2))
+print "Total [%5.2f s]" % (round(t2-ts,2))
--- /dev/null
+#!/usr/bin/python
+import os
+import fnmatch
+import subprocess
+import amara
+import re
+import tempfile
+import errno
+import time
+import argparse
+import shutil
+import pygraphviz as pgv
+import glob
+import gettext
+import shutil
+from amara import bindery
+from amara.xslt import transform
+from Cheetah.Template import Template
+
+parser = argparse.ArgumentParser(description='Process docbook article tree.')
+parser.add_argument('--style', nargs='?',
+ default=os.path.dirname(os.getcwd())+'/style/default/')
+parser.add_argument('--output', nargs='?',
+ default=os.path.dirname(os.getcwd())+'/htdocs/')
+args = parser.parse_args()
+
+style_xslt = args.style+"docbook.xsl"
+outputdir = args.output
+
+tmptarget = tempfile.mkdtemp()+'/'
+
+valid_scripts = ['.py','.pl']
+MAXLEVEL = 10000
+
+def mkdir_p(path):
+ try:
+ os.makedirs(path)
+ except OSError as exc: # Python >2.5
+ if exc.errno == errno.EEXIST:
+ pass
+ else: raise
+
+def publish(src,target):
+ cmd = ["rsync","-a","--delete",src,target]
+ retcode = subprocess.call(cmd)
+ if retcode:
+ print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+
+def ssh_cmd(target, command):
+ t = target.split(":")
+ c = command.split()
+ cmd = ["ssh",t[0],c[0],c[1],t[1]]
+ retcode = subprocess.call(cmd)
+ if retcode:
+ print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+
+PREFIXES={u'db': u'http://docbook.org/ns/docbook',
+ u'xi': u'http://www.w3.org/2001/XInclude',
+ u'xl': u'http://www.w3.org/1999/xlink',
+ u'html' : u'http://www.w3.org/1999/xhtml'}
+
+class Directory():
+ """Class containing the state of the directory with articles"""
+ def __init__(self):
+ self._cwd = '.'
+ self._tree = []
+
+ def scan(self):
+ for dirname, dirnames, filenames in os.walk(self._cwd):
+ for filename in filenames:
+ if fnmatch.fnmatch(filename, '*.xml'):
+ file_ = os.path.join(dirname,filename)
+ doc = bindery.parse(file_, prefixes=PREFIXES)
+ title = doc.xml_select(u'/db:article/db:info/db:title')
+ menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
+ if title and menu:
+ base = file_.split('.')[1]
+ link = base.replace('index','')
+ self._tree.append(link)
+
+ def set(self):
+ return set(self._tree)
+
+class Page():
+ """Class representing a version of a webpage"""
+ def __init__(self,link,page):
+ self._link = link
+ self._file = page[1]
+ self._lang = page[0]
+ self._doc = None
+ self._resources = []
+ self._title = None
+ self._menu = None
+ self._rendered_article = None
+
+ def language(self):
+ return self._lang
+
+ def resources(self):
+ return set(self._resources)
+
+ def menu(self):
+ return self._menu
+
+ def set_article(self,art):
+ self._rendered_article = art
+
+ def prepare(self):
+ self._doc = bindery.parse(self._file, prefixes=PREFIXES)
+ if self._doc.xml_select(u'/db:article/db:info/db:title'):
+ self._title = unicode(self._doc.article.info.title)
+ if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
+ self._menu = unicode(self._doc.article.info.titleabbrev)
+
+ dirname = os.path.dirname(self._file)
+ code = self._doc.xml_select(u"//xi:include[@parse='text']")
+ if code:
+ for c in code:
+ (p, ext) = os.path.splitext(c.href)
+ if ext in valid_scripts:
+ exe = []
+ exe.append(os.path.join(os.path.abspath(dirname)+'/'+c.href))
+ if c.xml_select(u"//xi:include[@accept-language]"):
+ alang = c.xml_attributes[None, "accept-language"]
+ exe.append("lang="+alang)
+ if c.xml_select(u"//xi:include[@xpointer]"):
+ exe.append("xptr="+c.xpointer)
+ xml = subprocess.Popen(exe,stdout=subprocess.PIPE)
+ xstr = bindery.parse(str(xml.stdout.read()))
+ idp = c.xml_index_on_parent
+ for x in xstr.xml_children:
+ c.xml_parent.xml_insert(idp,x)
+ c.xml_parent.xml_remove(c)
+
+ for r in self._doc.xml_select(u"//db:link[@xl:href]"):
+ rf = os.path.join(dirname,r.href)
+ if os.path.isfile(rf):
+ self._resources.append(rf)
+ for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
+ im = os.path.join(dirname,i.fileref)
+ if os.path.isfile(im):
+ self._resources.append(im)
+ for i in self._doc.xml_select(u"//html:form[@action]"):
+ pyscript = re.split('\.py',i.action,1)[0]+'.py'
+ im = os.path.join(dirname,pyscript)
+ if os.path.isfile(im):
+ self._resources.append(im)
+
+ def render(self):
+ # amara can not handle the docbook stylesheets
+ # xmlarticle = transform(doc,style_xslt)
+ cwd = os.getcwd()
+ dirname = os.path.dirname(self._file)
+ os.chdir(dirname)
+ infile = os.path.basename(tempfile.mktemp())
+ outfile = tempfile.mktemp()
+ tfi = open(infile,'w')
+ tfi.write(self._doc.xml_encode(omit_xml_declaration=True))
+ tfi.close()
+# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
+ cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
+ retcode = subprocess.call(cmd)
+ if retcode:
+ print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+ tfo = open(outfile,'r')
+ self._rendered_article = tfo.read()
+ tfo.close()
+ os.remove(infile)
+ os.remove(outfile)
+ os.chdir(cwd)
+
+ def template(self,sitemap):
+ htmlmenu = sitemap.gen_menu(self._lang,None,"menu")
+ levelmenu = sitemap.gen_menu(self._lang,self,"tree")
+ langmenu = sitemap.lang_menu(self._lang,self._link)
+ template = Template(file=args.style+'index.'+self._lang+'.html.tmpl',
+ searchList=[{'title':self._title},
+ {'menu':htmlmenu},
+ {'article':self._rendered_article},
+ {'levelmenu':levelmenu},
+ {'langmenu':langmenu}])
+ outfile = tmptarget+'html'.join(self._file.rsplit('xml',1))
+ mkdir_p(os.path.dirname(outfile))
+ out = open(outfile, 'w')
+ out.write(str(template))
+ out.close()
+
+
+class Link():
+ """Class representing a webpage on the site"""
+ def __init__(self,link):
+ self._link = link
+ # find the representations of the link.
+ self._pages = []
+ path = link
+ if self._link[-1] == '/':
+ path = path+'index'
+ lang = self._scan_languages(path)
+ for l in lang:
+ self._pages.append(Page(self,l))
+
+ def add_page(self,l):
+ self._pages.append(Page(self,l))
+
+ def _scan_languages(self,path):
+ lang = []
+ for l in glob.glob('.'+path+'*'):
+ ls = l.split('.')
+ if len(ls) > 3 and ls[3] == 'xml':
+ lang.append((ls[2],l))
+ return lang
+
+ def link(self):
+ return self._link
+
+ def prepare(self):
+ for page in self._pages:
+ page.prepare()
+
+ def languages(self):
+ p = []
+ for page in self._pages:
+ p.append(page.language())
+ return p
+
+ def render(self):
+ for page in self._pages:
+ page.render()
+
+ def template(self,sitemap):
+ for page in self._pages:
+ page.template(sitemap)
+
+ def page(self,lang):
+ for page in self._pages:
+ if page.language()==lang:
+ return page
+ return None
+
+ def resources(self):
+ res = set()
+ for page in self._pages:
+ res = res.union(page.resources())
+ return res
+
+
+class Node():
+ def __init__(self,token,value):
+ self._token = token
+ self._value = value
+ self._children = []
+
+ def token(self):
+ return self._token
+
+ def value(self):
+ return self._value
+
+ def children(self):
+ return self._children
+
+class Trie():
+ def __init__(self):
+ self._root = []
+
+ def __iter__(self):
+ return self.inorder(self._root)
+
+ def inorder(self,t):
+ for l in t:
+ yield l.value()
+ for x in self.inorder(l.children()):
+ yield x
+
+ def _add(self,trie, key, content):
+ # is the key a leaf
+ k = key.pop(0)
+ if key == []:
+ node = Node(k,content)
+ trie.append(node)
+ else:
+ for ch in trie:
+ if ch.token() == k:
+ self._add(ch.children(), key, content)
+
+ def add(self,key, content):
+ self._add(self._root, key, content)
+
+ def _graph(self, trie, G):
+ for l in trie:
+ G.add_node(l.token())
+ for ch in l.children():
+ G.add_edge(l.token(),ch.token())
+ self._graph(l.children(), G)
+
+ def graph(self):
+ G = pgv.AGraph(directed=True)
+ G.add_node("sitemap")
+ for ch in self._root:
+ G.add_edge("sitemap",ch.token())
+ self._graph(self._root, G)
+# G.layout('dot')
+# G.draw('g.png')
+# print G.string()
+
+ def _menu(self, trie, lang, page, css):
+ html = "<ul%s>\n" % css
+ for l in trie:
+ sel = ''
+ p = l.value().page(lang)
+ if p == page:
+ sel = ' class="selected"'
+ if p != None:
+ html += '<li%s><a href="%s">%s</a>\n' \
+ % (sel,l.value().link(),p.menu())
+ else:
+ html += '<li%s><a href="%s.en" hreflang="en">%s</a>*\n' \
+ % (sel,l.value().link(), l.value().page('en').menu())
+ if l.children():
+ html += self._menu(l.children(), lang, page, "")
+ html += "</ul>\n"
+ return html
+
+ def menu(self,lang,page,cssclass):
+ css = ''
+ if cssclass:
+ css = ' class="'+cssclass+'"'
+ return self._menu(self._root, lang, page, css)
+
+class Sitemap():
+ """Class keeping the internal site structure"""
+ def __init__(self):
+ self._file = 'sitemap.txt'
+ self._tree = Trie()
+ self._sitelang = set()
+ self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
+ self._tranlang = {}
+
+ def add_link(self, link):
+ tokens = filter(None,re.split(r'(^/[\w-]*/|[\w-]*/)',link))
+ self._tree.add(tokens,Link(link))
+
+ def write_map(self):
+ f = open(self._file,'w')
+ f.write('\n'.join(link.link() for link in self._tree))
+ f.close()
+
+ def read_map(self):
+ try:
+ f = open(self._file)
+ sml = f.read().split()
+ f.close()
+ for line in sml:
+ self.add_link(line)
+ except IOError:
+ print 'INFO: Could not read '+self._file+' - one will be created'
+
+ def set(self):
+ return set(link.link() for link in self._tree)
+
+ def process(self):
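+ # full build pipeline: prepare -> render -> template -> resources -> sitemap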
+ t1 = time.time()
+ for link in self._tree:
+ link.prepare()
+ t2 = time.time()
+ print "Prepare [%5.2f s]" % (round(t2-t1,2))
+ for link in self._tree:
+ self._sitelang = self._sitelang.union(set(link.languages()))
+ for tran in self._sitelang:
+ if tran != 'en':
+ self._tranlang[tran] = gettext.translation('iso_639_3',
+ languages=[tran])
+ t3 = time.time()
+ print "Language [%5.2f s]" % (round(t3-t2,2))
+ for link in self._tree:
+ link.render()
+ t4 = time.time()
+ print "Render [%5.2f s]" % (round(t4-t3,2))
+ for link in self._tree:
+ link.template(self)
+ t5 = time.time()
+ print "Template [%5.2f s]" % (round(t5-t4,2))
+ res = set()
+ for link in self._tree:
+ res = res.union(link.resources())
+ for f in res:
+ outfile = tmptarget+f
+ mkdir_p(os.path.dirname(outfile))
+ shutil.copyfile(f,outfile)
+ t6 = time.time()
+ print "Resources[%5.2f s]" % (round(t6-t5,2))
+ sitemaplink = Link('/sitemap')
+ for l in self._sitelang:
+ sitemaplink.add_page((l,'/sitemap.'+l+'.xml'))
+ for l in self._sitelang:
+ sitemaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap"))
+ sitemaplink.page(l).template(self)
+ t7 = time.time()
+ print "Sitemap [%5.2f s]" % (round(t7-t6,2))
+
+ def graph(self):
+ self._tree.graph()
+
+ def gen_menu(self,lang,page,cssclass):
+ return self._tree.menu(lang,page,cssclass)
+
+ def lang_menu(self,lang,link):
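+ # language switcher; the links end in a bare language code, so the
+ # web server is presumably expected to content-negotiate the .html file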
+ html = "<ul>"
+ for l in link.languages():
+ isoxml = u"//iso_639_3_entry[@*='"+l+"']"
+ ln = self._isocode.xml_select(isoxml)[0].name
+ if lang != 'en':
+ ln = self._tranlang[lang].gettext(ln)
+ p = link.link()
+ if p[-1] == '/':
+ p = p +'index'
+ p = p+'.'+l
+ html += '<li><a href="%s" hreflang="%s">%s</a></li>' % (p, l, ln)
+ html += "</ul>"
+ return html
+
+ def publish(self):
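+ # args.output must be a remote rsync/ssh target of the form host:path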
+ ssh_cmd(args.output,"mkdir -p")
+ publish(tmptarget, args.output)
+ for res in ["css","images","js","favicon.ico"]:
+ if (os.path.exists(args.style+res)):
+ publish(args.style+res, args.output)
+ ssh_cmd(args.output,"chmod a+rx")
+
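+ # main: scan the article tree, reconcile it with sitemap.txt,
+ # then build everything and publish the result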
+ts = time.time()
+dir_ = Directory()
+sitemap = Sitemap()
+
+dir_.scan()
+sitemap.read_map()
+
+missing = dir_.set() - sitemap.set()
+removed = sitemap.set() - dir_.set()
+for page in removed:
+ print 'WARNING: '+page+' is in the sitemap but not on disk'
+for page in missing:
+ print 'adding missing page '+page
+ sitemap.add_link(page)
+ if missing or removed:
+ print 'writing new sitemap - please adjust if needed'
+ sitemap.write_map()
+sitemap.graph()
+
+sitemap.process()
+
+t1 = time.time()
+sitemap.publish()
+t2 = time.time()
+print "Publish [%5.2f s]" % (round(t2-t1,2))
+print "Total [%5.2f s]" % (round(t2-ts,2))