From: Fredrik Unger Date: Mon, 2 Apr 2012 14:32:55 +0000 (+0200) Subject: Refactoring out every class into separate files. main,tools and const are not classes. X-Git-Url: https://source.tree.se/git?p=treecutter.git;a=commitdiff_plain;h=cbc71d935810a88200aeeff34d0d7aaa56c81a71 Refactoring out every class into separate files. main,tools and const are not classes. --- diff --git a/treecutter/const.py b/treecutter/const.py index 6f03ff7..924dfde 100755 --- a/treecutter/const.py +++ b/treecutter/const.py @@ -1,456 +1,7 @@ #!/usr/bin/python -import os -import fnmatch -import subprocess -import amara -import re -import tempfile -import errno -import time -import argparse -import shutil -import pygraphviz as pgv -import glob -import gettext -import shutil -from amara import bindery -from amara.xslt import transform -from Cheetah.Template import Template - -parser = argparse.ArgumentParser(description='Process docbook article tree.') -parser.add_argument('--style', nargs='?', - default=os.path.dirname(os.getcwd())+'/style/default/') -parser.add_argument('--output', nargs='?', - default=os.path.dirname(os.getcwd())+'/htdocs/') -args = parser.parse_args() - -style_xslt = args.style+"docbook.xsl" -outputdir = args.output - -tmptarget = tempfile.mkdtemp()+'/' - valid_scripts = ['.py','.pl'] -MAXLEVEL = 10000 - -def mkdir_p(path): - try: - os.makedirs(path) - except OSError as exc: # Python >2.5 - if exc.errno == errno.EEXIST: - pass - else: raise - -def publish(src,target): - cmd = ["rsync","-a","--delete",src,target] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - -def ssh_cmd(target, command): - t = target.split(":") - c = command.split() - cmd = ["ssh",t[0],c[0],c[1],t[1]] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' PREFIXES={u'db': u'http://docbook.org/ns/docbook', u'xi': u'http://www.w3.org/2001/XInclude', u'xl': u'http://www.w3.org/1999/xlink', u'html' : u'http://www.w3.org/1999/xhtml'} - -class Directory(): - """Class containing the state of the directory with articles""" - def __init__(self): - self._cwd = '.' - self._tree = [] - - def scan(self): - for dirname, dirnames, filenames in os.walk(self._cwd): - for filename in filenames: - if fnmatch.fnmatch(filename, '*.xml'): - file_ = os.path.join(dirname,filename) - doc = bindery.parse(file_, prefixes=PREFIXES) - title = doc.xml_select(u'/db:article/db:info/db:title') - menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev') - if title and menu: - base = file_.split('.')[1] - link = base.replace('index','') - self._tree.append(link) - - def set(self): - return set(self._tree) - -class Page(): - """Class representing a version of a webpage""" - def __init__(self,link,page): - self._link = link - self._file = page[1] - self._lang = page[0] - self._doc = None - self._resources = [] - self._title = None - self._menu = None - self._rendered_article = None - - def language(self): - return self._lang - - def resources(self): - return set(self._resources) - - def menu(self): - return self._menu - - def set_article(self,art): - self._rendered_article = art - - def prepare(self): - self._doc = bindery.parse(self._file, prefixes=PREFIXES) - if self._doc.xml_select(u'/db:article/db:info/db:title'): - self._title = unicode(self._doc.article.info.title) - if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'): - self._menu = unicode(self._doc.article.info.titleabbrev) - - dirname = os.path.dirname(self._file) - code = self._doc.xml_select(u"//xi:include[@parse='text']") - if code: - for c in code: - (p, ext) = os.path.splitext(c.href) - if ext in valid_scripts: - exe = [] - exe.append(os.path.join(os.path.abspath(dirname)+'/'+c.href)) - if c.xml_select(u"//xi:include[@accept-language]"): - alang = c.xml_attributes[None, "accept-language"] - exe.append("lang="+alang) - if c.xml_select(u"//xi:include[@xpointer]"): - exe.append("xptr="+c.xpointer) - xml = subprocess.Popen(exe,stdout=subprocess.PIPE) - xstr = bindery.parse(str(xml.stdout.read())) - idp = c.xml_index_on_parent - for x in xstr.xml_children: - c.xml_parent.xml_insert(idp,x) - c.xml_parent.xml_remove(c) - - for r in self._doc.xml_select(u"//db:link[@xl:href]"): - rf = os.path.join(dirname,r.href) - if os.path.isfile(rf): - self._resources.append(rf) - for i in self._doc.xml_select(u"//db:imagedata[@fileref]"): - im = os.path.join(dirname,i.fileref) - if os.path.isfile(im): - self._resources.append(im) - for i in self._doc.xml_select(u"//html:form[@action]"): - pyscript = re.split('\.py',i.action,1)[0]+'.py' - im = os.path.join(dirname,pyscript) - if os.path.isfile(im): - self._resources.append(im) - - def render(self): - # amara can not handle the docbook stylesheets - # xmlarticle = transform(doc,style_xslt) - cwd = os.getcwd() - dirname = os.path.dirname(self._file) - os.chdir(dirname) - infile = os.path.basename(tempfile.mktemp()) - outfile = tempfile.mktemp() - tfi = open(infile,'w') - tfi.write(self._doc.xml_encode(omit_xml_declaration=True)) - tfi.close() -# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt] - cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - tfo = open(outfile,'r') - self._rendered_article = tfo.read() - tfo.close() - os.remove(infile) - os.remove(outfile) - os.chdir(cwd) - - def template(self,sitemap): - htmlmenu = sitemap.gen_menu(self._lang,None,"menu") - levelmenu = sitemap.gen_menu(self._lang,self,"tree") - langmenu = sitemap.lang_menu(self._lang,self._link) - template = Template(file=args.style+'index.'+self._lang+'.html.tmpl', - searchList=[{'title':self._title}, - {'menu':htmlmenu}, - {'article':self._rendered_article}, - {'levelmenu':levelmenu}, - {'langmenu':langmenu}]) - outfile = tmptarget+'html'.join(self._file.rsplit('xml',1)) - mkdir_p(os.path.dirname(outfile)) - out = open(outfile, 'w') - out.write(str(template)) - out.close() - - -class Link(): - """Class representing a webpage on the site""" - def __init__(self,link): - self._link = link - # find the representations of the link. - self._pages = [] - path = link - if self._link[-1] == '/': - path = path+'index' - lang = self._scan_languages(path) - for l in lang: - self._pages.append(Page(self,l)) - - def add_page(self,l): - self._pages.append(Page(self,l)) - - def _scan_languages(self,path): - lang = [] - for l in glob.glob('.'+path+'*'): - ls = l.split('.') - if len(ls) > 3 and ls[3] == 'xml': - lang.append((ls[2],l)) - return lang - - def link(self): - return self._link - - def prepare(self): - for page in self._pages: - page.prepare() - - def languages(self): - p = [] - for page in self._pages: - p.append(page.language()) - return p - - def render(self): - for page in self._pages: - page.render() - - def template(self,sitemap): - for page in self._pages: - page.template(sitemap) - - def page(self,lang): - for page in self._pages: - if page.language()==lang: - return page - return None - - def resources(self): - res = set() - for page in self._pages: - res = res.union(page.resources()) - return res - - -class Node(): - def __init__(self,token,value): - self._token = token - self._value = value - self._children = [] - - def token(self): - return self._token - - def value(self): - return self._value - - def children(self): - return self._children - -class Trie(): - def __init__(self): - self._root = [] - - def __iter__(self): - return self.inorder(self._root) - - def inorder(self,t): - for l in t: - yield l.value() - for x in self.inorder(l.children()): - yield x - - def _add(self,trie, key, content): - # is the key a leaf - k = key.pop(0) - if key == []: - node = Node(k,content) - trie.append(node) - else: - for ch in trie: - if ch.token() == k: - self._add(ch.children(), key, content) - - def add(self,key, content): - self._add(self._root, key, content) - - def _graph(self, trie, G): - for l in trie: - G.add_node(l.token()) - for ch in l.children(): - G.add_edge(l.token(),ch.token()) - self._graph(l.children(), G) - - def graph(self): - G = pgv.AGraph(directed=True) - G.add_node("sitemap") - for ch in self._root: - G.add_edge("sitemap",ch.token()) - self._graph(self._root, G) -# G.layout('dot') -# G.draw('g.png') -# print G.string() - - def _menu(self, trie, lang, page, css): - html = "\n" % css - for l in trie: - sel = '' - p = l.value().page(lang) - if p == page: - sel = ' class="selected"' - if p != None: - html += '%s\n' \ - % (sel,l.value().link(),p.menu()) - else: - html += '%s*\n' \ - % (sel,l.value().link(), l.value().page('en').menu()) - if l.children(): - html += self._menu(l.children(), lang, page, "") - html += "\n" - return html - - def menu(self,lang,page,cssclass): - css = '' - if cssclass: - css = ' class="'+cssclass+'"' - return self._menu(self._root, lang, page, css) - -class Sitemap(): - """Class keeping the internal site structure""" - def __init__(self): - self._file = 'sitemap.txt' - self._tree = Trie() - self._sitelang = set() - self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml') - self._tranlang = {} - - def add_link(self, link): - tokens = filter(None,re.split(r'(^/[\w-]*/|[\w-]*/)',link)) - self._tree.add(tokens,Link(link)) - - def write_map(self): - f = open(self._file,'w') - f.write('\n'.join(link.link() for link in self._tree)) - f.close() - - def read_map(self): - try: - f = open(self._file) - sml = f.read().split() - f.close() - for line in sml: - self.add_link(line) - except IOError, what_error: - print 'INFO: Could not read sitemap.txt - one will be created' - - def set(self): - return set(link.link() for link in self._tree) - - def process(self): - t1 = time.time() - for link in self._tree: - link.prepare() - t2 = time.time() - print "Prepare [%5.2f s]" % (round(t2-t1,2)) - for link in self._tree: - self._sitelang = self._sitelang.union(set(link.languages())) - for tran in self._sitelang: - if tran != 'en': - self._tranlang[tran] = gettext.translation('iso_639_3', - languages=[tran]) - t3 = time.time() - print "Language [%5.2f s]" % (round(t3-t2,2)) - for link in self._tree: - link.render() - t4 = time.time() - print "Render [%5.2f s]" % (round(t4-t3,2)) - for link in self._tree: - link.template(self) - t5 = time.time() - print "Template [%5.2f s]" % (round(t5-t4,2)) - t6 = time.time() - res = set() - cwd = os.getcwd() - for link in self._tree: - res = res.union(link.resources()) - for f in res: - outfile = tmptarget+f - mkdir_p(os.path.dirname(outfile)) - shutil.copyfile(f,outfile) - print "Resources[%5.2f s]" % (round(t6-t5,2)) - sitmaplink = Link('/sitemap') - for l in self._sitelang: - sitmaplink.add_page((l,'/sitemap.'+l+'.xml')) - for l in self._sitelang: - sitmaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap")) - sitmaplink.page(l).template(self) - t7 = time.time() - print "Sitemap [%5.2f s]" % (round(t7-t6,2)) - - def graph(self): - self._tree.graph() - - def gen_menu(self,lang,page,cssclass): - return self._tree.menu(lang,page,cssclass) - - def lang_menu(self,lang,link): - html = "
    " - for l in link.languages(): - isoxml = u"//iso_639_3_entry[@*='"+l+"']" - ln = self._isocode.xml_select(isoxml)[0].name - if lang != 'en': - ln = self._tranlang[lang].gettext(ln) - p = link.link() - if p[-1] == '/': - p = p +'index' - p = p+'.'+l - html += '
  • %s
  • ' % (p, l, ln) - html += "
" - return html - - def publish(self): - ssh_cmd(args.output,"mkdir -p") - publish(tmptarget, args.output) - for res in ["css","images","js","favicon.ico"]: - if (os.path.exists(args.style+res)): - publish(args.style+res, args.output) - ssh_cmd(args.output,"chmod a+rx") - -ts = time.time() -dir_ = Directory() -sitemap = Sitemap() - -dir_.scan() -sitemap.read_map() - -missing = dir_.set() - sitemap.set() -removed = sitemap.set() - dir_.set() -for page in removed: - print page+' pages missing!!' -for page in missing: - print 'adding missing page '+page - sitemap.add_link(page) -if len(missing)+len(removed) != 0: - print 'writing new sitemap - please adjust if needed' - sitemap.write_map() -sitemap.graph() - -sitemap.process() - -t1 = time.time() -sitemap.publish() -t2 = time.time() -print "Publish [%5.2f s]" % (round(t2-t1,2)) -print "Total [%5.2f s]" % (round(t2-ts,2)) diff --git a/treecutter/directory.py b/treecutter/directory.py index 6f03ff7..dd8ae44 100755 --- a/treecutter/directory.py +++ b/treecutter/directory.py @@ -17,48 +17,6 @@ from amara import bindery from amara.xslt import transform from Cheetah.Template import Template -parser = argparse.ArgumentParser(description='Process docbook article tree.') -parser.add_argument('--style', nargs='?', - default=os.path.dirname(os.getcwd())+'/style/default/') -parser.add_argument('--output', nargs='?', - default=os.path.dirname(os.getcwd())+'/htdocs/') -args = parser.parse_args() - -style_xslt = args.style+"docbook.xsl" -outputdir = args.output - -tmptarget = tempfile.mkdtemp()+'/' - -valid_scripts = ['.py','.pl'] -MAXLEVEL = 10000 - -def mkdir_p(path): - try: - os.makedirs(path) - except OSError as exc: # Python >2.5 - if exc.errno == errno.EEXIST: - pass - else: raise - -def publish(src,target): - cmd = ["rsync","-a","--delete",src,target] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - -def ssh_cmd(target, command): - t = target.split(":") - c = command.split() - cmd = ["ssh",t[0],c[0],c[1],t[1]] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - -PREFIXES={u'db': u'http://docbook.org/ns/docbook', - u'xi': u'http://www.w3.org/2001/XInclude', - u'xl': u'http://www.w3.org/1999/xlink', - u'html' : u'http://www.w3.org/1999/xhtml'} - class Directory(): """Class containing the state of the directory with articles""" def __init__(self): @@ -80,377 +38,3 @@ class Directory(): def set(self): return set(self._tree) - -class Page(): - """Class representing a version of a webpage""" - def __init__(self,link,page): - self._link = link - self._file = page[1] - self._lang = page[0] - self._doc = None - self._resources = [] - self._title = None - self._menu = None - self._rendered_article = None - - def language(self): - return self._lang - - def resources(self): - return set(self._resources) - - def menu(self): - return self._menu - - def set_article(self,art): - self._rendered_article = art - - def prepare(self): - self._doc = bindery.parse(self._file, prefixes=PREFIXES) - if self._doc.xml_select(u'/db:article/db:info/db:title'): - self._title = unicode(self._doc.article.info.title) - if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'): - self._menu = unicode(self._doc.article.info.titleabbrev) - - dirname = os.path.dirname(self._file) - code = self._doc.xml_select(u"//xi:include[@parse='text']") - if code: - for c in code: - (p, ext) = os.path.splitext(c.href) - if ext in valid_scripts: - exe = [] - exe.append(os.path.join(os.path.abspath(dirname)+'/'+c.href)) - if c.xml_select(u"//xi:include[@accept-language]"): - alang = c.xml_attributes[None, "accept-language"] - exe.append("lang="+alang) - if c.xml_select(u"//xi:include[@xpointer]"): - exe.append("xptr="+c.xpointer) - xml = subprocess.Popen(exe,stdout=subprocess.PIPE) - xstr = bindery.parse(str(xml.stdout.read())) - idp = c.xml_index_on_parent - for x in xstr.xml_children: - c.xml_parent.xml_insert(idp,x) - c.xml_parent.xml_remove(c) - - for r in self._doc.xml_select(u"//db:link[@xl:href]"): - rf = os.path.join(dirname,r.href) - if os.path.isfile(rf): - self._resources.append(rf) - for i in self._doc.xml_select(u"//db:imagedata[@fileref]"): - im = os.path.join(dirname,i.fileref) - if os.path.isfile(im): - self._resources.append(im) - for i in self._doc.xml_select(u"//html:form[@action]"): - pyscript = re.split('\.py',i.action,1)[0]+'.py' - im = os.path.join(dirname,pyscript) - if os.path.isfile(im): - self._resources.append(im) - - def render(self): - # amara can not handle the docbook stylesheets - # xmlarticle = transform(doc,style_xslt) - cwd = os.getcwd() - dirname = os.path.dirname(self._file) - os.chdir(dirname) - infile = os.path.basename(tempfile.mktemp()) - outfile = tempfile.mktemp() - tfi = open(infile,'w') - tfi.write(self._doc.xml_encode(omit_xml_declaration=True)) - tfi.close() -# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt] - cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - tfo = open(outfile,'r') - self._rendered_article = tfo.read() - tfo.close() - os.remove(infile) - os.remove(outfile) - os.chdir(cwd) - - def template(self,sitemap): - htmlmenu = sitemap.gen_menu(self._lang,None,"menu") - levelmenu = sitemap.gen_menu(self._lang,self,"tree") - langmenu = sitemap.lang_menu(self._lang,self._link) - template = Template(file=args.style+'index.'+self._lang+'.html.tmpl', - searchList=[{'title':self._title}, - {'menu':htmlmenu}, - {'article':self._rendered_article}, - {'levelmenu':levelmenu}, - {'langmenu':langmenu}]) - outfile = tmptarget+'html'.join(self._file.rsplit('xml',1)) - mkdir_p(os.path.dirname(outfile)) - out = open(outfile, 'w') - out.write(str(template)) - out.close() - - -class Link(): - """Class representing a webpage on the site""" - def __init__(self,link): - self._link = link - # find the representations of the link. - self._pages = [] - path = link - if self._link[-1] == '/': - path = path+'index' - lang = self._scan_languages(path) - for l in lang: - self._pages.append(Page(self,l)) - - def add_page(self,l): - self._pages.append(Page(self,l)) - - def _scan_languages(self,path): - lang = [] - for l in glob.glob('.'+path+'*'): - ls = l.split('.') - if len(ls) > 3 and ls[3] == 'xml': - lang.append((ls[2],l)) - return lang - - def link(self): - return self._link - - def prepare(self): - for page in self._pages: - page.prepare() - - def languages(self): - p = [] - for page in self._pages: - p.append(page.language()) - return p - - def render(self): - for page in self._pages: - page.render() - - def template(self,sitemap): - for page in self._pages: - page.template(sitemap) - - def page(self,lang): - for page in self._pages: - if page.language()==lang: - return page - return None - - def resources(self): - res = set() - for page in self._pages: - res = res.union(page.resources()) - return res - - -class Node(): - def __init__(self,token,value): - self._token = token - self._value = value - self._children = [] - - def token(self): - return self._token - - def value(self): - return self._value - - def children(self): - return self._children - -class Trie(): - def __init__(self): - self._root = [] - - def __iter__(self): - return self.inorder(self._root) - - def inorder(self,t): - for l in t: - yield l.value() - for x in self.inorder(l.children()): - yield x - - def _add(self,trie, key, content): - # is the key a leaf - k = key.pop(0) - if key == []: - node = Node(k,content) - trie.append(node) - else: - for ch in trie: - if ch.token() == k: - self._add(ch.children(), key, content) - - def add(self,key, content): - self._add(self._root, key, content) - - def _graph(self, trie, G): - for l in trie: - G.add_node(l.token()) - for ch in l.children(): - G.add_edge(l.token(),ch.token()) - self._graph(l.children(), G) - - def graph(self): - G = pgv.AGraph(directed=True) - G.add_node("sitemap") - for ch in self._root: - G.add_edge("sitemap",ch.token()) - self._graph(self._root, G) -# G.layout('dot') -# G.draw('g.png') -# print G.string() - - def _menu(self, trie, lang, page, css): - html = "\n" % css - for l in trie: - sel = '' - p = l.value().page(lang) - if p == page: - sel = ' class="selected"' - if p != None: - html += '%s\n' \ - % (sel,l.value().link(),p.menu()) - else: - html += '%s*\n' \ - % (sel,l.value().link(), l.value().page('en').menu()) - if l.children(): - html += self._menu(l.children(), lang, page, "") - html += "\n" - return html - - def menu(self,lang,page,cssclass): - css = '' - if cssclass: - css = ' class="'+cssclass+'"' - return self._menu(self._root, lang, page, css) - -class Sitemap(): - """Class keeping the internal site structure""" - def __init__(self): - self._file = 'sitemap.txt' - self._tree = Trie() - self._sitelang = set() - self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml') - self._tranlang = {} - - def add_link(self, link): - tokens = filter(None,re.split(r'(^/[\w-]*/|[\w-]*/)',link)) - self._tree.add(tokens,Link(link)) - - def write_map(self): - f = open(self._file,'w') - f.write('\n'.join(link.link() for link in self._tree)) - f.close() - - def read_map(self): - try: - f = open(self._file) - sml = f.read().split() - f.close() - for line in sml: - self.add_link(line) - except IOError, what_error: - print 'INFO: Could not read sitemap.txt - one will be created' - - def set(self): - return set(link.link() for link in self._tree) - - def process(self): - t1 = time.time() - for link in self._tree: - link.prepare() - t2 = time.time() - print "Prepare [%5.2f s]" % (round(t2-t1,2)) - for link in self._tree: - self._sitelang = self._sitelang.union(set(link.languages())) - for tran in self._sitelang: - if tran != 'en': - self._tranlang[tran] = gettext.translation('iso_639_3', - languages=[tran]) - t3 = time.time() - print "Language [%5.2f s]" % (round(t3-t2,2)) - for link in self._tree: - link.render() - t4 = time.time() - print "Render [%5.2f s]" % (round(t4-t3,2)) - for link in self._tree: - link.template(self) - t5 = time.time() - print "Template [%5.2f s]" % (round(t5-t4,2)) - t6 = time.time() - res = set() - cwd = os.getcwd() - for link in self._tree: - res = res.union(link.resources()) - for f in res: - outfile = tmptarget+f - mkdir_p(os.path.dirname(outfile)) - shutil.copyfile(f,outfile) - print "Resources[%5.2f s]" % (round(t6-t5,2)) - sitmaplink = Link('/sitemap') - for l in self._sitelang: - sitmaplink.add_page((l,'/sitemap.'+l+'.xml')) - for l in self._sitelang: - sitmaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap")) - sitmaplink.page(l).template(self) - t7 = time.time() - print "Sitemap [%5.2f s]" % (round(t7-t6,2)) - - def graph(self): - self._tree.graph() - - def gen_menu(self,lang,page,cssclass): - return self._tree.menu(lang,page,cssclass) - - def lang_menu(self,lang,link): - html = "
    " - for l in link.languages(): - isoxml = u"//iso_639_3_entry[@*='"+l+"']" - ln = self._isocode.xml_select(isoxml)[0].name - if lang != 'en': - ln = self._tranlang[lang].gettext(ln) - p = link.link() - if p[-1] == '/': - p = p +'index' - p = p+'.'+l - html += '
  • %s
  • ' % (p, l, ln) - html += "
" - return html - - def publish(self): - ssh_cmd(args.output,"mkdir -p") - publish(tmptarget, args.output) - for res in ["css","images","js","favicon.ico"]: - if (os.path.exists(args.style+res)): - publish(args.style+res, args.output) - ssh_cmd(args.output,"chmod a+rx") - -ts = time.time() -dir_ = Directory() -sitemap = Sitemap() - -dir_.scan() -sitemap.read_map() - -missing = dir_.set() - sitemap.set() -removed = sitemap.set() - dir_.set() -for page in removed: - print page+' pages missing!!' -for page in missing: - print 'adding missing page '+page - sitemap.add_link(page) -if len(missing)+len(removed) != 0: - print 'writing new sitemap - please adjust if needed' - sitemap.write_map() -sitemap.graph() - -sitemap.process() - -t1 = time.time() -sitemap.publish() -t2 = time.time() -print "Publish [%5.2f s]" % (round(t2-t1,2)) -print "Total [%5.2f s]" % (round(t2-ts,2)) diff --git a/treecutter/link.py b/treecutter/link.py index 6f03ff7..b5dcf83 100755 --- a/treecutter/link.py +++ b/treecutter/link.py @@ -17,175 +17,6 @@ from amara import bindery from amara.xslt import transform from Cheetah.Template import Template -parser = argparse.ArgumentParser(description='Process docbook article tree.') -parser.add_argument('--style', nargs='?', - default=os.path.dirname(os.getcwd())+'/style/default/') -parser.add_argument('--output', nargs='?', - default=os.path.dirname(os.getcwd())+'/htdocs/') -args = parser.parse_args() - -style_xslt = args.style+"docbook.xsl" -outputdir = args.output - -tmptarget = tempfile.mkdtemp()+'/' - -valid_scripts = ['.py','.pl'] -MAXLEVEL = 10000 - -def mkdir_p(path): - try: - os.makedirs(path) - except OSError as exc: # Python >2.5 - if exc.errno == errno.EEXIST: - pass - else: raise - -def publish(src,target): - cmd = ["rsync","-a","--delete",src,target] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - -def ssh_cmd(target, command): - t = target.split(":") - c = command.split() - cmd = ["ssh",t[0],c[0],c[1],t[1]] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - -PREFIXES={u'db': u'http://docbook.org/ns/docbook', - u'xi': u'http://www.w3.org/2001/XInclude', - u'xl': u'http://www.w3.org/1999/xlink', - u'html' : u'http://www.w3.org/1999/xhtml'} - -class Directory(): - """Class containing the state of the directory with articles""" - def __init__(self): - self._cwd = '.' - self._tree = [] - - def scan(self): - for dirname, dirnames, filenames in os.walk(self._cwd): - for filename in filenames: - if fnmatch.fnmatch(filename, '*.xml'): - file_ = os.path.join(dirname,filename) - doc = bindery.parse(file_, prefixes=PREFIXES) - title = doc.xml_select(u'/db:article/db:info/db:title') - menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev') - if title and menu: - base = file_.split('.')[1] - link = base.replace('index','') - self._tree.append(link) - - def set(self): - return set(self._tree) - -class Page(): - """Class representing a version of a webpage""" - def __init__(self,link,page): - self._link = link - self._file = page[1] - self._lang = page[0] - self._doc = None - self._resources = [] - self._title = None - self._menu = None - self._rendered_article = None - - def language(self): - return self._lang - - def resources(self): - return set(self._resources) - - def menu(self): - return self._menu - - def set_article(self,art): - self._rendered_article = art - - def prepare(self): - self._doc = bindery.parse(self._file, prefixes=PREFIXES) - if self._doc.xml_select(u'/db:article/db:info/db:title'): - self._title = unicode(self._doc.article.info.title) - if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'): - self._menu = unicode(self._doc.article.info.titleabbrev) - - dirname = os.path.dirname(self._file) - code = self._doc.xml_select(u"//xi:include[@parse='text']") - if code: - for c in code: - (p, ext) = os.path.splitext(c.href) - if ext in valid_scripts: - exe = [] - exe.append(os.path.join(os.path.abspath(dirname)+'/'+c.href)) - if c.xml_select(u"//xi:include[@accept-language]"): - alang = c.xml_attributes[None, "accept-language"] - exe.append("lang="+alang) - if c.xml_select(u"//xi:include[@xpointer]"): - exe.append("xptr="+c.xpointer) - xml = subprocess.Popen(exe,stdout=subprocess.PIPE) - xstr = bindery.parse(str(xml.stdout.read())) - idp = c.xml_index_on_parent - for x in xstr.xml_children: - c.xml_parent.xml_insert(idp,x) - c.xml_parent.xml_remove(c) - - for r in self._doc.xml_select(u"//db:link[@xl:href]"): - rf = os.path.join(dirname,r.href) - if os.path.isfile(rf): - self._resources.append(rf) - for i in self._doc.xml_select(u"//db:imagedata[@fileref]"): - im = os.path.join(dirname,i.fileref) - if os.path.isfile(im): - self._resources.append(im) - for i in self._doc.xml_select(u"//html:form[@action]"): - pyscript = re.split('\.py',i.action,1)[0]+'.py' - im = os.path.join(dirname,pyscript) - if os.path.isfile(im): - self._resources.append(im) - - def render(self): - # amara can not handle the docbook stylesheets - # xmlarticle = transform(doc,style_xslt) - cwd = os.getcwd() - dirname = os.path.dirname(self._file) - os.chdir(dirname) - infile = os.path.basename(tempfile.mktemp()) - outfile = tempfile.mktemp() - tfi = open(infile,'w') - tfi.write(self._doc.xml_encode(omit_xml_declaration=True)) - tfi.close() -# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt] - cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - tfo = open(outfile,'r') - self._rendered_article = tfo.read() - tfo.close() - os.remove(infile) - os.remove(outfile) - os.chdir(cwd) - - def template(self,sitemap): - htmlmenu = sitemap.gen_menu(self._lang,None,"menu") - levelmenu = sitemap.gen_menu(self._lang,self,"tree") - langmenu = sitemap.lang_menu(self._lang,self._link) - template = Template(file=args.style+'index.'+self._lang+'.html.tmpl', - searchList=[{'title':self._title}, - {'menu':htmlmenu}, - {'article':self._rendered_article}, - {'levelmenu':levelmenu}, - {'langmenu':langmenu}]) - outfile = tmptarget+'html'.join(self._file.rsplit('xml',1)) - mkdir_p(os.path.dirname(outfile)) - out = open(outfile, 'w') - out.write(str(template)) - out.close() - - class Link(): """Class representing a webpage on the site""" def __init__(self,link): @@ -242,215 +73,3 @@ class Link(): for page in self._pages: res = res.union(page.resources()) return res - - -class Node(): - def __init__(self,token,value): - self._token = token - self._value = value - self._children = [] - - def token(self): - return self._token - - def value(self): - return self._value - - def children(self): - return self._children - -class Trie(): - def __init__(self): - self._root = [] - - def __iter__(self): - return self.inorder(self._root) - - def inorder(self,t): - for l in t: - yield l.value() - for x in self.inorder(l.children()): - yield x - - def _add(self,trie, key, content): - # is the key a leaf - k = key.pop(0) - if key == []: - node = Node(k,content) - trie.append(node) - else: - for ch in trie: - if ch.token() == k: - self._add(ch.children(), key, content) - - def add(self,key, content): - self._add(self._root, key, content) - - def _graph(self, trie, G): - for l in trie: - G.add_node(l.token()) - for ch in l.children(): - G.add_edge(l.token(),ch.token()) - self._graph(l.children(), G) - - def graph(self): - G = pgv.AGraph(directed=True) - G.add_node("sitemap") - for ch in self._root: - G.add_edge("sitemap",ch.token()) - self._graph(self._root, G) -# G.layout('dot') -# G.draw('g.png') -# print G.string() - - def _menu(self, trie, lang, page, css): - html = "\n" % css - for l in trie: - sel = '' - p = l.value().page(lang) - if p == page: - sel = ' class="selected"' - if p != None: - html += '%s\n' \ - % (sel,l.value().link(),p.menu()) - else: - html += '%s*\n' \ - % (sel,l.value().link(), l.value().page('en').menu()) - if l.children(): - html += self._menu(l.children(), lang, page, "") - html += "\n" - return html - - def menu(self,lang,page,cssclass): - css = '' - if cssclass: - css = ' class="'+cssclass+'"' - return self._menu(self._root, lang, page, css) - -class Sitemap(): - """Class keeping the internal site structure""" - def __init__(self): - self._file = 'sitemap.txt' - self._tree = Trie() - self._sitelang = set() - self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml') - self._tranlang = {} - - def add_link(self, link): - tokens = filter(None,re.split(r'(^/[\w-]*/|[\w-]*/)',link)) - self._tree.add(tokens,Link(link)) - - def write_map(self): - f = open(self._file,'w') - f.write('\n'.join(link.link() for link in self._tree)) - f.close() - - def read_map(self): - try: - f = open(self._file) - sml = f.read().split() - f.close() - for line in sml: - self.add_link(line) - except IOError, what_error: - print 'INFO: Could not read sitemap.txt - one will be created' - - def set(self): - return set(link.link() for link in self._tree) - - def process(self): - t1 = time.time() - for link in self._tree: - link.prepare() - t2 = time.time() - print "Prepare [%5.2f s]" % (round(t2-t1,2)) - for link in self._tree: - self._sitelang = self._sitelang.union(set(link.languages())) - for tran in self._sitelang: - if tran != 'en': - self._tranlang[tran] = gettext.translation('iso_639_3', - languages=[tran]) - t3 = time.time() - print "Language [%5.2f s]" % (round(t3-t2,2)) - for link in self._tree: - link.render() - t4 = time.time() - print "Render [%5.2f s]" % (round(t4-t3,2)) - for link in self._tree: - link.template(self) - t5 = time.time() - print "Template [%5.2f s]" % (round(t5-t4,2)) - t6 = time.time() - res = set() - cwd = os.getcwd() - for link in self._tree: - res = res.union(link.resources()) - for f in res: - outfile = tmptarget+f - mkdir_p(os.path.dirname(outfile)) - shutil.copyfile(f,outfile) - print "Resources[%5.2f s]" % (round(t6-t5,2)) - sitmaplink = Link('/sitemap') - for l in self._sitelang: - sitmaplink.add_page((l,'/sitemap.'+l+'.xml')) - for l in self._sitelang: - sitmaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap")) - sitmaplink.page(l).template(self) - t7 = time.time() - print "Sitemap [%5.2f s]" % (round(t7-t6,2)) - - def graph(self): - self._tree.graph() - - def gen_menu(self,lang,page,cssclass): - return self._tree.menu(lang,page,cssclass) - - def lang_menu(self,lang,link): - html = "
    " - for l in link.languages(): - isoxml = u"//iso_639_3_entry[@*='"+l+"']" - ln = self._isocode.xml_select(isoxml)[0].name - if lang != 'en': - ln = self._tranlang[lang].gettext(ln) - p = link.link() - if p[-1] == '/': - p = p +'index' - p = p+'.'+l - html += '
  • %s
  • ' % (p, l, ln) - html += "
" - return html - - def publish(self): - ssh_cmd(args.output,"mkdir -p") - publish(tmptarget, args.output) - for res in ["css","images","js","favicon.ico"]: - if (os.path.exists(args.style+res)): - publish(args.style+res, args.output) - ssh_cmd(args.output,"chmod a+rx") - -ts = time.time() -dir_ = Directory() -sitemap = Sitemap() - -dir_.scan() -sitemap.read_map() - -missing = dir_.set() - sitemap.set() -removed = sitemap.set() - dir_.set() -for page in removed: - print page+' pages missing!!' -for page in missing: - print 'adding missing page '+page - sitemap.add_link(page) -if len(missing)+len(removed) != 0: - print 'writing new sitemap - please adjust if needed' - sitemap.write_map() -sitemap.graph() - -sitemap.process() - -t1 = time.time() -sitemap.publish() -t2 = time.time() -print "Publish [%5.2f s]" % (round(t2-t1,2)) -print "Total [%5.2f s]" % (round(t2-ts,2)) diff --git a/treecutter/main.py b/treecutter/main.py index 6f03ff7..3c30824 100755 --- a/treecutter/main.py +++ b/treecutter/main.py @@ -29,405 +29,8 @@ outputdir = args.output tmptarget = tempfile.mkdtemp()+'/' -valid_scripts = ['.py','.pl'] MAXLEVEL = 10000 -def mkdir_p(path): - try: - os.makedirs(path) - except OSError as exc: # Python >2.5 - if exc.errno == errno.EEXIST: - pass - else: raise - -def publish(src,target): - cmd = ["rsync","-a","--delete",src,target] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - -def ssh_cmd(target, command): - t = target.split(":") - c = command.split() - cmd = ["ssh",t[0],c[0],c[1],t[1]] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - -PREFIXES={u'db': u'http://docbook.org/ns/docbook', - u'xi': u'http://www.w3.org/2001/XInclude', - u'xl': u'http://www.w3.org/1999/xlink', - u'html' : u'http://www.w3.org/1999/xhtml'} - -class Directory(): - """Class containing the state of the directory with articles""" - def __init__(self): - self._cwd = '.' - self._tree = [] - - def scan(self): - for dirname, dirnames, filenames in os.walk(self._cwd): - for filename in filenames: - if fnmatch.fnmatch(filename, '*.xml'): - file_ = os.path.join(dirname,filename) - doc = bindery.parse(file_, prefixes=PREFIXES) - title = doc.xml_select(u'/db:article/db:info/db:title') - menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev') - if title and menu: - base = file_.split('.')[1] - link = base.replace('index','') - self._tree.append(link) - - def set(self): - return set(self._tree) - -class Page(): - """Class representing a version of a webpage""" - def __init__(self,link,page): - self._link = link - self._file = page[1] - self._lang = page[0] - self._doc = None - self._resources = [] - self._title = None - self._menu = None - self._rendered_article = None - - def language(self): - return self._lang - - def resources(self): - return set(self._resources) - - def menu(self): - return self._menu - - def set_article(self,art): - self._rendered_article = art - - def prepare(self): - self._doc = bindery.parse(self._file, prefixes=PREFIXES) - if self._doc.xml_select(u'/db:article/db:info/db:title'): - self._title = unicode(self._doc.article.info.title) - if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'): - self._menu = unicode(self._doc.article.info.titleabbrev) - - dirname = os.path.dirname(self._file) - code = self._doc.xml_select(u"//xi:include[@parse='text']") - if code: - for c in code: - (p, ext) = os.path.splitext(c.href) - if ext in valid_scripts: - exe = [] - exe.append(os.path.join(os.path.abspath(dirname)+'/'+c.href)) - if c.xml_select(u"//xi:include[@accept-language]"): - alang = c.xml_attributes[None, "accept-language"] - exe.append("lang="+alang) - if c.xml_select(u"//xi:include[@xpointer]"): - exe.append("xptr="+c.xpointer) - xml = subprocess.Popen(exe,stdout=subprocess.PIPE) - xstr = bindery.parse(str(xml.stdout.read())) - idp = c.xml_index_on_parent - for x in xstr.xml_children: - c.xml_parent.xml_insert(idp,x) - c.xml_parent.xml_remove(c) - - for r in self._doc.xml_select(u"//db:link[@xl:href]"): - rf = os.path.join(dirname,r.href) - if os.path.isfile(rf): - self._resources.append(rf) - for i in self._doc.xml_select(u"//db:imagedata[@fileref]"): - im = os.path.join(dirname,i.fileref) - if os.path.isfile(im): - self._resources.append(im) - for i in self._doc.xml_select(u"//html:form[@action]"): - pyscript = re.split('\.py',i.action,1)[0]+'.py' - im = os.path.join(dirname,pyscript) - if os.path.isfile(im): - self._resources.append(im) - - def render(self): - # amara can not handle the docbook stylesheets - # xmlarticle = transform(doc,style_xslt) - cwd = os.getcwd() - dirname = os.path.dirname(self._file) - os.chdir(dirname) - infile = os.path.basename(tempfile.mktemp()) - outfile = tempfile.mktemp() - tfi = open(infile,'w') - tfi.write(self._doc.xml_encode(omit_xml_declaration=True)) - tfi.close() -# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt] - cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - tfo = open(outfile,'r') - self._rendered_article = tfo.read() - tfo.close() - os.remove(infile) - os.remove(outfile) - os.chdir(cwd) - - def template(self,sitemap): - htmlmenu = sitemap.gen_menu(self._lang,None,"menu") - levelmenu = sitemap.gen_menu(self._lang,self,"tree") - langmenu = sitemap.lang_menu(self._lang,self._link) - template = Template(file=args.style+'index.'+self._lang+'.html.tmpl', - searchList=[{'title':self._title}, - {'menu':htmlmenu}, - {'article':self._rendered_article}, - {'levelmenu':levelmenu}, - {'langmenu':langmenu}]) - outfile = tmptarget+'html'.join(self._file.rsplit('xml',1)) - mkdir_p(os.path.dirname(outfile)) - out = open(outfile, 'w') - out.write(str(template)) - out.close() - - -class Link(): - """Class representing a webpage on the site""" - def __init__(self,link): - self._link = link - # find the representations of the link. - self._pages = [] - path = link - if self._link[-1] == '/': - path = path+'index' - lang = self._scan_languages(path) - for l in lang: - self._pages.append(Page(self,l)) - - def add_page(self,l): - self._pages.append(Page(self,l)) - - def _scan_languages(self,path): - lang = [] - for l in glob.glob('.'+path+'*'): - ls = l.split('.') - if len(ls) > 3 and ls[3] == 'xml': - lang.append((ls[2],l)) - return lang - - def link(self): - return self._link - - def prepare(self): - for page in self._pages: - page.prepare() - - def languages(self): - p = [] - for page in self._pages: - p.append(page.language()) - return p - - def render(self): - for page in self._pages: - page.render() - - def template(self,sitemap): - for page in self._pages: - page.template(sitemap) - - def page(self,lang): - for page in self._pages: - if page.language()==lang: - return page - return None - - def resources(self): - res = set() - for page in self._pages: - res = res.union(page.resources()) - return res - - -class Node(): - def __init__(self,token,value): - self._token = token - self._value = value - self._children = [] - - def token(self): - return self._token - - def value(self): - return self._value - - def children(self): - return self._children - -class Trie(): - def __init__(self): - self._root = [] - - def __iter__(self): - return self.inorder(self._root) - - def inorder(self,t): - for l in t: - yield l.value() - for x in self.inorder(l.children()): - yield x - - def _add(self,trie, key, content): - # is the key a leaf - k = key.pop(0) - if key == []: - node = Node(k,content) - trie.append(node) - else: - for ch in trie: - if ch.token() == k: - self._add(ch.children(), key, content) - - def add(self,key, content): - self._add(self._root, key, content) - - def _graph(self, trie, G): - for l in trie: - G.add_node(l.token()) - for ch in l.children(): - G.add_edge(l.token(),ch.token()) - self._graph(l.children(), G) - - def graph(self): - G = pgv.AGraph(directed=True) - G.add_node("sitemap") - for ch in self._root: - G.add_edge("sitemap",ch.token()) - self._graph(self._root, G) -# G.layout('dot') -# G.draw('g.png') -# print G.string() - - def _menu(self, trie, lang, page, css): - html = "\n" % css - for l in trie: - sel = '' - p = l.value().page(lang) - if p == page: - sel = ' class="selected"' - if p != None: - html += '%s\n' \ - % (sel,l.value().link(),p.menu()) - else: - html += '%s*\n' \ - % (sel,l.value().link(), l.value().page('en').menu()) - if l.children(): - html += self._menu(l.children(), lang, page, "") - html += "\n" - return html - - def menu(self,lang,page,cssclass): - css = '' - if cssclass: - css = ' class="'+cssclass+'"' - return self._menu(self._root, lang, page, css) - -class Sitemap(): - """Class keeping the internal site structure""" - def __init__(self): - self._file = 'sitemap.txt' - self._tree = Trie() - self._sitelang = set() - self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml') - self._tranlang = {} - - def add_link(self, link): - tokens = filter(None,re.split(r'(^/[\w-]*/|[\w-]*/)',link)) - self._tree.add(tokens,Link(link)) - - def write_map(self): - f = open(self._file,'w') - f.write('\n'.join(link.link() for link in self._tree)) - f.close() - - def read_map(self): - try: - f = open(self._file) - sml = f.read().split() - f.close() - for line in sml: - self.add_link(line) - except IOError, what_error: - print 'INFO: Could not read sitemap.txt - one will be created' - - def set(self): - return set(link.link() for link in self._tree) - - def process(self): - t1 = time.time() - for link in self._tree: - link.prepare() - t2 = time.time() - print "Prepare [%5.2f s]" % (round(t2-t1,2)) - for link in self._tree: - self._sitelang = self._sitelang.union(set(link.languages())) - for tran in self._sitelang: - if tran != 'en': - self._tranlang[tran] = gettext.translation('iso_639_3', - languages=[tran]) - t3 = time.time() - print "Language [%5.2f s]" % (round(t3-t2,2)) - for link in self._tree: - link.render() - t4 = time.time() - print "Render [%5.2f s]" % (round(t4-t3,2)) - for link in self._tree: - link.template(self) - t5 = time.time() - print "Template [%5.2f s]" % (round(t5-t4,2)) - t6 = time.time() - res = set() - cwd = os.getcwd() - for link in self._tree: - res = res.union(link.resources()) - for f in res: - outfile = tmptarget+f - mkdir_p(os.path.dirname(outfile)) - shutil.copyfile(f,outfile) - print "Resources[%5.2f s]" % (round(t6-t5,2)) - sitmaplink = Link('/sitemap') - for l in self._sitelang: - sitmaplink.add_page((l,'/sitemap.'+l+'.xml')) - for l in self._sitelang: - sitmaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap")) - sitmaplink.page(l).template(self) - t7 = time.time() - print "Sitemap [%5.2f s]" % (round(t7-t6,2)) - - def graph(self): - self._tree.graph() - - def gen_menu(self,lang,page,cssclass): - return self._tree.menu(lang,page,cssclass) - - def lang_menu(self,lang,link): - html = "
    " - for l in link.languages(): - isoxml = u"//iso_639_3_entry[@*='"+l+"']" - ln = self._isocode.xml_select(isoxml)[0].name - if lang != 'en': - ln = self._tranlang[lang].gettext(ln) - p = link.link() - if p[-1] == '/': - p = p +'index' - p = p+'.'+l - html += '
  • %s
  • ' % (p, l, ln) - html += "
" - return html - - def publish(self): - ssh_cmd(args.output,"mkdir -p") - publish(tmptarget, args.output) - for res in ["css","images","js","favicon.ico"]: - if (os.path.exists(args.style+res)): - publish(args.style+res, args.output) - ssh_cmd(args.output,"chmod a+rx") - ts = time.time() dir_ = Directory() sitemap = Sitemap() diff --git a/treecutter/page.py b/treecutter/page.py index 6f03ff7..a6c5502 100755 --- a/treecutter/page.py +++ b/treecutter/page.py @@ -17,70 +17,6 @@ from amara import bindery from amara.xslt import transform from Cheetah.Template import Template -parser = argparse.ArgumentParser(description='Process docbook article tree.') -parser.add_argument('--style', nargs='?', - default=os.path.dirname(os.getcwd())+'/style/default/') -parser.add_argument('--output', nargs='?', - default=os.path.dirname(os.getcwd())+'/htdocs/') -args = parser.parse_args() - -style_xslt = args.style+"docbook.xsl" -outputdir = args.output - -tmptarget = tempfile.mkdtemp()+'/' - -valid_scripts = ['.py','.pl'] -MAXLEVEL = 10000 - -def mkdir_p(path): - try: - os.makedirs(path) - except OSError as exc: # Python >2.5 - if exc.errno == errno.EEXIST: - pass - else: raise - -def publish(src,target): - cmd = ["rsync","-a","--delete",src,target] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - -def ssh_cmd(target, command): - t = target.split(":") - c = command.split() - cmd = ["ssh",t[0],c[0],c[1],t[1]] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - -PREFIXES={u'db': u'http://docbook.org/ns/docbook', - u'xi': u'http://www.w3.org/2001/XInclude', - u'xl': u'http://www.w3.org/1999/xlink', - u'html' : u'http://www.w3.org/1999/xhtml'} - -class Directory(): - """Class containing the state of the directory with articles""" - def __init__(self): - self._cwd = '.' - self._tree = [] - - def scan(self): - for dirname, dirnames, filenames in os.walk(self._cwd): - for filename in filenames: - if fnmatch.fnmatch(filename, '*.xml'): - file_ = os.path.join(dirname,filename) - doc = bindery.parse(file_, prefixes=PREFIXES) - title = doc.xml_select(u'/db:article/db:info/db:title') - menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev') - if title and menu: - base = file_.split('.')[1] - link = base.replace('index','') - self._tree.append(link) - - def set(self): - return set(self._tree) - class Page(): """Class representing a version of a webpage""" def __init__(self,link,page): @@ -184,273 +120,3 @@ class Page(): out = open(outfile, 'w') out.write(str(template)) out.close() - - -class Link(): - """Class representing a webpage on the site""" - def __init__(self,link): - self._link = link - # find the representations of the link. - self._pages = [] - path = link - if self._link[-1] == '/': - path = path+'index' - lang = self._scan_languages(path) - for l in lang: - self._pages.append(Page(self,l)) - - def add_page(self,l): - self._pages.append(Page(self,l)) - - def _scan_languages(self,path): - lang = [] - for l in glob.glob('.'+path+'*'): - ls = l.split('.') - if len(ls) > 3 and ls[3] == 'xml': - lang.append((ls[2],l)) - return lang - - def link(self): - return self._link - - def prepare(self): - for page in self._pages: - page.prepare() - - def languages(self): - p = [] - for page in self._pages: - p.append(page.language()) - return p - - def render(self): - for page in self._pages: - page.render() - - def template(self,sitemap): - for page in self._pages: - page.template(sitemap) - - def page(self,lang): - for page in self._pages: - if page.language()==lang: - return page - return None - - def resources(self): - res = set() - for page in self._pages: - res = res.union(page.resources()) - return res - - -class Node(): - def __init__(self,token,value): - self._token = token - self._value = value - self._children = [] - - def token(self): - return self._token - - def value(self): - return self._value - - def children(self): - return self._children - -class Trie(): - def __init__(self): - self._root = [] - - def __iter__(self): - return self.inorder(self._root) - - def inorder(self,t): - for l in t: - yield l.value() - for x in self.inorder(l.children()): - yield x - - def _add(self,trie, key, content): - # is the key a leaf - k = key.pop(0) - if key == []: - node = Node(k,content) - trie.append(node) - else: - for ch in trie: - if ch.token() == k: - self._add(ch.children(), key, content) - - def add(self,key, content): - self._add(self._root, key, content) - - def _graph(self, trie, G): - for l in trie: - G.add_node(l.token()) - for ch in l.children(): - G.add_edge(l.token(),ch.token()) - self._graph(l.children(), G) - - def graph(self): - G = pgv.AGraph(directed=True) - G.add_node("sitemap") - for ch in self._root: - G.add_edge("sitemap",ch.token()) - self._graph(self._root, G) -# G.layout('dot') -# G.draw('g.png') -# print G.string() - - def _menu(self, trie, lang, page, css): - html = "\n" % css - for l in trie: - sel = '' - p = l.value().page(lang) - if p == page: - sel = ' class="selected"' - if p != None: - html += '%s\n' \ - % (sel,l.value().link(),p.menu()) - else: - html += '%s*\n' \ - % (sel,l.value().link(), l.value().page('en').menu()) - if l.children(): - html += self._menu(l.children(), lang, page, "") - html += "\n" - return html - - def menu(self,lang,page,cssclass): - css = '' - if cssclass: - css = ' class="'+cssclass+'"' - return self._menu(self._root, lang, page, css) - -class Sitemap(): - """Class keeping the internal site structure""" - def __init__(self): - self._file = 'sitemap.txt' - self._tree = Trie() - self._sitelang = set() - self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml') - self._tranlang = {} - - def add_link(self, link): - tokens = filter(None,re.split(r'(^/[\w-]*/|[\w-]*/)',link)) - self._tree.add(tokens,Link(link)) - - def write_map(self): - f = open(self._file,'w') - f.write('\n'.join(link.link() for link in self._tree)) - f.close() - - def read_map(self): - try: - f = open(self._file) - sml = f.read().split() - f.close() - for line in sml: - self.add_link(line) - except IOError, what_error: - print 'INFO: Could not read sitemap.txt - one will be created' - - def set(self): - return set(link.link() for link in self._tree) - - def process(self): - t1 = time.time() - for link in self._tree: - link.prepare() - t2 = time.time() - print "Prepare [%5.2f s]" % (round(t2-t1,2)) - for link in self._tree: - self._sitelang = self._sitelang.union(set(link.languages())) - for tran in self._sitelang: - if tran != 'en': - self._tranlang[tran] = gettext.translation('iso_639_3', - languages=[tran]) - t3 = time.time() - print "Language [%5.2f s]" % (round(t3-t2,2)) - for link in self._tree: - link.render() - t4 = time.time() - print "Render [%5.2f s]" % (round(t4-t3,2)) - for link in self._tree: - link.template(self) - t5 = time.time() - print "Template [%5.2f s]" % (round(t5-t4,2)) - t6 = time.time() - res = set() - cwd = os.getcwd() - for link in self._tree: - res = res.union(link.resources()) - for f in res: - outfile = tmptarget+f - mkdir_p(os.path.dirname(outfile)) - shutil.copyfile(f,outfile) - print "Resources[%5.2f s]" % (round(t6-t5,2)) - sitmaplink = Link('/sitemap') - for l in self._sitelang: - sitmaplink.add_page((l,'/sitemap.'+l+'.xml')) - for l in self._sitelang: - sitmaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap")) - sitmaplink.page(l).template(self) - t7 = time.time() - print "Sitemap [%5.2f s]" % (round(t7-t6,2)) - - def graph(self): - self._tree.graph() - - def gen_menu(self,lang,page,cssclass): - return self._tree.menu(lang,page,cssclass) - - def lang_menu(self,lang,link): - html = "
    " - for l in link.languages(): - isoxml = u"//iso_639_3_entry[@*='"+l+"']" - ln = self._isocode.xml_select(isoxml)[0].name - if lang != 'en': - ln = self._tranlang[lang].gettext(ln) - p = link.link() - if p[-1] == '/': - p = p +'index' - p = p+'.'+l - html += '
  • %s
  • ' % (p, l, ln) - html += "
" - return html - - def publish(self): - ssh_cmd(args.output,"mkdir -p") - publish(tmptarget, args.output) - for res in ["css","images","js","favicon.ico"]: - if (os.path.exists(args.style+res)): - publish(args.style+res, args.output) - ssh_cmd(args.output,"chmod a+rx") - -ts = time.time() -dir_ = Directory() -sitemap = Sitemap() - -dir_.scan() -sitemap.read_map() - -missing = dir_.set() - sitemap.set() -removed = sitemap.set() - dir_.set() -for page in removed: - print page+' pages missing!!' -for page in missing: - print 'adding missing page '+page - sitemap.add_link(page) -if len(missing)+len(removed) != 0: - print 'writing new sitemap - please adjust if needed' - sitemap.write_map() -sitemap.graph() - -sitemap.process() - -t1 = time.time() -sitemap.publish() -t2 = time.time() -print "Publish [%5.2f s]" % (round(t2-t1,2)) -print "Total [%5.2f s]" % (round(t2-ts,2)) diff --git a/treecutter/sitemap.py b/treecutter/sitemap.py index 6f03ff7..11467de 100755 --- a/treecutter/sitemap.py +++ b/treecutter/sitemap.py @@ -17,316 +17,6 @@ from amara import bindery from amara.xslt import transform from Cheetah.Template import Template -parser = argparse.ArgumentParser(description='Process docbook article tree.') -parser.add_argument('--style', nargs='?', - default=os.path.dirname(os.getcwd())+'/style/default/') -parser.add_argument('--output', nargs='?', - default=os.path.dirname(os.getcwd())+'/htdocs/') -args = parser.parse_args() - -style_xslt = args.style+"docbook.xsl" -outputdir = args.output - -tmptarget = tempfile.mkdtemp()+'/' - -valid_scripts = ['.py','.pl'] -MAXLEVEL = 10000 - -def mkdir_p(path): - try: - os.makedirs(path) - except OSError as exc: # Python >2.5 - if exc.errno == errno.EEXIST: - pass - else: raise - -def publish(src,target): - cmd = ["rsync","-a","--delete",src,target] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - -def ssh_cmd(target, command): - t = target.split(":") - c = command.split() - cmd = ["ssh",t[0],c[0],c[1],t[1]] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - -PREFIXES={u'db': u'http://docbook.org/ns/docbook', - u'xi': u'http://www.w3.org/2001/XInclude', - u'xl': u'http://www.w3.org/1999/xlink', - u'html' : u'http://www.w3.org/1999/xhtml'} - -class Directory(): - """Class containing the state of the directory with articles""" - def __init__(self): - self._cwd = '.' - self._tree = [] - - def scan(self): - for dirname, dirnames, filenames in os.walk(self._cwd): - for filename in filenames: - if fnmatch.fnmatch(filename, '*.xml'): - file_ = os.path.join(dirname,filename) - doc = bindery.parse(file_, prefixes=PREFIXES) - title = doc.xml_select(u'/db:article/db:info/db:title') - menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev') - if title and menu: - base = file_.split('.')[1] - link = base.replace('index','') - self._tree.append(link) - - def set(self): - return set(self._tree) - -class Page(): - """Class representing a version of a webpage""" - def __init__(self,link,page): - self._link = link - self._file = page[1] - self._lang = page[0] - self._doc = None - self._resources = [] - self._title = None - self._menu = None - self._rendered_article = None - - def language(self): - return self._lang - - def resources(self): - return set(self._resources) - - def menu(self): - return self._menu - - def set_article(self,art): - self._rendered_article = art - - def prepare(self): - self._doc = bindery.parse(self._file, prefixes=PREFIXES) - if self._doc.xml_select(u'/db:article/db:info/db:title'): - self._title = unicode(self._doc.article.info.title) - if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'): - self._menu = unicode(self._doc.article.info.titleabbrev) - - dirname = os.path.dirname(self._file) - code = self._doc.xml_select(u"//xi:include[@parse='text']") - if code: - for c in code: - (p, ext) = os.path.splitext(c.href) - if ext in valid_scripts: - exe = [] - exe.append(os.path.join(os.path.abspath(dirname)+'/'+c.href)) - if c.xml_select(u"//xi:include[@accept-language]"): - alang = c.xml_attributes[None, "accept-language"] - exe.append("lang="+alang) - if c.xml_select(u"//xi:include[@xpointer]"): - exe.append("xptr="+c.xpointer) - xml = subprocess.Popen(exe,stdout=subprocess.PIPE) - xstr = bindery.parse(str(xml.stdout.read())) - idp = c.xml_index_on_parent - for x in xstr.xml_children: - c.xml_parent.xml_insert(idp,x) - c.xml_parent.xml_remove(c) - - for r in self._doc.xml_select(u"//db:link[@xl:href]"): - rf = os.path.join(dirname,r.href) - if os.path.isfile(rf): - self._resources.append(rf) - for i in self._doc.xml_select(u"//db:imagedata[@fileref]"): - im = os.path.join(dirname,i.fileref) - if os.path.isfile(im): - self._resources.append(im) - for i in self._doc.xml_select(u"//html:form[@action]"): - pyscript = re.split('\.py',i.action,1)[0]+'.py' - im = os.path.join(dirname,pyscript) - if os.path.isfile(im): - self._resources.append(im) - - def render(self): - # amara can not handle the docbook stylesheets - # xmlarticle = transform(doc,style_xslt) - cwd = os.getcwd() - dirname = os.path.dirname(self._file) - os.chdir(dirname) - infile = os.path.basename(tempfile.mktemp()) - outfile = tempfile.mktemp() - tfi = open(infile,'w') - tfi.write(self._doc.xml_encode(omit_xml_declaration=True)) - tfi.close() -# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt] - cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - tfo = open(outfile,'r') - self._rendered_article = tfo.read() - tfo.close() - os.remove(infile) - os.remove(outfile) - os.chdir(cwd) - - def template(self,sitemap): - htmlmenu = sitemap.gen_menu(self._lang,None,"menu") - levelmenu = sitemap.gen_menu(self._lang,self,"tree") - langmenu = sitemap.lang_menu(self._lang,self._link) - template = Template(file=args.style+'index.'+self._lang+'.html.tmpl', - searchList=[{'title':self._title}, - {'menu':htmlmenu}, - {'article':self._rendered_article}, - {'levelmenu':levelmenu}, - {'langmenu':langmenu}]) - outfile = tmptarget+'html'.join(self._file.rsplit('xml',1)) - mkdir_p(os.path.dirname(outfile)) - out = open(outfile, 'w') - out.write(str(template)) - out.close() - - -class Link(): - """Class representing a webpage on the site""" - def __init__(self,link): - self._link = link - # find the representations of the link. - self._pages = [] - path = link - if self._link[-1] == '/': - path = path+'index' - lang = self._scan_languages(path) - for l in lang: - self._pages.append(Page(self,l)) - - def add_page(self,l): - self._pages.append(Page(self,l)) - - def _scan_languages(self,path): - lang = [] - for l in glob.glob('.'+path+'*'): - ls = l.split('.') - if len(ls) > 3 and ls[3] == 'xml': - lang.append((ls[2],l)) - return lang - - def link(self): - return self._link - - def prepare(self): - for page in self._pages: - page.prepare() - - def languages(self): - p = [] - for page in self._pages: - p.append(page.language()) - return p - - def render(self): - for page in self._pages: - page.render() - - def template(self,sitemap): - for page in self._pages: - page.template(sitemap) - - def page(self,lang): - for page in self._pages: - if page.language()==lang: - return page - return None - - def resources(self): - res = set() - for page in self._pages: - res = res.union(page.resources()) - return res - - -class Node(): - def __init__(self,token,value): - self._token = token - self._value = value - self._children = [] - - def token(self): - return self._token - - def value(self): - return self._value - - def children(self): - return self._children - -class Trie(): - def __init__(self): - self._root = [] - - def __iter__(self): - return self.inorder(self._root) - - def inorder(self,t): - for l in t: - yield l.value() - for x in self.inorder(l.children()): - yield x - - def _add(self,trie, key, content): - # is the key a leaf - k = key.pop(0) - if key == []: - node = Node(k,content) - trie.append(node) - else: - for ch in trie: - if ch.token() == k: - self._add(ch.children(), key, content) - - def add(self,key, content): - self._add(self._root, key, content) - - def _graph(self, trie, G): - for l in trie: - G.add_node(l.token()) - for ch in l.children(): - G.add_edge(l.token(),ch.token()) - self._graph(l.children(), G) - - def graph(self): - G = pgv.AGraph(directed=True) - G.add_node("sitemap") - for ch in self._root: - G.add_edge("sitemap",ch.token()) - self._graph(self._root, G) -# G.layout('dot') -# G.draw('g.png') -# print G.string() - - def _menu(self, trie, lang, page, css): - html = "\n" % css - for l in trie: - sel = '' - p = l.value().page(lang) - if p == page: - sel = ' class="selected"' - if p != None: - html += '%s\n' \ - % (sel,l.value().link(),p.menu()) - else: - html += '%s*\n' \ - % (sel,l.value().link(), l.value().page('en').menu()) - if l.children(): - html += self._menu(l.children(), lang, page, "") - html += "\n" - return html - - def menu(self,lang,page,cssclass): - css = '' - if cssclass: - css = ' class="'+cssclass+'"' - return self._menu(self._root, lang, page, css) - class Sitemap(): """Class keeping the internal site structure""" def __init__(self): @@ -427,30 +117,3 @@ class Sitemap(): if (os.path.exists(args.style+res)): publish(args.style+res, args.output) ssh_cmd(args.output,"chmod a+rx") - -ts = time.time() -dir_ = Directory() -sitemap = Sitemap() - -dir_.scan() -sitemap.read_map() - -missing = dir_.set() - sitemap.set() -removed = sitemap.set() - dir_.set() -for page in removed: - print page+' pages missing!!' -for page in missing: - print 'adding missing page '+page - sitemap.add_link(page) -if len(missing)+len(removed) != 0: - print 'writing new sitemap - please adjust if needed' - sitemap.write_map() -sitemap.graph() - -sitemap.process() - -t1 = time.time() -sitemap.publish() -t2 = time.time() -print "Publish [%5.2f s]" % (round(t2-t1,2)) -print "Total [%5.2f s]" % (round(t2-ts,2)) diff --git a/treecutter/tools.py b/treecutter/tools.py index 6f03ff7..f6dd79b 100755 --- a/treecutter/tools.py +++ b/treecutter/tools.py @@ -17,21 +17,6 @@ from amara import bindery from amara.xslt import transform from Cheetah.Template import Template -parser = argparse.ArgumentParser(description='Process docbook article tree.') -parser.add_argument('--style', nargs='?', - default=os.path.dirname(os.getcwd())+'/style/default/') -parser.add_argument('--output', nargs='?', - default=os.path.dirname(os.getcwd())+'/htdocs/') -args = parser.parse_args() - -style_xslt = args.style+"docbook.xsl" -outputdir = args.output - -tmptarget = tempfile.mkdtemp()+'/' - -valid_scripts = ['.py','.pl'] -MAXLEVEL = 10000 - def mkdir_p(path): try: os.makedirs(path) @@ -53,404 +38,3 @@ def ssh_cmd(target, command): retcode = subprocess.call(cmd) if retcode: print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - -PREFIXES={u'db': u'http://docbook.org/ns/docbook', - u'xi': u'http://www.w3.org/2001/XInclude', - u'xl': u'http://www.w3.org/1999/xlink', - u'html' : u'http://www.w3.org/1999/xhtml'} - -class Directory(): - """Class containing the state of the directory with articles""" - def __init__(self): - self._cwd = '.' - self._tree = [] - - def scan(self): - for dirname, dirnames, filenames in os.walk(self._cwd): - for filename in filenames: - if fnmatch.fnmatch(filename, '*.xml'): - file_ = os.path.join(dirname,filename) - doc = bindery.parse(file_, prefixes=PREFIXES) - title = doc.xml_select(u'/db:article/db:info/db:title') - menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev') - if title and menu: - base = file_.split('.')[1] - link = base.replace('index','') - self._tree.append(link) - - def set(self): - return set(self._tree) - -class Page(): - """Class representing a version of a webpage""" - def __init__(self,link,page): - self._link = link - self._file = page[1] - self._lang = page[0] - self._doc = None - self._resources = [] - self._title = None - self._menu = None - self._rendered_article = None - - def language(self): - return self._lang - - def resources(self): - return set(self._resources) - - def menu(self): - return self._menu - - def set_article(self,art): - self._rendered_article = art - - def prepare(self): - self._doc = bindery.parse(self._file, prefixes=PREFIXES) - if self._doc.xml_select(u'/db:article/db:info/db:title'): - self._title = unicode(self._doc.article.info.title) - if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'): - self._menu = unicode(self._doc.article.info.titleabbrev) - - dirname = os.path.dirname(self._file) - code = self._doc.xml_select(u"//xi:include[@parse='text']") - if code: - for c in code: - (p, ext) = os.path.splitext(c.href) - if ext in valid_scripts: - exe = [] - exe.append(os.path.join(os.path.abspath(dirname)+'/'+c.href)) - if c.xml_select(u"//xi:include[@accept-language]"): - alang = c.xml_attributes[None, "accept-language"] - exe.append("lang="+alang) - if c.xml_select(u"//xi:include[@xpointer]"): - exe.append("xptr="+c.xpointer) - xml = subprocess.Popen(exe,stdout=subprocess.PIPE) - xstr = bindery.parse(str(xml.stdout.read())) - idp = c.xml_index_on_parent - for x in xstr.xml_children: - c.xml_parent.xml_insert(idp,x) - c.xml_parent.xml_remove(c) - - for r in self._doc.xml_select(u"//db:link[@xl:href]"): - rf = os.path.join(dirname,r.href) - if os.path.isfile(rf): - self._resources.append(rf) - for i in self._doc.xml_select(u"//db:imagedata[@fileref]"): - im = os.path.join(dirname,i.fileref) - if os.path.isfile(im): - self._resources.append(im) - for i in self._doc.xml_select(u"//html:form[@action]"): - pyscript = re.split('\.py',i.action,1)[0]+'.py' - im = os.path.join(dirname,pyscript) - if os.path.isfile(im): - self._resources.append(im) - - def render(self): - # amara can not handle the docbook stylesheets - # xmlarticle = transform(doc,style_xslt) - cwd = os.getcwd() - dirname = os.path.dirname(self._file) - os.chdir(dirname) - infile = os.path.basename(tempfile.mktemp()) - outfile = tempfile.mktemp() - tfi = open(infile,'w') - tfi.write(self._doc.xml_encode(omit_xml_declaration=True)) - tfi.close() -# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt] - cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - tfo = open(outfile,'r') - self._rendered_article = tfo.read() - tfo.close() - os.remove(infile) - os.remove(outfile) - os.chdir(cwd) - - def template(self,sitemap): - htmlmenu = sitemap.gen_menu(self._lang,None,"menu") - levelmenu = sitemap.gen_menu(self._lang,self,"tree") - langmenu = sitemap.lang_menu(self._lang,self._link) - template = Template(file=args.style+'index.'+self._lang+'.html.tmpl', - searchList=[{'title':self._title}, - {'menu':htmlmenu}, - {'article':self._rendered_article}, - {'levelmenu':levelmenu}, - {'langmenu':langmenu}]) - outfile = tmptarget+'html'.join(self._file.rsplit('xml',1)) - mkdir_p(os.path.dirname(outfile)) - out = open(outfile, 'w') - out.write(str(template)) - out.close() - - -class Link(): - """Class representing a webpage on the site""" - def __init__(self,link): - self._link = link - # find the representations of the link. - self._pages = [] - path = link - if self._link[-1] == '/': - path = path+'index' - lang = self._scan_languages(path) - for l in lang: - self._pages.append(Page(self,l)) - - def add_page(self,l): - self._pages.append(Page(self,l)) - - def _scan_languages(self,path): - lang = [] - for l in glob.glob('.'+path+'*'): - ls = l.split('.') - if len(ls) > 3 and ls[3] == 'xml': - lang.append((ls[2],l)) - return lang - - def link(self): - return self._link - - def prepare(self): - for page in self._pages: - page.prepare() - - def languages(self): - p = [] - for page in self._pages: - p.append(page.language()) - return p - - def render(self): - for page in self._pages: - page.render() - - def template(self,sitemap): - for page in self._pages: - page.template(sitemap) - - def page(self,lang): - for page in self._pages: - if page.language()==lang: - return page - return None - - def resources(self): - res = set() - for page in self._pages: - res = res.union(page.resources()) - return res - - -class Node(): - def __init__(self,token,value): - self._token = token - self._value = value - self._children = [] - - def token(self): - return self._token - - def value(self): - return self._value - - def children(self): - return self._children - -class Trie(): - def __init__(self): - self._root = [] - - def __iter__(self): - return self.inorder(self._root) - - def inorder(self,t): - for l in t: - yield l.value() - for x in self.inorder(l.children()): - yield x - - def _add(self,trie, key, content): - # is the key a leaf - k = key.pop(0) - if key == []: - node = Node(k,content) - trie.append(node) - else: - for ch in trie: - if ch.token() == k: - self._add(ch.children(), key, content) - - def add(self,key, content): - self._add(self._root, key, content) - - def _graph(self, trie, G): - for l in trie: - G.add_node(l.token()) - for ch in l.children(): - G.add_edge(l.token(),ch.token()) - self._graph(l.children(), G) - - def graph(self): - G = pgv.AGraph(directed=True) - G.add_node("sitemap") - for ch in self._root: - G.add_edge("sitemap",ch.token()) - self._graph(self._root, G) -# G.layout('dot') -# G.draw('g.png') -# print G.string() - - def _menu(self, trie, lang, page, css): - html = "\n" % css - for l in trie: - sel = '' - p = l.value().page(lang) - if p == page: - sel = ' class="selected"' - if p != None: - html += '%s\n' \ - % (sel,l.value().link(),p.menu()) - else: - html += '%s*\n' \ - % (sel,l.value().link(), l.value().page('en').menu()) - if l.children(): - html += self._menu(l.children(), lang, page, "") - html += "\n" - return html - - def menu(self,lang,page,cssclass): - css = '' - if cssclass: - css = ' class="'+cssclass+'"' - return self._menu(self._root, lang, page, css) - -class Sitemap(): - """Class keeping the internal site structure""" - def __init__(self): - self._file = 'sitemap.txt' - self._tree = Trie() - self._sitelang = set() - self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml') - self._tranlang = {} - - def add_link(self, link): - tokens = filter(None,re.split(r'(^/[\w-]*/|[\w-]*/)',link)) - self._tree.add(tokens,Link(link)) - - def write_map(self): - f = open(self._file,'w') - f.write('\n'.join(link.link() for link in self._tree)) - f.close() - - def read_map(self): - try: - f = open(self._file) - sml = f.read().split() - f.close() - for line in sml: - self.add_link(line) - except IOError, what_error: - print 'INFO: Could not read sitemap.txt - one will be created' - - def set(self): - return set(link.link() for link in self._tree) - - def process(self): - t1 = time.time() - for link in self._tree: - link.prepare() - t2 = time.time() - print "Prepare [%5.2f s]" % (round(t2-t1,2)) - for link in self._tree: - self._sitelang = self._sitelang.union(set(link.languages())) - for tran in self._sitelang: - if tran != 'en': - self._tranlang[tran] = gettext.translation('iso_639_3', - languages=[tran]) - t3 = time.time() - print "Language [%5.2f s]" % (round(t3-t2,2)) - for link in self._tree: - link.render() - t4 = time.time() - print "Render [%5.2f s]" % (round(t4-t3,2)) - for link in self._tree: - link.template(self) - t5 = time.time() - print "Template [%5.2f s]" % (round(t5-t4,2)) - t6 = time.time() - res = set() - cwd = os.getcwd() - for link in self._tree: - res = res.union(link.resources()) - for f in res: - outfile = tmptarget+f - mkdir_p(os.path.dirname(outfile)) - shutil.copyfile(f,outfile) - print "Resources[%5.2f s]" % (round(t6-t5,2)) - sitmaplink = Link('/sitemap') - for l in self._sitelang: - sitmaplink.add_page((l,'/sitemap.'+l+'.xml')) - for l in self._sitelang: - sitmaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap")) - sitmaplink.page(l).template(self) - t7 = time.time() - print "Sitemap [%5.2f s]" % (round(t7-t6,2)) - - def graph(self): - self._tree.graph() - - def gen_menu(self,lang,page,cssclass): - return self._tree.menu(lang,page,cssclass) - - def lang_menu(self,lang,link): - html = "
    " - for l in link.languages(): - isoxml = u"//iso_639_3_entry[@*='"+l+"']" - ln = self._isocode.xml_select(isoxml)[0].name - if lang != 'en': - ln = self._tranlang[lang].gettext(ln) - p = link.link() - if p[-1] == '/': - p = p +'index' - p = p+'.'+l - html += '
  • %s
  • ' % (p, l, ln) - html += "
" - return html - - def publish(self): - ssh_cmd(args.output,"mkdir -p") - publish(tmptarget, args.output) - for res in ["css","images","js","favicon.ico"]: - if (os.path.exists(args.style+res)): - publish(args.style+res, args.output) - ssh_cmd(args.output,"chmod a+rx") - -ts = time.time() -dir_ = Directory() -sitemap = Sitemap() - -dir_.scan() -sitemap.read_map() - -missing = dir_.set() - sitemap.set() -removed = sitemap.set() - dir_.set() -for page in removed: - print page+' pages missing!!' -for page in missing: - print 'adding missing page '+page - sitemap.add_link(page) -if len(missing)+len(removed) != 0: - print 'writing new sitemap - please adjust if needed' - sitemap.write_map() -sitemap.graph() - -sitemap.process() - -t1 = time.time() -sitemap.publish() -t2 = time.time() -print "Publish [%5.2f s]" % (round(t2-t1,2)) -print "Total [%5.2f s]" % (round(t2-ts,2)) diff --git a/treecutter/trie.py b/treecutter/trie.py index 6f03ff7..446c642 100755 --- a/treecutter/trie.py +++ b/treecutter/trie.py @@ -17,233 +17,6 @@ from amara import bindery from amara.xslt import transform from Cheetah.Template import Template -parser = argparse.ArgumentParser(description='Process docbook article tree.') -parser.add_argument('--style', nargs='?', - default=os.path.dirname(os.getcwd())+'/style/default/') -parser.add_argument('--output', nargs='?', - default=os.path.dirname(os.getcwd())+'/htdocs/') -args = parser.parse_args() - -style_xslt = args.style+"docbook.xsl" -outputdir = args.output - -tmptarget = tempfile.mkdtemp()+'/' - -valid_scripts = ['.py','.pl'] -MAXLEVEL = 10000 - -def mkdir_p(path): - try: - os.makedirs(path) - except OSError as exc: # Python >2.5 - if exc.errno == errno.EEXIST: - pass - else: raise - -def publish(src,target): - cmd = ["rsync","-a","--delete",src,target] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - -def ssh_cmd(target, command): - t = target.split(":") - c = command.split() - cmd = ["ssh",t[0],c[0],c[1],t[1]] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - -PREFIXES={u'db': u'http://docbook.org/ns/docbook', - u'xi': u'http://www.w3.org/2001/XInclude', - u'xl': u'http://www.w3.org/1999/xlink', - u'html' : u'http://www.w3.org/1999/xhtml'} - -class Directory(): - """Class containing the state of the directory with articles""" - def __init__(self): - self._cwd = '.' - self._tree = [] - - def scan(self): - for dirname, dirnames, filenames in os.walk(self._cwd): - for filename in filenames: - if fnmatch.fnmatch(filename, '*.xml'): - file_ = os.path.join(dirname,filename) - doc = bindery.parse(file_, prefixes=PREFIXES) - title = doc.xml_select(u'/db:article/db:info/db:title') - menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev') - if title and menu: - base = file_.split('.')[1] - link = base.replace('index','') - self._tree.append(link) - - def set(self): - return set(self._tree) - -class Page(): - """Class representing a version of a webpage""" - def __init__(self,link,page): - self._link = link - self._file = page[1] - self._lang = page[0] - self._doc = None - self._resources = [] - self._title = None - self._menu = None - self._rendered_article = None - - def language(self): - return self._lang - - def resources(self): - return set(self._resources) - - def menu(self): - return self._menu - - def set_article(self,art): - self._rendered_article = art - - def prepare(self): - self._doc = bindery.parse(self._file, prefixes=PREFIXES) - if self._doc.xml_select(u'/db:article/db:info/db:title'): - self._title = unicode(self._doc.article.info.title) - if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'): - self._menu = unicode(self._doc.article.info.titleabbrev) - - dirname = os.path.dirname(self._file) - code = self._doc.xml_select(u"//xi:include[@parse='text']") - if code: - for c in code: - (p, ext) = os.path.splitext(c.href) - if ext in valid_scripts: - exe = [] - exe.append(os.path.join(os.path.abspath(dirname)+'/'+c.href)) - if c.xml_select(u"//xi:include[@accept-language]"): - alang = c.xml_attributes[None, "accept-language"] - exe.append("lang="+alang) - if c.xml_select(u"//xi:include[@xpointer]"): - exe.append("xptr="+c.xpointer) - xml = subprocess.Popen(exe,stdout=subprocess.PIPE) - xstr = bindery.parse(str(xml.stdout.read())) - idp = c.xml_index_on_parent - for x in xstr.xml_children: - c.xml_parent.xml_insert(idp,x) - c.xml_parent.xml_remove(c) - - for r in self._doc.xml_select(u"//db:link[@xl:href]"): - rf = os.path.join(dirname,r.href) - if os.path.isfile(rf): - self._resources.append(rf) - for i in self._doc.xml_select(u"//db:imagedata[@fileref]"): - im = os.path.join(dirname,i.fileref) - if os.path.isfile(im): - self._resources.append(im) - for i in self._doc.xml_select(u"//html:form[@action]"): - pyscript = re.split('\.py',i.action,1)[0]+'.py' - im = os.path.join(dirname,pyscript) - if os.path.isfile(im): - self._resources.append(im) - - def render(self): - # amara can not handle the docbook stylesheets - # xmlarticle = transform(doc,style_xslt) - cwd = os.getcwd() - dirname = os.path.dirname(self._file) - os.chdir(dirname) - infile = os.path.basename(tempfile.mktemp()) - outfile = tempfile.mktemp() - tfi = open(infile,'w') - tfi.write(self._doc.xml_encode(omit_xml_declaration=True)) - tfi.close() -# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt] - cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - tfo = open(outfile,'r') - self._rendered_article = tfo.read() - tfo.close() - os.remove(infile) - os.remove(outfile) - os.chdir(cwd) - - def template(self,sitemap): - htmlmenu = sitemap.gen_menu(self._lang,None,"menu") - levelmenu = sitemap.gen_menu(self._lang,self,"tree") - langmenu = sitemap.lang_menu(self._lang,self._link) - template = Template(file=args.style+'index.'+self._lang+'.html.tmpl', - searchList=[{'title':self._title}, - {'menu':htmlmenu}, - {'article':self._rendered_article}, - {'levelmenu':levelmenu}, - {'langmenu':langmenu}]) - outfile = tmptarget+'html'.join(self._file.rsplit('xml',1)) - mkdir_p(os.path.dirname(outfile)) - out = open(outfile, 'w') - out.write(str(template)) - out.close() - - -class Link(): - """Class representing a webpage on the site""" - def __init__(self,link): - self._link = link - # find the representations of the link. - self._pages = [] - path = link - if self._link[-1] == '/': - path = path+'index' - lang = self._scan_languages(path) - for l in lang: - self._pages.append(Page(self,l)) - - def add_page(self,l): - self._pages.append(Page(self,l)) - - def _scan_languages(self,path): - lang = [] - for l in glob.glob('.'+path+'*'): - ls = l.split('.') - if len(ls) > 3 and ls[3] == 'xml': - lang.append((ls[2],l)) - return lang - - def link(self): - return self._link - - def prepare(self): - for page in self._pages: - page.prepare() - - def languages(self): - p = [] - for page in self._pages: - p.append(page.language()) - return p - - def render(self): - for page in self._pages: - page.render() - - def template(self,sitemap): - for page in self._pages: - page.template(sitemap) - - def page(self,lang): - for page in self._pages: - if page.language()==lang: - return page - return None - - def resources(self): - res = set() - for page in self._pages: - res = res.union(page.resources()) - return res - - class Node(): def __init__(self,token,value): self._token = token @@ -326,131 +99,3 @@ class Trie(): if cssclass: css = ' class="'+cssclass+'"' return self._menu(self._root, lang, page, css) - -class Sitemap(): - """Class keeping the internal site structure""" - def __init__(self): - self._file = 'sitemap.txt' - self._tree = Trie() - self._sitelang = set() - self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml') - self._tranlang = {} - - def add_link(self, link): - tokens = filter(None,re.split(r'(^/[\w-]*/|[\w-]*/)',link)) - self._tree.add(tokens,Link(link)) - - def write_map(self): - f = open(self._file,'w') - f.write('\n'.join(link.link() for link in self._tree)) - f.close() - - def read_map(self): - try: - f = open(self._file) - sml = f.read().split() - f.close() - for line in sml: - self.add_link(line) - except IOError, what_error: - print 'INFO: Could not read sitemap.txt - one will be created' - - def set(self): - return set(link.link() for link in self._tree) - - def process(self): - t1 = time.time() - for link in self._tree: - link.prepare() - t2 = time.time() - print "Prepare [%5.2f s]" % (round(t2-t1,2)) - for link in self._tree: - self._sitelang = self._sitelang.union(set(link.languages())) - for tran in self._sitelang: - if tran != 'en': - self._tranlang[tran] = gettext.translation('iso_639_3', - languages=[tran]) - t3 = time.time() - print "Language [%5.2f s]" % (round(t3-t2,2)) - for link in self._tree: - link.render() - t4 = time.time() - print "Render [%5.2f s]" % (round(t4-t3,2)) - for link in self._tree: - link.template(self) - t5 = time.time() - print "Template [%5.2f s]" % (round(t5-t4,2)) - t6 = time.time() - res = set() - cwd = os.getcwd() - for link in self._tree: - res = res.union(link.resources()) - for f in res: - outfile = tmptarget+f - mkdir_p(os.path.dirname(outfile)) - shutil.copyfile(f,outfile) - print "Resources[%5.2f s]" % (round(t6-t5,2)) - sitmaplink = Link('/sitemap') - for l in self._sitelang: - sitmaplink.add_page((l,'/sitemap.'+l+'.xml')) - for l in self._sitelang: - sitmaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap")) - sitmaplink.page(l).template(self) - t7 = time.time() - print "Sitemap [%5.2f s]" % (round(t7-t6,2)) - - def graph(self): - self._tree.graph() - - def gen_menu(self,lang,page,cssclass): - return self._tree.menu(lang,page,cssclass) - - def lang_menu(self,lang,link): - html = "
    " - for l in link.languages(): - isoxml = u"//iso_639_3_entry[@*='"+l+"']" - ln = self._isocode.xml_select(isoxml)[0].name - if lang != 'en': - ln = self._tranlang[lang].gettext(ln) - p = link.link() - if p[-1] == '/': - p = p +'index' - p = p+'.'+l - html += '
  • %s
  • ' % (p, l, ln) - html += "
" - return html - - def publish(self): - ssh_cmd(args.output,"mkdir -p") - publish(tmptarget, args.output) - for res in ["css","images","js","favicon.ico"]: - if (os.path.exists(args.style+res)): - publish(args.style+res, args.output) - ssh_cmd(args.output,"chmod a+rx") - -ts = time.time() -dir_ = Directory() -sitemap = Sitemap() - -dir_.scan() -sitemap.read_map() - -missing = dir_.set() - sitemap.set() -removed = sitemap.set() - dir_.set() -for page in removed: - print page+' pages missing!!' -for page in missing: - print 'adding missing page '+page - sitemap.add_link(page) -if len(missing)+len(removed) != 0: - print 'writing new sitemap - please adjust if needed' - sitemap.write_map() -sitemap.graph() - -sitemap.process() - -t1 = time.time() -sitemap.publish() -t2 = time.time() -print "Publish [%5.2f s]" % (round(t2-t1,2)) -print "Total [%5.2f s]" % (round(t2-ts,2))