import pygraphviz as pgv
import glob
import gettext
+import shutil
from amara import bindery
from amara.xslt import transform
from Cheetah.Template import Template
args = parser.parse_args()
style_xslt = args.style+"docbook.xsl"
-style_tmpl = args.style+"index.en.html.tmpl"
outputdir = args.output
tmptarget = tempfile.mkdtemp()+'/'
class Page():
"""Class representing a version of a webpage"""
- def __init__(self,page):
+ def __init__(self,link,page):
+ self._link = link
self._file = page[1]
self._lang = page[0]
self._doc = None
def language(self):
+ """Return this page's language, i.e. the page[0] value stored by __init__."""
return self._lang
+ def resources(self):
+ """Return the entries of self._resources as a new set.
+
+ NOTE(review): self._resources is not assigned in the part of
+ __init__ visible in this patch -- confirm it is populated before
+ this method is called.
+ """
+ return set(self._resources)
+
def menu(self):
+ """Return self._menu, which the menu builder uses as this page's link text.
+
+ NOTE(review): self._menu is assigned outside the lines visible here.
+ """
return self._menu
def template(self,sitemap):
htmlmenu = sitemap.gen_menu(self._lang,None,"menu")
levelmenu = sitemap.gen_menu(self._lang,self,"tree")
- langmenu = sitemap.lang_menu(self._lang)
- template = Template(file=style_tmpl,
+ langmenu = sitemap.lang_menu(self._lang,self._link)
+ template = Template(file=args.style+'index.'+self._lang+'.html.tmpl',
searchList=[{'title':self._title},
{'menu':htmlmenu},
{'article':self._rendered_article},
path = path+'index'
lang = self._scan_languages(path)
for l in lang:
- self._pages.append(Page(l))
+ self._pages.append(Page(self,l))
+
+ def add_page(self,l):
+ """Append a Page for this link to self._pages.
+
+ l is handed to Page as its page argument, which Page.__init__ reads
+ as l[0] = language and l[1] = file.
+ """
+ self._pages.append(Page(self,l))
def _scan_languages(self,path):
lang = []
for page in self._pages:
if page.language()==lang:
return page
+ return None
+
+ def resources(self):
+ res = set()
+ for page in self._pages:
+ res = res.union(page.resources())
+ return res
+
class Node():
def __init__(self,token,value):
html = "<ul%s>\n" % css
for l in trie:
sel = ''
- if l.value().page(lang) == page:
+ p = l.value().page(lang)
+ if p == page:
sel = ' class="selected"'
- html += '<li%s><a href="%s">%s</a>\n' \
- % (sel,l.value().link(),l.value().page(lang).menu())
- html += self._menu(l.children(), lang, page, "")
+ if p != None:
+ html += '<li%s><a href="%s">%s</a>\n' \
+ % (sel,l.value().link(),p.menu())
+ else:
+ html += '<li%s><a href="%s.en" hreflang="en">%s</a>*\n' \
+ % (sel,l.value().link(), l.value().page('en').menu())
+ if l.children():
+ html += self._menu(l.children(), lang, page, "")
html += "</ul>\n"
return html
self._tranlang = {}
def add_link(self, link):
+ # Split the link into path tokens; re.split keeps the captured
+ # directory parts (each with its trailing '/') and filter(None, ...)
+ # drops the empty strings. '-' is now accepted inside a path segment.
- tokens = filter(None,re.split(r'(^/\w*/|\w*/)',link))
+ tokens = filter(None,re.split(r'(^/[\w-]*/|[\w-]*/)',link))
self._tree.add(tokens,Link(link))
def write_map(self):
self._sitelang = self._sitelang.union(set(link.languages()))
for tran in self._sitelang:
if tran != 'en':
- self._tranlang[tran] = gettext.translation('iso_639_3', languages=[tran])
+ self._tranlang[tran] = gettext.translation('iso_639_3',
+ languages=[tran])
t3 = time.time()
print "Language [%5.2f s]" % (round(t3-t2,2))
for link in self._tree:
link.template(self)
t5 = time.time()
print "Template [%5.2f s]" % (round(t5-t4,2))
- sm = {}
- for l in self._sitelang:
- sm[l] = Page((l,'/sitemap'))
- sm[l].set_article(self.gen_menu(l,None,"tree sitemap"))
- sm[l].template(self)
t6 = time.time()
- print "Sitemap [%5.2f s]" % (round(t6-t5,2))
+ res = set()
+ cwd = os.getcwd()
+ for link in self._tree:
+ res = res.union(link.resources())
+ for f in res:
+ outfile = tmptarget+f
+ mkdir_p(os.path.dirname(outfile))
+ shutil.copyfile(f,outfile)
+ print "Resources[%5.2f s]" % (round(t6-t5,2))
+ sitmaplink = Link('/sitemap')
+ for l in self._sitelang:
+ sitmaplink.add_page((l,'/sitemap.'+l+'.xml'))
+ for l in self._sitelang:
+ sitmaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap"))
+ sitmaplink.page(l).template(self)
+ t7 = time.time()
+ print "Sitemap [%5.2f s]" % (round(t7-t6,2))
def graph(self):
+ """Delegate to self._tree.graph().
+
+ NOTE(review): presumably draws the link tree (pygraphviz is imported
+ by this file) -- confirm against the tree class.
+ """
self._tree.graph()
def gen_menu(self,lang,page,cssclass):
+ """Return whatever self._tree.menu(lang,page,cssclass) produces.
+
+ Callers in this file pass page=None or a Page, and a CSS class
+ string such as "menu", "tree" or "tree sitemap".
+ NOTE(review): presumably the result is the HTML <ul> menu for lang
+ with page marked as selected -- confirm in the tree class.
+ """
return self._tree.menu(lang,page,cssclass)
- def lang_menu(self,lang):
+ def lang_menu(self,lang,link):
html = "<ul>"
- for l in self._sitelang:
+ for l in link.languages():
isoxml = u"//iso_639_3_entry[@*='"+l+"']"
ln = self._isocode.xml_select(isoxml)[0].name
if lang != 'en':
ln = self._tranlang[lang].gettext(ln)
- html += '<li><a href="%s">%s</a></li>' % ('link'+'.'+l, ln)
+ p = link.link()
+ if p[-1] == '/':
+ p = p +'index'
+ p = p+'.'+l
+ html += '<li><a href="%s" hreflang="%s">%s</a></li>' % (p, l, ln)
html += "</ul>"
return html
publish(args.style+"css", args.output)
publish(args.style+"images",args.output)
-def generateSitemap():
- sitemap = []
- try:
- sfile = open('sitemap.txt')
- flist = sfile.read().split()
- sfile.close()
- for f in flist:
- sitemap.append(dict(link=f))
- except IOError, what_error:
- print 'Sitemap missing - generating one.'
-
- for dirname, dirnames, filenames in os.walk('.'):
- for filename in filenames:
- if fnmatch.fnmatch(filename, '*.xml'):
- xfile = os.path.join(dirname,filename)
- doc = bindery.parse(xfile,
- prefixes={u'db': u'http://docbook.org/ns/docbook',
- u'xi': u'http://www.w3.org/2001/XInclude',
- u'xl': u'http://www.w3.org/1999/xlink'})
- title = doc.xml_select(u'/db:article/db:info/db:title')
- menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
- code = doc.xml_select(u"//xi:include[@parse='text']")
- resource = doc.xml_select(u"//db:link[@xl:href]")
- image = doc.xml_select(u"//db:imagedata[@fileref]")
- exe = 0
- for c in code:
- (p, ext) = os.path.splitext(c.href)
- if ext in valid_scripts:
- exe = 1
-
- if title and menu:
- found = 0
- base = xfile.split('.')[1]
- link = base.replace('index','')
- level = len(filter(None,re.split(r'(^/\w*/|\w*/)',link)))
- res = []
- for r in resource:
- rf = os.path.join(dirname,r.href)
- if os.path.isfile(rf):
- res.append(rf)
- for i in image:
- im = os.path.join(dirname,i.fileref)
- if os.path.isfile(im):
- res.append(im)
- page = dict(title=unicode(doc.article.info.title),
- menu=unicode(doc.article.info.titleabbrev),
- output=os.path.join(dirname,
- filename.replace('xml','html')),
- exe=exe,
- file=xfile,
- res=res,
- level=level)
- for l in sitemap:
- if l['link'] == link:
- found = 1
- l.update(page)
- if not found:
- print "adding "+link+" to sitemap"
- dd = dict(link=link)
- dd.update(page)
- sitemap.append(dd)
- sfile = open('sitemap.txt','w')
- for l in sitemap:
- sfile.write(l['link']+'\n')
- sfile.close()
- return sitemap
-
-def expandXincludeTxt(page):
- doc = bindery.parse(page['file'],
- prefixes={u'db': u'http://docbook.org/ns/docbook',
- u'xi': u'http://www.w3.org/2001/XInclude'})
- if page['exe']:
- code = doc.xml_select(u"//xi:include[@parse='text']")
- for c in code:
- (p, ext) = os.path.splitext(c.href)
- if ext in valid_scripts:
- exe = os.path.join(os.path.abspath(c.href))
- xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
- xstr = bindery.parse(str(xml.stdout.read()))
- id = c.xml_index_on_parent
- for x in xstr.xml_children:
- c.xml_parent.xml_insert(id,x)
- c.xml_parent.xml_remove(c)
- return doc
-
-def xsltConvert(doc):
-# amara can not handle the docbook stylesheets
-# xmlarticle = transform(doc,style_xslt)
- cwd = os.getcwd()
- rundir = os.path.dirname(page['file'])
- os.chdir(rundir)
- infile = os.path.basename(tempfile.mktemp())
- outfile = tempfile.mktemp()
- tfi = open(infile,'w')
- tfi.write(doc.xml_encode())
- tfi.close()
-# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
- cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
- retcode = subprocess.call(cmd)
- if retcode:
- print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
- tfo = open(outfile,'r')
- result = tfo.read()
- tfo.close()
- os.remove(infile)
- os.remove(outfile)
- os.chdir(cwd)
- return result
-
-def genMenu(page,sitemap,slevel,elevel):
- title = None
- sm = []
- if elevel == MAXLEVEL or elevel == 1 or page == None:
- html = '<ul>\n'
- sm = sitemap
- else:
- html = '<ul class="tree">\n'
- idx = sitemap.index(page)
- while (sitemap[idx]['level'] == page['level']):
- idx = idx-1
- title = sitemap[idx]['menu']
- idx = idx+1
- while (idx < len(sitemap) and sitemap[idx]['level'] == page['level']):
- sm.append(sitemap[idx])
- idx = idx+1
- oldlevel = slevel
-
- for p in sm:
- if slevel > p['level'] or elevel < p['level']:
- continue
- if not title and p['link'] == '/':
- title = p['menu']
-
- if oldlevel < p['level']:
- html+='<ul>\n'
- elif oldlevel > p['level']:
- if p['link'][-1] == '/':
- html+='</li>\n'
- html+='</ul>\n</li>\n'
- if page != None and page == p:
- html+='<li class="selected"><a href="%s">%s</a>' % (p['link'],p['menu'])
- else:
- html+='<li><a href="%s">%s</a>' % (p['link'],p['menu'])
- if p['link'][-1] != '/' or p['link'] == '/':
- html+='</li>\n'
- oldlevel = p['level']
- html+='</ul>\n'
- return (html,title)
-
-def writeToTemplate(page,doc,sitemap):
- (menu,menuname) = genMenu(page,sitemap,1,MAXLEVEL)
- (levelmenu,levelname) = genMenu(page,sitemap,page['level'],page['level'])
- template = Template(file=style_tmpl,
- searchList=[{'title':page['title']},
- {'menu':menu},
- {'article':doc},
- {'levelmenu':levelmenu},
- {'levelname':levelname}])
- outfile = tmptarget+page['output']
- mkdir_p(os.path.dirname(outfile))
- out = open(outfile, 'w')
- out.write(str(template))
- out.close()
- for r in page['res']:
- mkdir_p(os.path.dirname(tmptarget+r))
- shutil.copyfile(r, tmptarget+r)
-
-def createSitemap(sitemap):
- (menu,menuname) = genMenu(None,sitemap,1,MAXLEVEL)
- template = Template(file=style_tmpl,
- searchList=[
- {'title':'Sitemap'},
- {'menu':menu},
- {'article':menu},
- {'levelmenu':''},
- {'levelname':''}])
- outfile = tmptarget+'sitemap.en.html'
- mkdir_p(os.path.dirname(outfile))
- out = open(outfile, 'w')
- out.write(str(template))
- out.close()
-
+ts = time.time()
dir_ = Directory()
sitemap = Sitemap()
for page in missing:
print 'adding missing page '+page
sitemap.add_link(page)
-if len(missing & removed) != 0:
+# Rewrite the sitemap when any page was added or removed. The replaced
+# test used `missing & removed`, which (assuming both are sets) is only
+# non-empty when the same page appears in both.
+if len(missing)+len(removed) != 0:
print 'writing new sitemap - please adjust if needed'
sitemap.write_map()
sitemap.graph()
sitemap.publish()
t2 = time.time()
print "Publish [%5.2f s]" % (round(t2-t1,2))
-
-sitemap = generateSitemap()
-tmptarget = tempfile.mkdtemp()+'/'
-tot = 0
-for page in sitemap:
- t1 = time.time()
- print "Page : %-30s %30s" % (page['link'],
- time.ctime(os.stat(page['file']).st_mtime)),
- doc = expandXincludeTxt(page)
- pubdoc = xsltConvert(doc)
- writeToTemplate(page,pubdoc,sitemap)
- t2 = time.time()
- print "[%5.2f s]" % (round(t2-t1,2))
- tot = tot + (t2-t1)
-
-print "Total time\t\t\t\t\t\t\t [%5.2f s]" % (round(tot,2))
-createSitemap(sitemap)
-publish(tmptarget, args.output)
-publish(args.style+"css", args.output)
-publish(args.style+"images",args.output)
+print "Total [%5.2f s]" % (round(t2-ts,2))