12 import pygraphviz as pgv
14 from amara import bindery
15 from amara.xslt import transform
16 from Cheetah.Template import Template
# Command-line configuration for the docbook site generator.
#   --style  : directory holding the docbook XSLT stylesheet and the
#              Cheetah page template (defaults to ../style/default/)
#   --output : directory the rendered site is published into
#              (defaults to ../htdocs/)
parser = argparse.ArgumentParser(description='Process docbook article tree.')
parser.add_argument('--style', nargs='?',
                    default=os.path.dirname(os.getcwd())+'/style/default/')
parser.add_argument('--output', nargs='?',
                    default=os.path.dirname(os.getcwd())+'/htdocs/')
args = parser.parse_args()

# Derived paths: XSLT stylesheet, Cheetah template, and publish target.
style_xslt = args.style+"docbook.xsl"
style_tmpl = args.style+"index.en.html.tmpl"
outputdir = args.output

# xi:include'd files with one of these extensions are treated as
# executable scripts: they are run and their stdout is spliced into the
# document (see the xi:include expansion code below).
valid_scripts = ['.py','.pl']
# NOTE(review): fragment — the enclosing helper (an `mkdir -p` style
# function; its 'def' line and the 'try: os.makedirs(...)' body are
# elided from this view).  EEXIST is tolerated so the helper is
# idempotent when the directory already exists.
except OSError as exc: # Python >2.5
    if exc.errno == errno.EEXIST:
def publish(src,target):
    """Mirror *src* into *target* using ``rsync -a --delete``.

    Prints an error line when rsync exits non-zero.
    """
    cmd = ["rsync","-a","--delete",src,target]
    retcode = subprocess.call(cmd)
    # NOTE(review): a guard line (presumably 'if retcode != 0:') is
    # elided from this view between the call and the print below.
    print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
# XML namespace prefixes used by all amara/bindery XPath selections in
# this file: docbook 5, XInclude, and XLink.
PREFIXES={u'db': u'http://docbook.org/ns/docbook',
          u'xi': u'http://www.w3.org/2001/XInclude',
          u'xl': u'http://www.w3.org/1999/xlink'}
# NOTE(review): fragment — the 'class' header and the method 'def' lines
# around this span are elided from this view.
    """Class containing the state of the directory with articles"""

        # Walk the working directory and record a site-relative link for
        # every *.xml docbook article found on disk.
        for dirname, dirnames, filenames in os.walk(self._cwd):
            for filename in filenames:
                if fnmatch.fnmatch(filename, '*.xml'):
                    file_ = os.path.join(dirname,filename)
                    doc = bindery.parse(file_, prefixes=PREFIXES)
                    # Title/titleabbrev are selected but unused here —
                    # presumably a parse/validity probe. TODO confirm.
                    title = doc.xml_select(u'/db:article/db:info/db:title')
                    menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
                    # './foo/index.xml' -> '/foo/': the middle dot-segment
                    # is the path, and 'index' is dropped to form the link.
                    base = file_.split('.')[1]
                    link = base.replace('index','')
                    self._tree.append(link)

        # De-duplicated set of all links found on disk.
        return set(self._tree)
# NOTE(review): fragment — the 'class Page' header and several
# constructor lines are elided from this view.
    """Class representing a version of a webpage"""
    def __init__(self,page):
        # Rendered HTML is produced lazily; None until rendering runs.
        self._rendered_article = None

        # Parse the article once and pull title/menu text when present.
        self._doc = bindery.parse(self._file, prefixes=PREFIXES)
        if self._doc.xml_select(u'/db:article/db:info/db:title'):
            # NOTE(review): BUG — local name 'doc' is undefined in this
            # scope; this presumably should read self._doc.article...
            self._title = unicode(doc.article.info.title)
        if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
            # NOTE(review): BUG — same undefined 'doc' here.
            self._menu = unicode(doc.article.info.titleabbrev)
# NOTE(review): fragment — method body continues; some lines elided.
        # Expand xi:include parse='text' elements whose target is an
        # executable script: run it and splice its XML stdout in place.
        dirname = os.path.dirname(self._file)
        code = self._doc.xml_select(u"//xi:include[@parse='text']")
        # NOTE(review): the 'for c in code:' loop header appears to be
        # elided from this view.
            (p, ext) = os.path.splitext(c.href)
            if ext in valid_scripts:
                # NOTE(review): dirname+c.href concatenates with no path
                # separator — likely needs os.path.join(dirname, c.href).
                exe = os.path.join(os.path.abspath(dirname+c.href))
                xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
                xstr = bindery.parse(str(xml.stdout.read()))
                # Insert the script output at the xi:include's position,
                # then drop the xi:include element itself.
                idp = c.xml_index_on_parent
                for x in xstr.xml_children:
                    c.xml_parent.xml_insert(idp,x)
                c.xml_parent.xml_remove(c)

        # Collect local files referenced by db:link and db:imagedata so
        # they can be copied next to the rendered page later.
        for r in self._doc.xml_select(u"//db:link[@xl:href]"):
            rf = os.path.join(dirname,r.href)
            if os.path.isfile(rf):
                self._resources.append(rf)
        for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
            im = os.path.join(dirname,i.fileref)
            if os.path.isfile(im):
                self._resources.append(im)
# NOTE(review): fragment — method header and some lines elided.
        # Render the article to HTML by shelling out to xsltproc, because
        # amara can not handle the docbook stylesheets:
        # xmlarticle = transform(doc,style_xslt)
        dirname = os.path.dirname(self._file)

        # Write the document to a temp file, transform, read the result.
        infile = os.path.basename(tempfile.mktemp())
        outfile = tempfile.mktemp()
        tfi = open(infile,'w')
        # NOTE(review): 'doc' is undefined in this scope — presumably
        # self._doc was intended.
        tfi.write(doc.xml_encode())

        # cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
        cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
        retcode = subprocess.call(cmd)
        # NOTE(review): the 'if retcode != 0:' guard appears to be elided
        # from this view before the error print.
        print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
        tfo = open(outfile,'r')
        self._rendered_article = tfo.read()
    def template(self,sitemap):
        """Render this page through the Cheetah template and write the
        HTML under the temporary target directory (tmptarget)."""
        htmlmenu = sitemap.gen_menu(self._lang,None)
        levelmenu = sitemap.gen_menu(self._lang,self)
        template = Template(file=style_tmpl,
                            searchList=[{'title':self._title},
                                        # NOTE(review): one searchList
                                        # entry is elided from this view.
                                        {'article':self._rendered_article},
                                        {'levelmenu':levelmenu},
                                        # NOTE(review): 'levelname' is not
                                        # defined in this method — verify.
                                        {'levelname':levelname}])
        outfile = tmptarget+self._file+'.'+self._lang+'.html'
        mkdir_p(os.path.dirname(outfile))
        out = open(outfile, 'w')
        out.write(str(template))
# NOTE(review): fragment — the 'class' header and several lines of both
# methods are elided from this view.
    """Class representing a webpage on the site"""
    def __init__(self,link):
        # find the representations of the link.
        # Directory-style links ('/foo/') have per-language index files.
        if self._link[-1] == '/':
            lang = self._scan_languages(path)
            # NOTE(review): a loop header binding 'l' over 'lang' appears
            # to be elided before this append.
            self._pages.append(Page(l))

    def _scan_languages(self,path):
        """Collect (language, filename) pairs for 'index.<lang>.xml'
        style files found under *path*."""
        # NOTE(review): the 'lang' list initialisation and the line
        # splitting each filename into 'ls' are elided from this view.
        for l in glob.glob('.'+path+'*'):
            # Dot-split filename: position 2 is the language code when
            # position 3 is the 'xml' extension.
            if len(ls) > 3 and ls[3] == 'xml':
                lang.append((ls[2],l))
# NOTE(review): fragment of a trie-node class; the class header and most
# method bodies are elided from this view.
    def __init__(self,token,value):
        # NOTE(review): constructor body elided from this view.

    # NOTE(review): accessor method header (e.g. 'def children(self):')
    # elided from this view.
        return self._children
    def _add(self,trie, key, content):
        """Recursively insert *key* (a list of path tokens) into *trie*,
        attaching *content* at the leaf.
        NOTE(review): several lines of this method are elided from this
        view (base case, loop headers)."""
            node = Node(k,content)
        # NOTE(review): intermediate lines elided here.
            self._add(ch.children(), key, content)

    def add(self,key, content):
        """Public entry point: insert *key*/*content* at the trie root."""
        self._add(self._root, key, content)
    def _graph(self, trie, G):
        """Recursively add trie nodes and edges to pygraphviz graph *G*.
        NOTE(review): the loop header over *trie* (binding 'l') is elided
        from this view."""
            G.add_node(l.token())
            for ch in l.children():
                G.add_edge(l.token(),ch.token())
                # NOTE(review): recursing inside the per-child loop walks
                # the whole child list once per child — looks like it was
                # intended one indentation level up. Verify.
                self._graph(l.children(), G)

    # NOTE(review): public graph-builder method header (e.g.
    # 'def graph(self):') elided from this view.
        # Root the drawing at a synthetic "sitemap" node.
        G = pgv.AGraph(directed=True)
        G.add_node("sitemap")
        for ch in self._root:
            G.add_edge("sitemap",ch.token())
        self._graph(self._root, G)
# NOTE(review): fragment — class header, constructor and some method
# headers are elided from this view.
    """Class keeping the internal site structure"""
        # Persisted link list lives in this file.
        self._file = 'sitemap.txt'

    def add_link(self, link):
        """Split *link* into path tokens and insert it into the trie."""
        tokens = filter(None,re.split(r'(^/\w*/|\w*/)',link))
        self._tree.add(tokens,Link(link))

    # NOTE(review): read-method header and its 'try:'/file-open lines are
    # elided from this view.
        # One link per whitespace-separated token in sitemap.txt.
        sml = f.read().split()
    except IOError, what_error:
        print 'INFO: Could not read sitemap.txt - one will be created'

    # NOTE(review): enclosing method header elided from this view.
        return set(link.link() for link in self._tree)

    def gen_menu(self,lang,page):
        # Placeholder — menu generation from the trie is not written yet.
        return 'Generate menu from sitemap - To be implemented'
def generateSitemap():
    """Build the site map: a list of page dicts (link, title, menu,
    output path, level, resources, ...) for every docbook article.

    Seeds entries from an existing sitemap.txt when readable, scans the
    current directory tree for *.xml articles, and finally rewrites
    sitemap.txt with one link per line.
    NOTE(review): many lines of this function are elided from this view,
    including the 'try:' matching the IOError handler and several loop
    headers; indentation below is a best-effort reconstruction.
    """
        # Fast path: sitemap.txt already exists — seed entries from it.
        sfile = open('sitemap.txt')
        flist = sfile.read().split()
            sitemap.append(dict(link=f))
    except IOError, what_error:
        print 'Sitemap missing - generating one.'
    # Scan every article below the current directory.
    for dirname, dirnames, filenames in os.walk('.'):
        for filename in filenames:
            if fnmatch.fnmatch(filename, '*.xml'):
                xfile = os.path.join(dirname,filename)
                doc = bindery.parse(xfile,
                                    prefixes={u'db': u'http://docbook.org/ns/docbook',
                                              u'xi': u'http://www.w3.org/2001/XInclude',
                                              u'xl': u'http://www.w3.org/1999/xlink'})
                # Select the interesting node sets up front.
                title = doc.xml_select(u'/db:article/db:info/db:title')
                menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
                code = doc.xml_select(u"//xi:include[@parse='text']")
                resource = doc.xml_select(u"//db:link[@xl:href]")
                image = doc.xml_select(u"//db:imagedata[@fileref]")
                # Executable xi:include targets (loop header over 'code'
                # binding 'c' is elided from this view).
                    (p, ext) = os.path.splitext(c.href)
                    if ext in valid_scripts:
                # './foo/index.xml' -> link '/foo/'; level = path depth.
                base = xfile.split('.')[1]
                link = base.replace('index','')
                level = len(filter(None,re.split(r'(^/\w*/|\w*/)',link)))
                # Local link/image resources (loop headers and appends
                # partly elided from this view).
                    rf = os.path.join(dirname,r.href)
                    if os.path.isfile(rf):
                    im = os.path.join(dirname,i.fileref)
                    if os.path.isfile(im):
                # Assemble the page entry (further keys elided).
                page = dict(title=unicode(doc.article.info.title),
                            menu=unicode(doc.article.info.titleabbrev),
                            output=os.path.join(dirname,
                                                filename.replace('xml','html')),
                # Merge with any entry loaded from sitemap.txt (loop
                # header over existing entries 'l' elided).
                if l['link'] == link:
                print "adding "+link+" to sitemap"
    # Persist the (possibly updated) sitemap, one link per line.
    sfile = open('sitemap.txt','w')
        sfile.write(l['link']+'\n')
def expandXincludeTxt(page):
    """Parse the article for *page* and replace xi:include parse='text'
    elements that point at executable scripts with the XML the script
    prints on stdout.
    NOTE(review): the 'for c in code:' loop header and the trailing
    'return doc' appear to be elided from this view."""
    doc = bindery.parse(page['file'],
                        prefixes={u'db': u'http://docbook.org/ns/docbook',
                                  u'xi': u'http://www.w3.org/2001/XInclude'})
    code = doc.xml_select(u"//xi:include[@parse='text']")
        (p, ext) = os.path.splitext(c.href)
        if ext in valid_scripts:
            # Run the included script and parse its stdout as XML.
            exe = os.path.join(os.path.abspath(c.href))
            xml = subprocess.Popen([exe],stdout=subprocess.PIPE)
            xstr = bindery.parse(str(xml.stdout.read()))
            # Splice the children in at the xi:include's position, then
            # remove the xi:include element itself.
            # NOTE(review): 'id' shadows the builtin of the same name.
            id = c.xml_index_on_parent
            for x in xstr.xml_children:
                c.xml_parent.xml_insert(id,x)
            c.xml_parent.xml_remove(c)
def xsltConvert(doc):
    """Transform *doc* (a docbook document) to HTML with xsltproc.

    The rendered HTML is read back from a temp file; the return
    statement is elided from this view.
    """
    # amara can not handle the docbook stylesheets
    # xmlarticle = transform(doc,style_xslt)
    # NOTE(review): BUG — 'page' is not a parameter of this function, so
    # page['file'] is undefined here. Compare the near-identical render
    # code in the Page class above.
    rundir = os.path.dirname(page['file'])

    # Write the document out, transform it, read the result back.
    infile = os.path.basename(tempfile.mktemp())
    outfile = tempfile.mktemp()
    tfi = open(infile,'w')
    tfi.write(doc.xml_encode())

    # cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
    cmd = ["xsltproc","--xinclude","--output",outfile,style_xslt,infile]
    retcode = subprocess.call(cmd)
    # NOTE(review): the 'if retcode != 0:' guard appears to be elided
    # from this view before the error print.
    print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
    tfo = open(outfile,'r')
def genMenu(page,sitemap,slevel,elevel):
    """Build an HTML <ul class="tree"> menu from *sitemap*, limited to
    nesting levels *slevel*..*elevel*, marking *page* as selected.

    Appears to return an (html, title) pair at the elided end of the
    function.
    NOTE(review): a large number of lines are elided from this view
    (branch bodies, the loop header binding 'p', initialisations of
    'sm', 'title' and 'oldlevel'); indentation below is a best-effort
    reconstruction.
    """
    # Whole-site menu vs. sibling-level menu selection.
    if elevel == MAXLEVEL or elevel == 1 or page == None:
        html = '<ul class="tree">\n'
        idx = sitemap.index(page)
        # Gather the run of sitemap entries sharing this page's level.
        while (sitemap[idx]['level'] == page['level']):
            title = sitemap[idx]['menu']
        while (idx < len(sitemap) and sitemap[idx]['level'] == page['level']):
            sm.append(sitemap[idx])
        # Emit one <li> per entry, opening/closing nested <ul>s when the
        # level changes (bodies of these branches largely elided).
        if slevel > p['level'] or elevel < p['level']:
        if not title and p['link'] == '/':
        if oldlevel < p['level']:
        elif oldlevel > p['level']:
            if p['link'][-1] == '/':
                html+='</ul>\n</li>\n'
        # Highlight the currently rendered page.
        if page != None and page == p:
            html+='<li class="selected"><a href="%s">%s</a>' % (p['link'],p['menu'])
        # NOTE(review): 'else:' header elided from this view.
            html+='<li><a href="%s">%s</a>' % (p['link'],p['menu'])
        if p['link'][-1] != '/' or p['link'] == '/':
        oldlevel = p['level']
def writeToTemplate(page,doc,sitemap):
    """Render *doc* through the Cheetah template for *page*, write the
    HTML into the temporary target tree, and copy the page's local
    resources (images, linked files) alongside it."""
    (menu,menuname) = genMenu(page,sitemap,1,MAXLEVEL)
    (levelmenu,levelname) = genMenu(page,sitemap,page['level'],page['level'])
    template = Template(file=style_tmpl,
                        searchList=[{'title':page['title']},
                                    # NOTE(review): two searchList entries
                                    # are elided from this view.
                                    {'levelmenu':levelmenu},
                                    {'levelname':levelname}])
    outfile = tmptarget+page['output']
    mkdir_p(os.path.dirname(outfile))
    out = open(outfile, 'w')
    out.write(str(template))

    # Copy referenced resources next to the rendered page.
    for r in page['res']:
        mkdir_p(os.path.dirname(tmptarget+r))
        shutil.copyfile(r, tmptarget+r)
def createSitemap(sitemap):
    """Render the site-wide overview page (sitemap.en.html) from the
    full menu into the temporary target tree."""
    (menu,menuname) = genMenu(None,sitemap,1,MAXLEVEL)
    template = Template(file=style_tmpl,
                        # NOTE(review): the searchList argument lines are
                        # elided from this view.
    outfile = tmptarget+'sitemap.en.html'
    mkdir_p(os.path.dirname(outfile))
    out = open(outfile, 'w')
    out.write(str(template))
# NOTE(review): fragment — the enclosing 'def' header is elided from
# this view.  Reconciles the on-disk article tree with the saved
# sitemap: pages on disk but not in the sitemap are added; pages in the
# sitemap but gone from disk are reported.
    missing = dir_.set() - sitemap.set()
    removed = sitemap.set() - dir_.set()
    # NOTE(review): BUG — 'removed' is a set; set + str raises TypeError.
    # Presumably str(len(removed)) (or similar) was intended.
    print removed+' pages missing!!'
    # NOTE(review): loop header over 'missing' (binding 'page') elided.
        print 'adding missing page '+page
        sitemap.add_page(page)
# Main driver: build the sitemap, render every page into a fresh
# temporary directory, then rsync the result (plus the style's css and
# images) into the output directory.
# NOTE(review): several lines are elided from this view, including the
# per-page loop header and the t1/t2 timing captures.
sitemap = generateSitemap()
tmptarget = tempfile.mkdtemp()+'/'
    # Trailing comma: keep the timing report on the same output line.
    print "Page : %-30s %30s" % (page['link'],
                                 time.ctime(os.stat(page['file']).st_mtime)),
    doc = expandXincludeTxt(page)
    pubdoc = xsltConvert(doc)
    writeToTemplate(page,pubdoc,sitemap)
    print "[%5.2f s]" % (round(t2-t1,2))

createSitemap(sitemap)
publish(tmptarget, args.output)
publish(args.style+"css", args.output)
publish(args.style+"images",args.output)