Changed XML processing from amara to lxml, mainly because lxml is
more actively maintained and available in distributions.
Some HTML generation was also changed to lxml; there the namespace could
cause some problems (language menu).
HTML_NS="http://www.w3.org/1999/xhtml"
HTML = "{%s}" % HTML_NS
NSMAP = {None : DB_NS,
HTML_NS="http://www.w3.org/1999/xhtml"
HTML = "{%s}" % HTML_NS
NSMAP = {None : DB_NS,
+ 'xi' : XI_NS,
+ 'xlink' : XLINK_NS,
+ 'html' : HTML_NS}
+XPATH = {'db' : DB_NS,
+ 'xi' : XI_NS,
+ 'xlink' : XLINK_NS,
+ 'html' : HTML_NS}
#!/usr/bin/python
import os
import fnmatch
#!/usr/bin/python
import os
import fnmatch
-from amara import bindery
import treecutter.constants as const
class Directory():
import treecutter.constants as const
class Directory():
for filename in filenames:
if fnmatch.fnmatch(filename, '*.xml'):
file_ = os.path.join(dirname,filename)
for filename in filenames:
if fnmatch.fnmatch(filename, '*.xml'):
file_ = os.path.join(dirname,filename)
- doc = bindery.parse(file_, prefixes=const.PREFIXES)
- title = doc.xml_select(u'/db:article/db:info/db:title')
- menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
+ doc = etree.parse(file_)
+ title = doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH)
+ menu = doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH)
if title and menu:
base = file_.split('.')[1]
link = base.replace('index','')
if title and menu:
base = file_.split('.')[1]
link = base.replace('index','')
import subprocess
import tempfile
import re
import subprocess
import tempfile
import re
-from amara import bindery
-from amara.xslt import transform
from Cheetah.Template import Template
from pkg_resources import resource_filename, resource_listdir
from time import time
from Cheetah.Template import Template
from pkg_resources import resource_filename, resource_listdir
from time import time
self._rendered_article = art
def prepare(self):
self._rendered_article = art
def prepare(self):
- self._doc = bindery.parse(self._file, prefixes=const.PREFIXES)
- if self._doc.xml_select(u'/db:article/db:info/db:title'):
- self._title = unicode(self._doc.article.info.title)
- if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
- self._menu = unicode(self._doc.article.info.titleabbrev)
+ self._doc = etree.parse(self._file)
+ t = self._doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH)
+ if t:
+ self._title = unicode(t[0].text)
+ ta = self._doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH)
+ if ta:
+ self._menu = unicode(ta[0].text)
dirname = os.path.dirname(self._file)
cwd = os.getcwd()
dirname = os.path.dirname(self._file)
cwd = os.getcwd()
- code = self._doc.xml_select(u"//xi:include[@parse='text']")
+ code = self._doc.xpath(u"//xi:include[@parse='text']",namespaces=const.XPATH)
- (p, ext) = os.path.splitext(c.href)
+ href = c.get('href')
+ alang = c.get('accept-language')
+ xpointer = c.get('xpointer')
+ (p, ext) = os.path.splitext(href)
if ext in const.valid_scripts:
exe = []
if ext in const.valid_scripts:
exe = []
- script = os.path.join(os.path.abspath(dirname)+'/'+c.href)
+ script = os.path.join(os.path.abspath(dirname)+'/'+href)
if os.path.isfile(script):
exe.append(script)
else:
if os.path.isfile(script):
exe.append(script)
else:
- if c.href in resource_listdir('xinclude', ''):
- script = resource_filename('xinclude', c.href)
+ if href in resource_listdir('xinclude', ''):
+ script = resource_filename('xinclude', href)
- print "Script "+c.href+" in "+self._file+" missing"
- if c.xml_select(u"//xi:include[@accept-language]"):
- alang = c.xml_attributes[None, "accept-language"]
+ print "Script "+href+" in "+self._file+" missing"
+ if alang:
exe.append("lang="+alang)
exe.append("lang="+alang)
- if c.xml_select(u"//xi:include[@xpointer]"):
- exe.append("xptr="+c.xpointer)
- print " executing %15s" % (c.href),
+ if xpointer:
+ exe.append("xptr="+xpointer)
+ print " executing %15s" % (href),
ts = time()
os.chdir(dirname)
ts = time()
os.chdir(dirname)
- xml = subprocess.Popen(exe,stdout=subprocess.PIPE)
+ xml = subprocess.Popen(exe,stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ (stdout, stderr) = xml.communicate()
+ if stderr:
+ print " ".join(exe)+" ERROR : [ "+stderr+" ]"
- xmlblock = str(xml.stdout.read())
- print " [%5.2f s] (%s)" % (round(te-ts,2),c.xpointer)
- xstr = bindery.parse(xmlblock)
- idp = c.xml_index_on_parent
- for x in xstr.xml_children:
- c.xml_parent.xml_insert(idp,x)
- c.xml_parent.xml_remove(c)
+ print " [%5.2f s] (%s)" % (round(te-ts,2),xpointer)
+ xstr = etree.fromstring(stdout)
+# insert the generated code and remove the xinclude reference
+ idp = c.getparent()
+ idp.insert(idp.index(c)+1,xstr)
+ idp.remove(c)
- for r in self._doc.xml_select(u"//db:link[@xl:href]"):
- rf = os.path.join(dirname,r.href)
+ for r in self._doc.xpath(u"//db:link[@xlink:href]",namespaces=const.XPATH):
+ rf = os.path.join(dirname,r.get(const.XLINK+'href'))
if os.path.isfile(rf):
self._resources.append(rf)
if os.path.isfile(rf):
self._resources.append(rf)
- for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
- im = os.path.join(dirname,i.fileref)
+ for i in self._doc.xpath(u"//db:imagedata[@fileref]",namespaces=const.XPATH):
+ im = os.path.join(dirname,i.get('fileref'))
if os.path.isfile(im):
self._resources.append(im)
if os.path.isfile(im):
self._resources.append(im)
- for i in self._doc.xml_select(u"//html:form[@action]"):
- pyscript = re.split('\.py',i.action,1)[0]+'.py'
+ for i in self._doc.xpath(u"//html:form[@action]",namespaces=const.XPATH):
+ pyscript = re.split('\.py',i.get('action'),1)[0]+'.py'
im = os.path.join(dirname,pyscript)
if os.path.isfile(im):
self._resources.append(im)
def render(self, style):
im = os.path.join(dirname,pyscript)
if os.path.isfile(im):
self._resources.append(im)
def render(self, style):
- # amara can not handle the docbook stylesheets
- # xmlarticle = transform(doc,style_xslt)
+
+# xslt_root = etree.XML(open(style+"docbook.xsl", 'r').read())
+# transform = etree.XSLT(xslt_root)
+# result = etree.tostring(transform(xml_root))
+
cwd = os.getcwd()
dirname = os.path.dirname(self._file)
os.chdir(dirname)
infile = os.path.basename(tempfile.mktemp())
outfile = tempfile.mktemp()
tfi = open(infile,'w')
cwd = os.getcwd()
dirname = os.path.dirname(self._file)
os.chdir(dirname)
infile = os.path.basename(tempfile.mktemp())
outfile = tempfile.mktemp()
tfi = open(infile,'w')
- tfi.write(self._doc.xml_encode(omit_xml_declaration=True))
+ tfi.write(etree.tostring(self._doc,encoding='UTF-8',pretty_print=False))
tfi.close()
# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
cmd = ["xsltproc","--xinclude","--output",outfile,style+"docbook.xsl",infile]
tfi.close()
# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
cmd = ["xsltproc","--xinclude","--output",outfile,style+"docbook.xsl",infile]
import shutil
import gettext
import tempfile
import shutil
import gettext
import tempfile
-from amara import bindery
+from lxml import etree
+from lxml.builder import ElementMaker
+from treecutter import constants as const
from treecutter.trie import Trie
from treecutter.link import Link
from treecutter.tools import ssh_cmd, publish, mkdir_p
from treecutter.trie import Trie
from treecutter.link import Link
from treecutter.tools import ssh_cmd, publish, mkdir_p
self._file = 'sitemap.txt'
self._tree = Trie()
self._sitelang = set()
self._file = 'sitemap.txt'
self._tree = Trie()
self._sitelang = set()
- self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
+ self._isocode = etree.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
self._tranlang = {}
self._tmptarget = tempfile.mkdtemp()+'/'
self._tranlang = {}
self._tmptarget = tempfile.mkdtemp()+'/'
return self._tree.menu(lang,page,cssclass)
def lang_menu(self,lang,link):
return self._tree.menu(lang,page,cssclass)
def lang_menu(self,lang,link):
+ html = ElementMaker(namespace=const.HTML_NS)
+ menu = html.ul()
for l in link.languages():
isoxml = u"//iso_639_3_entry[@*='"+l+"']"
for l in link.languages():
isoxml = u"//iso_639_3_entry[@*='"+l+"']"
- ln = self._isocode.xml_select(isoxml)[0].name
+ ln = self._isocode.xpath(isoxml)[0].get('name')
if lang != 'en':
ln = self._tranlang[lang].gettext(ln)
p = link.link()
if p[-1] == '/':
p = p +'index'
p = p+'.'+l
if lang != 'en':
ln = self._tranlang[lang].gettext(ln)
p = link.link()
if p[-1] == '/':
p = p +'index'
p = p+'.'+l
- html += '<li><a href="%s" hreflang="%s">%s</a></li>' % (p, l, ln)
- html += "</ul>"
- return html
+ li = html.li(html.a(ln,href=p,hreflang=l))
+ menu.append(li)
+ return etree.tostring(menu,encoding='UTF-8',pretty_print=False)
def publish(self,output,style):
ssh_cmd(output,"mkdir -p")
def publish(self,output,style):
ssh_cmd(output,"mkdir -p")