xml: change from amara to lxml
[treecutter.git] / treecutter / page.py
index bf14a0043bbd4c5fbafe877a6f10e9b229a36b15..22d380ec033562358c0d01562af3d4544f5b87e7 100644 (file)
@@ -3,8 +3,7 @@ import os
 import subprocess
 import tempfile
 import re
-from amara import bindery
-from amara.xslt import transform
+from lxml import etree
 from Cheetah.Template import Template
 from pkg_resources import resource_filename, resource_listdir
 from time import time
@@ -36,72 +35,82 @@ class Page():
         self._rendered_article = art
 
     def prepare(self):
-        self._doc = bindery.parse(self._file, prefixes=const.PREFIXES)
-        if self._doc.xml_select(u'/db:article/db:info/db:title'):
-            self._title = unicode(self._doc.article.info.title)
-        if self._doc.xml_select(u'/db:article/db:info/db:titleabbrev'):
-            self._menu = unicode(self._doc.article.info.titleabbrev)
+        self._doc = etree.parse(self._file)
+        t = self._doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH)
+        if t:
+            self._title = unicode(t[0].text)
+        ta = self._doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH)
+        if ta:
+            self._menu = unicode(ta[0].text)
 
         dirname = os.path.dirname(self._file)
         cwd = os.getcwd()
-        code  = self._doc.xml_select(u"//xi:include[@parse='text']")
+        code  = self._doc.xpath(u"//xi:include[@parse='text']",namespaces=const.XPATH)
         if code:
             for c in code:
-                (p, ext) = os.path.splitext(c.href)
+                href = c.get('href')
+                alang = c.get('accept-language')
+                xpointer = c.get('xpointer')
+                (p, ext) = os.path.splitext(href)
                 if ext in const.valid_scripts:
                     exe = []
-                    script = os.path.join(os.path.abspath(dirname)+'/'+c.href)
+                    script = os.path.join(os.path.abspath(dirname)+'/'+href)
                     if os.path.isfile(script):
                         exe.append(script)
                     else:
-                        if c.href in resource_listdir('xinclude', ''):
-                            script = resource_filename('xinclude', c.href)
+                        if href in resource_listdir('xinclude', ''):
+                            script = resource_filename('xinclude', href)
                             exe.append(script)
                         else:
-                            print "Script "+c.href+" in "+self._file+" missing"
-                    if c.xml_select(u"//xi:include[@accept-language]"):
-                        alang = c.xml_attributes[None, "accept-language"]
+                            print "Script "+href+" in "+self._file+" missing"
+                    if alang:
                         exe.append("lang="+alang)
-                    if c.xml_select(u"//xi:include[@xpointer]"):
-                        exe.append("xptr="+c.xpointer)
-                    print "  executing %15s" % (c.href),
+                    if xpointer:
+                        exe.append("xptr="+xpointer)
+                    print "  executing %15s" % (href),
                     ts = time()
                     os.chdir(dirname)
-                    xml = subprocess.Popen(exe,stdout=subprocess.PIPE)
+                    xml = subprocess.Popen(exe,stdout=subprocess.PIPE,
+                                           stderr=subprocess.PIPE)
+                    (stdout, stderr) = xml.communicate()
+                    if stderr:
+                        print " ".join(exe)+" ERROR : [ "+stderr+" ]"
                     os.chdir(cwd)
-                    xmlblock = str(xml.stdout.read())
                     te = time()
-                    print " [%5.2f s]  (%s)" % (round(te-ts,2),c.xpointer)
-                    xstr = bindery.parse(xmlblock)
-                    idp = c.xml_index_on_parent
-                    for x in xstr.xml_children:
-                        c.xml_parent.xml_insert(idp,x)
-                        c.xml_parent.xml_remove(c)
+                    print " [%5.2f s]  (%s)" % (round(te-ts,2),xpointer)
+                    xstr = etree.fromstring(stdout)
+# insert the generated code and remove the xinclude reference
+                    idp = c.getparent()
+                    idp.insert(idp.index(c)+1,xstr)
+                    idp.remove(c)
 
-        for r in self._doc.xml_select(u"//db:link[@xl:href]"):
-            rf = os.path.join(dirname,r.href)
+        for r in self._doc.xpath(u"//db:link[@xlink:href]",namespaces=const.XPATH):
+            rf = os.path.join(dirname,r.get(const.XLINK+'href'))
             if os.path.isfile(rf):
                 self._resources.append(rf)
-        for i in self._doc.xml_select(u"//db:imagedata[@fileref]"):
-            im = os.path.join(dirname,i.fileref)
+        for i in self._doc.xpath(u"//db:imagedata[@fileref]",namespaces=const.XPATH):
+            im = os.path.join(dirname,i.get('fileref'))
             if os.path.isfile(im):
                 self._resources.append(im)
-        for i in self._doc.xml_select(u"//html:form[@action]"):
-            pyscript = re.split('\.py',i.action,1)[0]+'.py'
+        for i in self._doc.xpath(u"//html:form[@action]",namespaces=const.XPATH):
+            pyscript = re.split('\.py',i.get('action'),1)[0]+'.py'
             im = os.path.join(dirname,pyscript)
             if os.path.isfile(im):
                 self._resources.append(im)
 
     def render(self, style):
-        #  amara can not handle the docbook stylesheets
-        #  xmlarticle = transform(doc,style_xslt)
+
+#        xslt_root = etree.XML(open(style+"docbook.xsl", 'r').read())
+#        transform = etree.XSLT(xslt_root)
+#        result = etree.tostring(transform(xml_root))
+
         cwd = os.getcwd()
         dirname = os.path.dirname(self._file)
         os.chdir(dirname)
         infile  = os.path.basename(tempfile.mktemp())
         outfile = tempfile.mktemp()
         tfi = open(infile,'w')
-        tfi.write(self._doc.xml_encode(omit_xml_declaration=True))
+        tfi.write(etree.tostring(self._doc,encoding='UTF-8',pretty_print=False))
         tfi.close()
 #  cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
         cmd = ["xsltproc","--xinclude","--output",outfile,style+"docbook.xsl",infile]