Switch from the external xsltproc call to the internal lxml XSLT transform.
Create a class for docbook and a class for image to separate
docbook-specific code from image-specific code.
--- /dev/null
+#!/usr/bin/python
+
+import os
+import subprocess
+
+from lxml import etree
+from lxml.builder import ElementMaker
+
+from pkg_resources import resource_filename, resource_listdir
+from time import time
+
+import treecutter.constants as const
+from treecutter.image import Image
+
+class Docbook():
+    """Class representing a docbook document.
+
+    The file is parsed once with lxml in the constructor; every method then
+    queries or modifies that in-memory tree.  Relative references found in
+    the document are resolved against the directory of the file.
+    """
+    def __init__(self,filename):
+        """Parse ``filename`` and remember the directory it lives in."""
+        self._filename = filename
+        self._doc = etree.parse(self._filename)
+        self._dirname = os.path.dirname(self._filename)
+
+    def _first_text(self,path):
+        """Return the text of the first node matching ``path``, or None."""
+        nodes = self._doc.xpath(path,namespaces=const.XPATH)
+        if not nodes:
+            return None
+        return unicode(nodes[0].text)
+
+    def title(self):
+        """Return the tuple ``(title, titleabbrev)`` of the article.
+
+        An entry is None when the corresponding element is missing, so the
+        caller never receives the empty xpath result list instead of text.
+        """
+        t = self._first_text(u'/db:article/db:info/db:title')
+        ta = self._first_text(u'/db:article/db:info/db:titleabbrev')
+        return (t, ta)
+
+    def expand_imageobjects(self):
+        """Replace every image object by a 800x600 version with a caption.
+
+        For each ``db:imageobject/db:imagedata[@fileref]`` a new imageobject
+        is built that points at the resized file and carries the image
+        caption plus a link to the original file.
+
+        NOTE(review): the original body could not run (undefined name,
+        ``Image`` built from an element, call to a missing ``format``
+        method) and dropped the element it built.  Where the new element
+        and the link belong in the tree is an assumption - confirm before
+        enabling the call in ``Page.prepare``.
+        """
+        db = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP)
+        images = self._doc.xpath(u"//db:imageobject/db:imagedata[@fileref]",namespaces=const.XPATH)
+        for imagedata in images:
+            fileref = imagedata.get('fileref')
+            # Image takes a file name, resolved like in collect_images().
+            image = Image(os.path.join(self._dirname,fileref))
+            link = db.link(image.infostr(),**{const.XLINK+"href": fileref})
+            # resize() returns the path of the scaled file; make it relative
+            # to the document again so it can be used as a fileref.
+            scaled = os.path.relpath(image.resize(800,600),
+                                     self._dirname or os.curdir)
+            expanded = db.imageobject(
+                db.imagedata(fileref=scaled, width=str(800), depth=str(600)),
+                db.caption(db.para(image.caption() or u""), db.para(link)))
+            old = imagedata.getparent()
+            # keep the text that followed the old element
+            expanded.tail = old.tail
+            old.getparent().replace(old, expanded)
+
+    def parse_xincludes(self):
+        """Execute script xincludes and insert their output into the tree.
+
+        Every ``xi:include`` with ``parse='text'`` whose href extension is
+        listed in ``const.valid_scripts`` is run as a program.  The script is
+        looked up next to the document first and then in the ``xinclude``
+        package resources.  ``accept-language`` and ``xpointer`` are passed
+        as ``lang=...`` and ``xptr=...`` arguments.  The standard output is
+        parsed as XML and replaces the include element.
+
+        A missing script is reported and the include is left untouched.
+        """
+        for c in self._doc.xpath(u"//xi:include[@parse='text']",namespaces=const.XPATH):
+            href = c.get('href')
+            alang = c.get('accept-language')
+            xpointer = c.get('xpointer')
+            (p, ext) = os.path.splitext(href)
+            if ext not in const.valid_scripts:
+                continue
+            script = os.path.abspath(self._dirname)+'/'+href
+            if not os.path.isfile(script):
+                if href in resource_listdir('xinclude', ''):
+                    script = resource_filename('xinclude', href)
+                else:
+                    print "Script "+href+" in "+self._filename+" missing"
+                    # nothing to execute, an empty command would only crash
+                    continue
+            exe = [script]
+            if alang:
+                exe.append("lang="+alang)
+            if xpointer:
+                exe.append("xptr="+xpointer)
+            print " executing %15s" % (href),
+            ts = time()
+            # Run the child in the document directory through the cwd
+            # argument; the working directory of this process is not changed
+            # and an empty dirname simply means "stay where we are".
+            xml = subprocess.Popen(exe,stdout=subprocess.PIPE,
+                                   stderr=subprocess.PIPE,
+                                   cwd=self._dirname or None)
+            (stdout, stderr) = xml.communicate()
+            if stderr:
+                # reported only, the output is still used
+                print " ".join(exe)+" ERROR : [ "+stderr+" ]"
+            te = time()
+            print " [%5.2f s] (%s)" % (round(te-ts,2),xpointer)
+            xstr = etree.fromstring(stdout)
+            # inserting the generated code and remove the xinclude reference
+            idp = c.getparent()
+            idp.insert(idp.index(c)+1,xstr)
+            idp.remove(c)
+
+    def collect_links(self):
+        """Return the existing files referenced by ``db:link`` elements.
+
+        A link marked ``security='encrypt'`` is encrypted symmetrically with
+        gpg (the passphrase is asked on the terminal); the link is rewritten
+        to the ``.gpg`` file and that file is returned instead.
+        """
+        res = []
+        for r in self._doc.xpath(u"//db:link[@xlink:href]",namespaces=const.XPATH):
+            rf = os.path.join(self._dirname,r.get(const.XLINK+'href'))
+            if not os.path.isfile(rf):
+                continue
+            if r.get('security')=='encrypt':
+                # Imported here: the import block of this module does not
+                # provide them and they are only needed for encryption.
+                import getpass
+                import gnupg
+                with open(rf, 'rb') as f:
+                    gpg = gnupg.GPG()
+                    gpg.encrypt_file(
+                        f, None, passphrase=getpass.getpass(rf+' password:'), symmetric=True,
+                        output=rf+'.gpg')
+                r.set(const.XLINK+'href', r.get(const.XLINK+'href')+'.gpg')
+                rf=rf+'.gpg'
+            res.append(rf)
+        return res
+
+    def collect_images(self):
+        """Return the existing files referenced by ``db:imagedata`` elements.
+
+        A warning is printed for every referenced file that is missing.
+        """
+        res = []
+        for i in self._doc.xpath(u"//db:imagedata[@fileref]",namespaces=const.XPATH):
+            im = os.path.join(self._dirname,i.get('fileref'))
+            if os.path.isfile(im):
+                res.append(im)
+            else:
+                print "WARNING: File "+im+" is missing!"
+        return res
+
+    def collect_forms(self):
+        """Return the existing python scripts used as ``html:form`` actions.
+
+        The action is cut at the first ``.py`` so that anything following
+        the script name is ignored.
+        """
+        # Imported here: the import block of this module does not provide it.
+        import re
+        res = []
+        for i in self._doc.xpath(u"//html:form[@action]",namespaces=const.XPATH):
+            pyscript = re.split(r'\.py',i.get('action'),1)[0]+'.py'
+            im = os.path.join(self._dirname,pyscript)
+            if os.path.isfile(im):
+                res.append(im)
+        return res
+
+    def tostring(self):
+        """Return the document serialized as UTF-8, not pretty printed."""
+        return etree.tostring(self._doc,encoding='UTF-8',pretty_print=False)
+
+    def xslt(self,transform):
+        """Apply the callable ``transform`` to the document and serialize the result."""
+        return etree.tostring(transform(self._doc))
+
+    def clean(self):
+        """Remove the elements that contain no text at any depth.
+
+        Only ``text`` of the element and of its descendants is looked at;
+        attributes and tail text are not considered.  The root element is
+        never removed.
+        """
+        def recursively_empty(e):
+            if e.text:
+                return False
+            return all((recursively_empty(c) for c in e.iterchildren()))
+
+        context = etree.iterwalk(self._doc)
+        for action, elem in context:
+            parent = elem.getparent()
+            # the root has no parent it could be removed from
+            if parent is not None and recursively_empty(elem):
+                parent.remove(elem)
--- /dev/null
+#!/usr/bin/python
+
+from PIL import Image as PIL_Image
+from libxmp import consts
+from libxmp import XMPFiles
+from treecutter.tools import sizeof_fmt
+
+
+class Image():
+    """Class representing an image.
+
+    Holds the file name of the image and a dictionary of the resized
+    versions created so far, keyed by their ``(x, y)`` size.
+    """
+    def __init__(self,filename):
+        """Remember ``filename``; the file is not opened here."""
+        self._filename = filename
+        self._format = {}
+
+    def infostr(self):
+        """Return ``[WIDTHxHEIGHT (SIZE)]`` for the image file."""
+        # Imported here: the import block of this module does not provide it.
+        import os
+        image = PIL_Image.open(self._filename)
+        try:
+            w,d = image.size
+        finally:
+            # release the file even when reading the size fails
+            image.close()
+        byte = os.path.getsize(self._filename)
+        return "[%dx%d (%s)]" % (w,d,sizeof_fmt(byte))
+
+    def resize(self,x,y):
+        """Create a version of the image fitting in ``x`` times ``y``.
+
+        The file is written next to the original as ``NAME.XxY.EXT`` unless
+        it exists already.  The path is stored under the size in the format
+        dictionary and returned.
+        """
+        # Imported here: the import block of this module does not provide it.
+        import os
+        size = (x, y)
+        base, ext = os.path.splitext(self._filename)
+        # ext still has its leading dot, do not add a second one
+        outfile = "%s.%dx%d%s" % (base, size[0], size[1], ext)
+        if not os.path.exists(outfile):
+            im = PIL_Image.open(self._filename)
+            im.thumbnail(size, PIL_Image.ANTIALIAS)
+            # NOTE(review): the original pasted the thumbnail on a
+            # transparent canvas of the full size but saved the thumbnail
+            # itself; only the save is kept.  Confirm whether padding to the
+            # exact size is wanted.
+            im.save(outfile)
+        self._format[size] = outfile
+        return outfile
+
+    def set_generated(self):
+        """Print the file name (placeholder)."""
+        print self._filename
+
+    def generated(self):
+        """Print the file name (placeholder)."""
+        print self._filename
+
+    def thumbnail(self):
+        """Return the path of the 50x50 version of the image."""
+        return self.resize(50,50)
+
+    def slider(self):
+        """Return the path of the 700x438 version of the image."""
+        return self.resize(700,438)
+
+    def caption(self):
+        """Return the first ``dc:description`` entry of the XMP metadata.
+
+        None is returned when no XMP packet is available.
+        """
+        xmpfile = XMPFiles(file_path=self._filename)
+        try:
+            xmp = xmpfile.get_xmp()
+            # presumably get_xmp() gives None for a file without XMP - verify
+            if xmp is None:
+                return None
+            return xmp.get_property(consts.XMP_NS_DC, 'description[1]' )
+        finally:
+            # close the file on every path
+            xmpfile.close_file()
import gnupg
import codecs
from lxml import etree
+from lxml.builder import ElementMaker
#from jinja2 import Template
import jinja2
-from pkg_resources import resource_filename, resource_listdir
from time import time
import treecutter.constants as const
+from treecutter.docbook import Docbook
from treecutter.tools import mkdir_p
class Page():
self._rendered_article = art
def prepare(self):
- self._doc = etree.parse(self._file)
- t = self._doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH)
- if t:
- self._title = unicode(t[0].text)
- ta = self._doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH)
- if ta:
- self._menu = unicode(ta[0].text)
- dirname = os.path.dirname(self._file)
- cwd = os.getcwd()
- code = self._doc.xpath(u"//xi:include[@parse='text']",namespaces=const.XPATH)
- if code:
- for c in code:
- href = c.get('href')
- alang = c.get('accept-language')
- xpointer = c.get('xpointer')
- (p, ext) = os.path.splitext(href)
- if ext in const.valid_scripts:
- exe = []
- script = os.path.join(os.path.abspath(dirname)+'/'+href)
- if os.path.isfile(script):
- exe.append(script)
- else:
- if href in resource_listdir('xinclude', ''):
- script = resource_filename('xinclude', href)
- exe.append(script)
- else:
- print "Script "+href+" in "+self._file+" missing"
- if alang:
- exe.append("lang="+alang)
- if xpointer:
- exe.append("xptr="+xpointer)
- print " executing %15s" % (href),
- ts = time()
- os.chdir(dirname)
- xml = subprocess.Popen(exe,stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- (stdout, stderr) = xml.communicate()
- if stderr:
- print " ".join(exe)+" ERROR : [ "+stderr+" ]"
- os.chdir(cwd)
- te = time()
- print " [%5.2f s] (%s)" % (round(te-ts,2),xpointer)
- xstr = etree.fromstring(stdout)
-# inserting the generated code and remove the xinclude reference
- idp = c.getparent()
- idp.insert(idp.index(c)+1,xstr)
- idp.remove(c)
+ self._doc = Docbook(self._file)
+ (self._title, self._menu) = self._doc.title()
+# self._doc.expand_imageobjects()
+ self._doc.parse_xincludes()
+ doc = self._doc.collect_links()
+ img = self._doc.collect_images()
+ form= self._doc.collect_forms()
+ self._resources = doc + img + form
- for r in self._doc.xpath(u"//db:link[@xlink:href]",namespaces=const.XPATH):
- rf = os.path.join(dirname,r.get(const.XLINK+'href'))
- if os.path.isfile(rf):
- if r.get('security')=='encrypt':
- with open(rf, 'rb') as f:
- gpg = gnupg.GPG()
- status = gpg.encrypt_file(
- f, None, passphrase=getpass.getpass(rf+' password:'), symmetric=True,
- output=rf+'.gpg')
- r.set(const.XLINK+'href', r.get(const.XLINK+'href')+'.gpg')
- rf=rf+'.gpg'
- self._resources.append(rf)
- for i in self._doc.xpath(u"//db:imagedata[@fileref]",namespaces=const.XPATH):
- im = os.path.join(dirname,i.get('fileref'))
- if os.path.isfile(im):
- self._resources.append(im)
- else:
- print "WARNING: File "+im+" is missing!"
- for i in self._doc.xpath(u"//html:form[@action]",namespaces=const.XPATH):
- pyscript = re.split('\.py',i.get('action'),1)[0]+'.py'
- im = os.path.join(dirname,pyscript)
- if os.path.isfile(im):
- self._resources.append(im)
-
- def render(self, style):
-
-# xslt_root = etree.XML(open(style+"docbook.xsl", 'r').read())
-# transform = etree.XSLT(xslt_root)
-# result = etree.tostring(transform(xml_root))
-
- cwd = os.getcwd()
- dirname = os.path.dirname(self._file)
- os.chdir(dirname)
- infile = os.path.basename(tempfile.mktemp())
- outfile = tempfile.mktemp()
- tfi = open(infile,'w')
- tfi.write(etree.tostring(self._doc,encoding='UTF-8',pretty_print=False))
- tfi.close()
-# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt]
- cmd = ["xsltproc","--xinclude","--output",outfile,style+"docbook.xhtml5.xsl",infile]
- retcode = subprocess.call(cmd)
- if retcode:
- print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
- tfo = codecs.open(outfile, "r", "utf-8")
- self._rendered_article = tfo.read()
- tfo.close()
- os.remove(infile)
- os.remove(outfile)
- os.chdir(cwd)
+ def render(self, transform):
+ """Store the article rendered with the 'xhtml5' entry of ``transform``."""
+ self._rendered_article = self._doc.xslt(transform['xhtml5'])
def template(self,sitemap,style,tdir,subdir):
htmlmenu = sitemap.gen_menu(self._lang,None,"menu")
languages=[tran])
t3 = time()
print "Language [%5.2f s]" % (round(t3-t2,2))
+ transform = {}
+ transform['xhtml5'] = etree.XSLT(etree.parse(self._style+"docbook.xhtml5.xsl"))
for link in self._tree:
- link.render(self._style)
+ link.render(transform)
t4 = time()
print "Render [%5.2f s]" % (round(t4-t3,2))
for link in self._tree: