From b011c30d2ca867e3e68451c7a3bfced704216fc1 Mon Sep 17 00:00:00 2001 From: Fredrik Unger Date: Fri, 27 Feb 2015 10:16:29 +0100 Subject: [PATCH] page: using lxml transform, simple docbook & image Switching from external call to use the internal lxml xslt transform. Creating a class for docbook and image to separate out docbook specific code and image specific code. --- treecutter/docbook.py | 133 ++++++++++++++++++++++++++++++++++++++++++ treecutter/image.py | 52 +++++++++++++++++ treecutter/page.py | 109 ++++------------------------------ treecutter/sitemap.py | 4 +- 4 files changed, 200 insertions(+), 98 deletions(-) create mode 100644 treecutter/docbook.py create mode 100644 treecutter/image.py diff --git a/treecutter/docbook.py b/treecutter/docbook.py new file mode 100644 index 0000000..fd822ce --- /dev/null +++ b/treecutter/docbook.py @@ -0,0 +1,133 @@ +#!/usr/bin/python + +import os +import subprocess + +from lxml import etree +from lxml.builder import ElementMaker + +from pkg_resources import resource_filename, resource_listdir +from time import time + +import treecutter.constants as const +from treecutter.image import Image + +class Docbook(): + """Class representing a docbook document""" + def __init__(self,filename): + self._filename = filename + self._doc = etree.parse(self._filename) + self._dirname = os.path.dirname(self._filename) + + def title(self): + t = self._doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH) + if t: + t = unicode(t[0].text) + ta = self._doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH) + if ta: + ta = unicode(ta[0].text) + return (t, ta) + + def expand_imageobjects(self): + db = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP) + images = self._doc.xpath(u"//db:imageobject/db:imagedata[@fileref]",namespaces=const.XPATH) + for io in images: + image = Image(io) + link = db.link(image.infostr(),**{const.XLINK+"href": f}) + io = db.imageobject( + db.imagedata(fileref=image.format(800,600), width=str(800), depth=str(600)), + db.caption(db.para(image.caption()))) + + def parse_xincludes(self): + cwd = os.getcwd() + for c in self._doc.xpath(u"//xi:include[@parse='text']",namespaces=const.XPATH): + href = c.get('href') + alang = c.get('accept-language') + xpointer = c.get('xpointer') + (p, ext) = os.path.splitext(href) + if ext in const.valid_scripts: + exe = [] + script = os.path.join(os.path.abspath(self._dirname)+'/'+href) + if os.path.isfile(script): + exe.append(script) + else: + if href in resource_listdir('xinclude', ''): + script = resource_filename('xinclude', href) + exe.append(script) + else: + print "Script "+href+" in "+self._filename+" missing" + if alang: + exe.append("lang="+alang) + if xpointer: + exe.append("xptr="+xpointer) + print " executing %15s" % (href), + ts = time() + os.chdir(self._dirname) + xml = subprocess.Popen(exe,stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + (stdout, stderr) = xml.communicate() + #print xml.returnvalue + if stderr: + print " ".join(exe)+" ERROR : [ "+stderr+" ]" + exit + os.chdir(cwd) + te = time() + print " [%5.2f s] (%s)" % (round(te-ts,2),xpointer) + xstr = etree.fromstring(stdout) +# inserting the generated code and remove the xinclude reference + idp = c.getparent() + idp.insert(idp.index(c)+1,xstr) + idp.remove(c) + + def collect_links(self): + res = [] + for r in self._doc.xpath(u"//db:link[@xlink:href]",namespaces=const.XPATH): + rf = os.path.join(self._dirname,r.get(const.XLINK+'href')) + if os.path.isfile(rf): + if r.get('security')=='encrypt': + with open(rf, 'rb') as f: + gpg = gnupg.GPG() + status = gpg.encrypt_file( + f, None, passphrase=getpass.getpass(rf+' password:'), symmetric=True, + output=rf+'.gpg') + r.set(const.XLINK+'href', r.get(const.XLINK+'href')+'.gpg') + rf=rf+'.gpg' + res.append(rf) + return res + + def collect_images(self): + res = [] + for i in self._doc.xpath(u"//db:imagedata[@fileref]",namespaces=const.XPATH): + im = os.path.join(self._dirname,i.get('fileref')) + if os.path.isfile(im): + res.append(im) + else: + print "WARNING: File "+im+" is missing!" + return res + + def collect_forms(self): + res = [] + for i in self._doc.xpath(u"//html:form[@action]",namespaces=const.XPATH): + pyscript = re.split('\.py',i.get('action'),1)[0]+'.py' + im = os.path.join(self._dirname,pyscript) + if os.path.isfile(im): + res.append(im) + return res + + def tostring(self): + return etree.tostring(self._doc,encoding='UTF-8',pretty_print=False) + + def xslt(self,transform): + return etree.tostring(transform(self._doc)) + + def clean(self): + def recursively_empty(e): + if e.text: + return False + return all((recursively_empty(c) for c in e.iterchildren())) + + context = etree.iterwalk(self._doc) + for action, elem in context: + parent = elem.getparent() + if recursively_empty(elem): + parent.remove(elem) diff --git a/treecutter/image.py b/treecutter/image.py new file mode 100644 index 0000000..7d01462 --- /dev/null +++ b/treecutter/image.py @@ -0,0 +1,52 @@ +#!/usr/bin/python + +from PIL import Image as PIL_Image +from libxmp import consts +from libxmp import XMPFiles +from treecutter.tools import sizeof_fmt + + +class Image(): + """Class representing an image""" + def __init__(self,filename): + self._filename = filename + self._format = {} + + def infostr(self): + image = PIL_Image.open(self._filename) + w,d = image.size + image.close() + byte = os.path.getsize(self._filename) + return "[%dx%d (%s)]" % (w,d,sizeof_fmt(byte)) + + def resize(self,x,y): + size = (x, y) + outfile, ext = os.path.splitext(self._filename) + outfile = "%s.%dx%d.%s" % (outfile, size[0], size[1], ext) + if not os.path.exists(outfile): + im = PIL_Image.open(infile) + im.thumbnail(size, PIL_Image.ANTIALIAS) + bg = PIL_Image.new('RGBA', size, (0, 0, 0, 0)) + bg.paste(im,((size[0]-im.size[0])/2, (size[1]-im.size[1])/2)) + im.save(outfile) + self._format[size] = outfile + return outfile + + def set_generated(self): + print self._filename + + def generated(self): + print self._filename + + def thumbnail(self): + return image.resize(50,50) + + def slider(self): + return image.resize(700,438) + + def caption(self): + xmpfile = XMPFiles(file_path=self._filename) + xmp = xmpfile.get_xmp() + cap = xmp.get_property(consts.XMP_NS_DC, 'description[1]' ) + xmpfile.close_file() + return cap diff --git a/treecutter/page.py b/treecutter/page.py index dea69e6..8843df9 100644 --- a/treecutter/page.py +++ b/treecutter/page.py @@ -7,11 +7,12 @@ import getpass import gnupg import codecs from lxml import etree +from lxml.builder import ElementMaker #from jinja2 import Template import jinja2 -from pkg_resources import resource_filename, resource_listdir from time import time import treecutter.constants as const +from treecutter.docbook import Docbook from treecutter.tools import mkdir_p class Page(): @@ -39,103 +40,17 @@ class Page(): self._rendered_article = art def prepare(self): - self._doc = etree.parse(self._file) - t = self._doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH) - if t: - self._title = unicode(t[0].text) - ta = self._doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH) - if ta: - self._menu = unicode(ta[0].text) - dirname = os.path.dirname(self._file) - cwd = os.getcwd() - code = self._doc.xpath(u"//xi:include[@parse='text']",namespaces=const.XPATH) - if code: - for c in code: - href = c.get('href') - alang = c.get('accept-language') - xpointer = c.get('xpointer') - (p, ext) = os.path.splitext(href) - if ext in const.valid_scripts: - exe = [] - script = os.path.join(os.path.abspath(dirname)+'/'+href) - if os.path.isfile(script): - exe.append(script) - else: - if href in resource_listdir('xinclude', ''): - script = resource_filename('xinclude', href) - exe.append(script) - else: - print "Script "+href+" in "+self._file+" missing" - if alang: - exe.append("lang="+alang) - if xpointer: - exe.append("xptr="+xpointer) - print " executing %15s" % (href), - ts = time() - os.chdir(dirname) - xml = subprocess.Popen(exe,stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - (stdout, stderr) = xml.communicate() - if stderr: - print " ".join(exe)+" ERROR : [ "+stderr+" ]" - os.chdir(cwd) - te = time() - print " [%5.2f s] (%s)" % (round(te-ts,2),xpointer) - xstr = etree.fromstring(stdout) -# inserting the generated code and remove the xinclude reference - idp = c.getparent() - idp.insert(idp.index(c)+1,xstr) - idp.remove(c) + self._doc = Docbook(self._file) + (self._title, self._menu) = self._doc.title() +# self._doc.expand_imageobjects() + self._doc.parse_xincludes() + doc = self._doc.collect_links() + img = self._doc.collect_images() + form= self._doc.collect_forms() + self._resources = doc + img + form - for r in self._doc.xpath(u"//db:link[@xlink:href]",namespaces=const.XPATH): - rf = os.path.join(dirname,r.get(const.XLINK+'href')) - if os.path.isfile(rf): - if r.get('security')=='encrypt': - with open(rf, 'rb') as f: - gpg = gnupg.GPG() - status = gpg.encrypt_file( - f, None, passphrase=getpass.getpass(rf+' password:'), symmetric=True, - output=rf+'.gpg') - r.set(const.XLINK+'href', r.get(const.XLINK+'href')+'.gpg') - rf=rf+'.gpg' - self._resources.append(rf) - for i in self._doc.xpath(u"//db:imagedata[@fileref]",namespaces=const.XPATH): - im = os.path.join(dirname,i.get('fileref')) - if os.path.isfile(im): - self._resources.append(im) - else: - print "WARNING: File "+im+" is missing!" - for i in self._doc.xpath(u"//html:form[@action]",namespaces=const.XPATH): - pyscript = re.split('\.py',i.get('action'),1)[0]+'.py' - im = os.path.join(dirname,pyscript) - if os.path.isfile(im): - self._resources.append(im) - - def render(self, style): - -# xslt_root = etree.XML(open(style+"docbook.xsl", 'r').read()) -# transform = etree.XSLT(xslt_root) -# result = etree.tostring(transform(xml_root)) - - cwd = os.getcwd() - dirname = os.path.dirname(self._file) - os.chdir(dirname) - infile = os.path.basename(tempfile.mktemp()) - outfile = tempfile.mktemp() - tfi = open(infile,'w') - tfi.write(etree.tostring(self._doc,encoding='UTF-8',pretty_print=False)) - tfi.close() -# cmd = ["saxon-xslt-xinclude","-o",outfile,infile,style_xslt] - cmd = ["xsltproc","--xinclude","--output",outfile,style+"docbook.xhtml5.xsl",infile] - retcode = subprocess.call(cmd) - if retcode: - print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']' - tfo = codecs.open(outfile, "r", "utf-8") - self._rendered_article = tfo.read() - tfo.close() - os.remove(infile) - os.remove(outfile) - os.chdir(cwd) + def render(self, transform): + self._rendered_article = self._doc.xslt(transform['xhtml5']) def template(self,sitemap,style,tdir,subdir): htmlmenu = sitemap.gen_menu(self._lang,None,"menu") diff --git a/treecutter/sitemap.py b/treecutter/sitemap.py index 34fb328..11e300e 100644 --- a/treecutter/sitemap.py +++ b/treecutter/sitemap.py @@ -73,8 +73,10 @@ class Sitemap(): languages=[tran]) t3 = time() print "Language [%5.2f s]" % (round(t3-t2,2)) + transform = {} + transform['xhtml5'] = etree.XSLT(etree.parse(self._style+"docbook.xhtml5.xsl")) for link in self._tree: - link.render(self._style) + link.render(transform) t4 = time() print "Render [%5.2f s]" % (round(t4-t3,2)) for link in self._tree: -- 2.30.2