#!/usr/bin/python
"""DocBook article wrapper.

Provides title lookup, expansion of script-backed ``xi:include`` elements
and collection of the files (links, images, form scripts) an article
refers to.
"""

import getpass
import os
import subprocess
import sys

from lxml import etree
from lxml.builder import ElementMaker

from pkg_resources import resource_filename, resource_listdir
from time import time

import treecutter.constants as const
from treecutter.image import Image

try:
    _text_type = unicode  # Python 2
except NameError:
    _text_type = str  # Python 3: str is already a unicode string


class Docbook(object):
    """Class representing a docbook document"""

    def __init__(self, filename):
        """Parse *filename* and remember the directory it lives in.

        Relative references found in the document (scripts, links, images,
        form actions) are resolved against that directory.
        """
        self._filename = filename
        self._doc = etree.parse(self._filename)
        self._dirname = os.path.dirname(self._filename)

    def title(self):
        """Return ``(title, titleabbrev)`` taken from ``db:article/db:info``.

        Each entry is the text of the first matching element converted to a
        unicode string.  When an element is absent, the corresponding entry
        is the (falsy) empty XPath result instead of a string.
        """
        t = self._doc.xpath(u'/db:article/db:info/db:title',
                            namespaces=const.XPATH)
        if t:
            t = _text_type(t[0].text)
        ta = self._doc.xpath(u'/db:article/db:info/db:titleabbrev',
                             namespaces=const.XPATH)
        if ta:
            ta = _text_type(ta[0].text)
        return (t, ta)

    def expand_imageobjects(self):
        """Build an expanded imageobject and info link for every image.

        For each ``db:imageobject/db:imagedata[@fileref]`` an ``Image`` is
        created and used to construct a link plus a new imageobject holding
        scaled imagedata and a caption.

        NOTE(review): the constructed elements are never attached to the
        document, so the tree itself is left unchanged by this method.
        """
        db = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP)
        images = self._doc.xpath(u"//db:imageobject/db:imagedata[@fileref]",
                                 namespaces=const.XPATH)
        for imagedata in images:
            image = Image(imagedata)
            # The link target used to be an undefined name (NameError on the
            # first image).  NOTE(review): assumes the link should point at
            # the referenced file -- TODO confirm.
            target = imagedata.get('fileref')
            link = db.link(image.infostr(), **{const.XLINK+"href": target})
            expanded = db.imageobject(
                db.imagedata(fileref=image.format(800, 600),
                             width=str(800), depth=str(600)),
                db.caption(db.para(image.caption())))

    def _find_script(self, href):
        """Return the path of the script *href*, or None if it is missing.

        The document's own directory is tried first, then the resources of
        the ``xinclude`` package.
        """
        script = os.path.join(os.path.abspath(self._dirname), href)
        if os.path.isfile(script):
            return script
        if href in resource_listdir('xinclude', ''):
            return resource_filename('xinclude', href)
        return None

    def parse_xincludes(self):
        """Replace script-backed ``xi:include`` elements by the script output.

        Every ``xi:include`` with ``parse='text'`` whose ``href`` extension is
        listed in ``const.valid_scripts`` is executed with the document's
        directory as working directory.  ``accept-language`` and ``xpointer``
        are handed to the script as ``lang=...`` and ``xptr=...`` arguments.
        The script's standard output is parsed as XML and takes the place of
        the include element.

        Includes whose script cannot be found are reported and left in the
        document.  Anything the script writes to standard error is reported,
        but processing continues.
        """
        for c in self._doc.xpath(u"//xi:include[@parse='text']",
                                 namespaces=const.XPATH):
            href = c.get('href')
            if not href:
                continue
            alang = c.get('accept-language')
            xpointer = c.get('xpointer')
            ext = os.path.splitext(href)[1]
            if ext not in const.valid_scripts:
                continue
            script = self._find_script(href)
            if script is None:
                # Nothing to execute: keep the include instead of starting a
                # process without an executable.
                print("Script "+href+" in "+self._filename+" missing")
                continue
            exe = [script]
            if alang:
                exe.append("lang="+alang)
            if xpointer:
                exe.append("xptr="+xpointer)
            # Progress output without a newline; the trailing space matches
            # what a Python 2 "print ...," followed by another print emits.
            sys.stdout.write(" executing %15s" % (href) + " ")
            sys.stdout.flush()
            ts = time()
            # Run inside the document directory through Popen's cwd rather
            # than os.chdir, so the caller's working directory is never
            # changed and an empty dirname (file in the current directory)
            # does not fail.
            xml = subprocess.Popen(exe, stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   cwd=self._dirname or None)
            (stdout, stderr) = xml.communicate()
            if stderr:
                print("%s ERROR : [ %s ]" % (" ".join(exe), stderr))
            te = time()
            print(" [%5.2f s] (%s)" % (round(te-ts, 2), xpointer))
            xstr = etree.fromstring(stdout)
            # Insert the generated code and remove the xinclude reference.
            # Removing an lxml element also removes its tail text, so carry
            # the tail over to the replacement first.
            xstr.tail = c.tail
            idp = c.getparent()
            idp.insert(idp.index(c)+1, xstr)
            idp.remove(c)

    def collect_links(self):
        """Return the paths of the existing local files targeted by db:link.

        A link marked ``security='encrypt'`` has its target encrypted
        symmetrically with GnuPG (the passphrase is asked for interactively)
        into ``<file>.gpg``; the link is rewritten to point at that file and
        the ``.gpg`` path is what gets collected.
        """
        res = []
        for r in self._doc.xpath(u"//db:link[@xlink:href]",
                                 namespaces=const.XPATH):
            rf = os.path.join(self._dirname, r.get(const.XLINK+'href'))
            if not os.path.isfile(rf):
                continue
            if r.get('security') == 'encrypt':
                # Imported here so the module stays importable when gnupg is
                # not installed and no link asks for encryption.
                import gnupg
                with open(rf, 'rb') as f:
                    gpg = gnupg.GPG()
                    gpg.encrypt_file(
                        f, None,
                        passphrase=getpass.getpass(rf+' password:'),
                        symmetric=True, output=rf+'.gpg')
                r.set(const.XLINK+'href', r.get(const.XLINK+'href')+'.gpg')
                rf = rf+'.gpg'
            res.append(rf)
        return res

    def collect_images(self):
        """Return the paths of all existing files referenced by imagedata.

        A warning is printed for every referenced file that is missing.
        """
        res = []
        for i in self._doc.xpath(u"//db:imagedata[@fileref]",
                                 namespaces=const.XPATH):
            im = os.path.join(self._dirname, i.get('fileref'))
            if os.path.isfile(im):
                res.append(im)
            else:
                print("WARNING: File "+im+" is missing!")
        return res

    def collect_forms(self):
        """Return the paths of the existing scripts named by form actions.

        The script name is the part of the ``action`` attribute before its
        first ``.py`` with ``.py`` appended again, which drops anything that
        follows the script name in the action.
        """
        res = []
        for i in self._doc.xpath(u"//html:form[@action]",
                                 namespaces=const.XPATH):
            pyscript = i.get('action').split('.py', 1)[0]+'.py'
            im = os.path.join(self._dirname, pyscript)
            if os.path.isfile(im):
                res.append(im)
        return res

    def tostring(self):
        """Return the document serialized as UTF-8, not pretty-printed."""
        return etree.tostring(self._doc, encoding='UTF-8', pretty_print=False)

    def xslt(self, transform):
        """Apply the callable *transform* to the document and serialize it."""
        return etree.tostring(transform(self._doc))

    def clean(self):
        """Remove every element that has no text anywhere inside it.

        An element counts as empty when neither it nor any of its
        descendants has text; attributes and tail text are not considered.
        The root element is never removed.
        """
        def recursively_empty(e):
            if e.text:
                return False
            return all((recursively_empty(c) for c in e.iterchildren()))

        context = etree.iterwalk(self._doc)
        for action, elem in context:
            parent = elem.getparent()
            # The root has no parent to remove it from.
            if parent is not None and recursively_empty(elem):
                parent.remove(elem)