treecutter/docbook.py

   1 #!/usr/bin/python
   2
   3 import os
   4 import subprocess
   5
   6 from lxml import etree
   7 from lxml.builder import ElementMaker
   8
   9 from pkg_resources import resource_filename, resource_listdir
  10 from time import time
  11
  12 import treecutter.constants as const
  13 from treecutter.image import Image
  14
  15 class Docbook():
  16     """Class representing a docbook document"""
  17     def __init__(self,filename):
  18         self._filename = filename
  19         self._doc = etree.parse(self._filename)
  20         self._dirname = os.path.dirname(self._filename)
  21
  22     def title(self):
  23         t = self._doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH)
  24         if t:
  25             t = unicode(t[0].text)
  26         ta = self._doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH)
  27         if ta:
  28            ta = unicode(ta[0].text)
  29         return (t, ta)
  30
  31     def expand_imageobjects(self):
  32         db = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP)
  33         images  = self._doc.xpath(u"//db:imageobject/db:imagedata[@fileref]",namespaces=const.XPATH)
  34         for io in images:
  35             image = Image(io)
  36             link = db.link(image.infostr(),**{const.XLINK+"href": f})
  37             io = db.imageobject(
  38                 db.imagedata(fileref=image.format(800,600), width=str(800), depth=str(600)),
  39                 db.caption(db.para(image.caption())))
  40
  41     def parse_xincludes(self):
  42         cwd = os.getcwd()
  43         for c in self._doc.xpath(u"//xi:include[@parse='text']",namespaces=const.XPATH):
  44             href = c.get('href')
  45             alang = c.get('accept-language')
  46             xpointer = c.get('xpointer')
  47             (p, ext) = os.path.splitext(href)
  48             if ext in const.valid_scripts:
  49                 exe = []
  50                 script = os.path.join(os.path.abspath(self._dirname)+'/'+href)
  51                 if os.path.isfile(script):
  52                     exe.append(script)
  53                 else:
  54                     if href in resource_listdir('xinclude', ''):
  55                         script = resource_filename('xinclude', href)
  56                         exe.append(script)
  57                     else:
  58                         print "Script "+href+" in "+self._filename+" missing"
  59                 if alang:
  60                     exe.append("lang="+alang)
  61                 if xpointer:
  62                     exe.append("xptr="+xpointer)
  63                 print "  executing %15s" % (href),
  64                 ts = time()
  65                 os.chdir(self._dirname)
  66                 xml = subprocess.Popen(exe,stdout=subprocess.PIPE,
  67                                        stderr=subprocess.PIPE)
  68                 (stdout, stderr) = xml.communicate()
  69                 #print xml.returnvalue
  70                 if stderr:
  71                     print " ".join(exe)+" ERROR : [ "+stderr+" ]"
  72                     exit
  73                 os.chdir(cwd)
  74                 te = time()
  75                 print " [%5.2f s]  (%s)" % (round(te-ts,2),xpointer)
  76                 xstr = etree.fromstring(stdout)
  77 # inserting the generated code and remove the xinclude reference
  78                 idp = c.getparent()
  79                 idp.insert(idp.index(c)+1,xstr)
  80                 idp.remove(c)
  81
  82     def collect_links(self):
  83         res = []
  84         for r in self._doc.xpath(u"//db:link[@xlink:href]",namespaces=const.XPATH):
  85             rf = os.path.join(self._dirname,r.get(const.XLINK+'href'))
  86             if os.path.isfile(rf):
  87                 if r.get('security')=='encrypt':
  88                     with open(rf, 'rb') as f:
  89                         gpg = gnupg.GPG()
  90                         status = gpg.encrypt_file(
  91                         f, None, passphrase=getpass.getpass(rf+' password:'), symmetric=True,
  92                         output=rf+'.gpg')
  93                     r.set(const.XLINK+'href', r.get(const.XLINK+'href')+'.gpg')
  94                     rf=rf+'.gpg'
  95                 res.append(rf)
  96         return res
  97
  98     def collect_images(self):
  99         res = []
 100         for i in self._doc.xpath(u"//db:imagedata[@fileref]",namespaces=const.XPATH):
 101             im = os.path.join(self._dirname,i.get('fileref'))
 102             if os.path.isfile(im):
 103                 res.append(im)
 104             else:
 105                 print "WARNING: File "+im+" is missing!"
 106         return res
 107
 108     def collect_forms(self):
 109         res = []
 110         for i in self._doc.xpath(u"//html:form[@action]",namespaces=const.XPATH):
 111             pyscript = re.split('\.py',i.get('action'),1)[0]+'.py'
 112             im = os.path.join(self._dirname,pyscript)
 113             if os.path.isfile(im):
 114                 res.append(im)
 115         return res
 116
 117     def tostring(self):
 118         return etree.tostring(self._doc,encoding='UTF-8',pretty_print=False)
 119
 120     def xslt(self,transform):
 121         return etree.tostring(transform(self._doc))
 122
 123     def clean(self):
 124         def recursively_empty(e):
 125             if e.text:
 126                 return False
 127             return all((recursively_empty(c) for c in e.iterchildren()))
 128
 129         context = etree.iterwalk(self._doc)
 130         for action, elem in context:
 131             parent = elem.getparent()
 132             if recursively_empty(elem):
 133                 parent.remove(elem)