treecutter/docbook.py

   1 #!/usr/bin/python
   2
import getpass
import os
import subprocess
import sys
from time import time

from lxml import etree
from lxml.builder import ElementMaker
from pkg_resources import resource_filename, resource_listdir

import treecutter.constants as const
from treecutter.image import Image
  14
  15 class Docbook():
  16     """Class representing a docbook document"""
  17     def __init__(self,filename):
  18         self._filename = filename
  19         self._doc = etree.parse(self._filename)
  20         self._dirname = os.path.dirname(self._filename)
  21
  22     def title(self):
  23         t = self._doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH)
  24         if t:
  25             t = unicode(t[0].text)
  26         ta = self._doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH)
  27         if ta:
  28            ta = unicode(ta[0].text)
  29         return (t, ta)
  30
  31     def expand_imageobjects(self):
  32         cwd = os.getcwd()
  33         db = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP)
  34         images  = self._doc.xpath(u"//db:imageobject/db:imagedata[@fileref]",namespaces=const.XPATH)
  35         for i in images:
  36             os.chdir(self._dirname)
  37             im = i.get('fileref')
  38             img = Image(im)
  39             caption = db.caption()
  40             for p in img.caption().split('\n\n'):
  41                 caption.append(db.para(p))
  42             link = db.para(db.link(img.infostr(),
  43                                    **{const.XLINK+"href": img.filename()}))
  44             caption.append(link)
  45             mo = db.mediaobject(db.imageobject(
  46                 db.imagedata(fileref=img.resize(800,600))),caption)
  47             iop = i.getparent()
  48             mop = iop.getparent()
  49             mopp = mop.getparent()
  50             mopp.insert(mopp.index(mop)+1,mo)
  51             mopp.remove(mop)
  52             os.chdir(cwd)
  53
  54
  55     def parse_xincludes(self):
  56         cwd = os.getcwd()
  57         for c in self._doc.xpath(u"//xi:include[@parse='text']",namespaces=const.XPATH):
  58             href = c.get('href')
  59             alang = c.get('accept-language')
  60             xpointer = c.get('xpointer')
  61             (p, ext) = os.path.splitext(href)
  62             if ext in const.valid_scripts:
  63                 exe = []
  64                 script = os.path.join(os.path.abspath(self._dirname)+'/'+href)
  65                 if os.path.isfile(script):
  66                     exe.append(script)
  67                 else:
  68                     if href in resource_listdir('xinclude', ''):
  69                         script = resource_filename('xinclude', href)
  70                         exe.append(script)
  71                     else:
  72                         print "Script "+href+" in "+self._filename+" missing"
  73                 if alang:
  74                     exe.append("lang="+alang)
  75                 if xpointer:
  76                     exe.append("xptr="+xpointer)
  77                 print "  executing %15s" % (href),
  78                 ts = time()
  79                 os.chdir(self._dirname)
  80                 xml = subprocess.Popen(exe,stdout=subprocess.PIPE,
  81                                        stderr=subprocess.PIPE)
  82                 (stdout, stderr) = xml.communicate()
  83                 #print xml.returnvalue
  84                 if stderr:
  85                     print " ".join(exe)+" ERROR : [ "+stderr+" ]"
  86                     exit
  87                 os.chdir(cwd)
  88                 te = time()
  89                 print " [%5.2f s]  (%s)" % (round(te-ts,2),xpointer)
  90                 xstr = etree.fromstring(stdout)
  91 # inserting the generated code and remove the xinclude reference
  92                 idp = c.getparent()
  93                 idp.insert(idp.index(c)+1,xstr)
  94                 idp.remove(c)
  95
  96     def collect_links(self):
  97         res = []
  98         for r in self._doc.xpath(u"//db:link[@xlink:href]",namespaces=const.XPATH):
  99             rf = os.path.join(self._dirname,r.get(const.XLINK+'href'))
 100             if os.path.isfile(rf):
 101                 if r.get('security')=='encrypt':
 102                     with open(rf, 'rb') as f:
 103                         gpg = gnupg.GPG()
 104                         status = gpg.encrypt_file(
 105                         f, None, passphrase=getpass.getpass(rf+' password:'), symmetric=True,
 106                         output=rf+'.gpg')
 107                     r.set(const.XLINK+'href', r.get(const.XLINK+'href')+'.gpg')
 108                     rf=rf+'.gpg'
 109                 res.append(rf)
 110         return res
 111
 112     def collect_images(self):
 113         res = []
 114         for i in self._doc.xpath(u"//db:imagedata[@fileref]",namespaces=const.XPATH):
 115             im = os.path.join(self._dirname,i.get('fileref'))
 116             if os.path.isfile(im):
 117                 res.append(im)
 118             else:
 119                 print "WARNING: File "+im+" is missing!"
 120         return res
 121
 122     def collect_forms(self):
 123         res = []
 124         for i in self._doc.xpath(u"//html:form[@action]",namespaces=const.XPATH):
 125             pyscript = re.split('\.py',i.get('action'),1)[0]+'.py'
 126             im = os.path.join(self._dirname,pyscript)
 127             if os.path.isfile(im):
 128                 res.append(im)
 129         return res
 130
 131     def tostring(self):
 132         return etree.tostring(self._doc,encoding='UTF-8',pretty_print=False)
 133
 134     def xslt(self,transform):
 135         return etree.tostring(transform(self._doc))
 136
 137     def clean(self):
 138         def recursively_empty(e):
 139             if e.text:
 140                 return False
 141             return all((recursively_empty(c) for c in e.iterchildren()))
 142
 143         context = etree.iterwalk(self._doc)
 144         for action, elem in context:
 145             parent = elem.getparent()
 146             if recursively_empty(elem):
 147                 parent.remove(elem)