treecutter/docbook.py

   1 #!/usr/bin/python
   2
import getpass
import os
import subprocess
import sys
from time import time

from lxml import etree
from lxml.builder import ElementMaker
from pkg_resources import resource_filename, resource_listdir

import treecutter.constants as const
from treecutter.image import Image
from treecutter.tools import warning
  15
  16 class Docbook():
  17     """Class representing a docbook document"""
  18     def __init__(self,filename):
  19         self._filename = filename
  20         self._doc = etree.parse(self._filename)
  21         self._dirname = os.path.dirname(self._filename)
  22
  23     def title(self):
  24         t = self._doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH)
  25         if t:
  26             t = unicode(t[0].text)
  27         ta = self._doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH)
  28         if ta:
  29            ta = unicode(ta[0].text)
  30         return (t, ta)
  31
  32     def expand_imageobjects(self):
  33         cwd = os.getcwd()
  34         db = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP)
  35         images  = self._doc.xpath(u"//db:imageobject/db:imagedata[@fileref]",namespaces=const.XPATH)
  36         for i in images:
  37             os.chdir(self._dirname)
  38             im = i.get('fileref')
  39             img = Image(im)
  40             caption = db.caption()
  41             for p in img.caption().split('\n\n'):
  42                 caption.append(db.para(p))
  43             link = db.para(db.link(img.infostr(),
  44                                    **{const.XLINK+"href": img.filename()}))
  45             caption.append(link)
  46             mo = db.mediaobject(db.imageobject(
  47                 db.imagedata(fileref=img.resize(800,600))),caption)
  48             iop = i.getparent()
  49             mop = iop.getparent()
  50             mopp = mop.getparent()
  51             mopp.insert(mopp.index(mop)+1,mo)
  52             mopp.remove(mop)
  53             os.chdir(cwd)
  54
  55
  56     def parse_xincludes(self):
  57         cwd = os.getcwd()
  58         for c in self._doc.xpath(u"//xi:include[@parse='text']",namespaces=const.XPATH):
  59             href = c.get('href')
  60             alang = c.get('accept-language')
  61             xpointer = c.get('xpointer')
  62             (p, ext) = os.path.splitext(href)
  63             if ext in const.valid_scripts:
  64                 exe = []
  65                 script = os.path.join(os.path.abspath(self._dirname)+'/'+href)
  66                 if os.path.isfile(script):
  67                     exe.append(script)
  68                 else:
  69                     if href in resource_listdir('xinclude', ''):
  70                         script = resource_filename('xinclude', href)
  71                         exe.append(script)
  72                     else:
  73                         print "Script "+href+" in "+self._filename+" missing"
  74                 if alang:
  75                     exe.append("lang="+alang)
  76                 if xpointer:
  77                     exe.append("xptr="+xpointer)
  78                 print "  executing %15s" % (href),
  79                 ts = time()
  80                 os.chdir(self._dirname)
  81                 xml = subprocess.Popen(exe,stdout=subprocess.PIPE,
  82                                        stderr=subprocess.PIPE)
  83                 (stdout, stderr) = xml.communicate()
  84                 #print xml.returnvalue
  85                 if stderr:
  86                     warning("%s : %s" % (" ".join(exe),stderr))
  87                     warning(stdout)
  88                     exit
  89                 os.chdir(cwd)
  90                 te = time()
  91                 print " [%5.2f s]  (%s)" % (round(te-ts,2),xpointer)
  92                 xstr = etree.fromstring(stdout)
  93 # inserting the generated code and remove the xinclude reference
  94                 idp = c.getparent()
  95                 idp.insert(idp.index(c)+1,xstr)
  96                 idp.remove(c)
  97
  98     def collect_links(self):
  99         res = []
 100         for r in self._doc.xpath(u"//db:link[@xlink:href]",namespaces=const.XPATH):
 101             rf = os.path.join(self._dirname,r.get(const.XLINK+'href'))
 102             if os.path.isfile(rf):
 103                 if r.get('security')=='encrypt':
 104                     with open(rf, 'rb') as f:
 105                         gpg = gnupg.GPG()
 106                         status = gpg.encrypt_file(
 107                         f, None, passphrase=getpass.getpass(rf+' password:'), symmetric=True,
 108                         output=rf+'.gpg')
 109                     r.set(const.XLINK+'href', r.get(const.XLINK+'href')+'.gpg')
 110                     rf=rf+'.gpg'
 111                 res.append(rf)
 112         return res
 113
 114     def collect_images(self):
 115         res = []
 116         for i in self._doc.xpath(u"//db:imagedata[@fileref]",namespaces=const.XPATH):
 117             im = os.path.join(self._dirname,i.get('fileref'))
 118             if os.path.isfile(im):
 119                 res.append(im)
 120             else:
 121                 print "WARNING: File "+im+" is missing!"
 122         return res
 123
 124     def collect_videos(self):
 125         res = []
 126         for i in self._doc.xpath(u"//db:videodata[@fileref]",namespaces=const.XPATH):
 127             im = os.path.join(self._dirname,i.get('fileref'))
 128             if os.path.isfile(im):
 129                 res.append(im)
 130             else:
 131                 print "WARNING: File "+im+" is missing!"
 132         return res
 133
 134     def collect_forms(self):
 135         res = []
 136         for i in self._doc.xpath(u"//html:form[@action]",namespaces=const.XPATH):
 137             pyscript = re.split('\.py',i.get('action'),1)[0]+'.py'
 138             im = os.path.join(self._dirname,pyscript)
 139             if os.path.isfile(im):
 140                 res.append(im)
 141         return res
 142
 143     def tostring(self):
 144         return etree.tostring(self._doc,encoding='UTF-8',pretty_print=False)
 145
 146     def xslt(self,transform):
 147         return etree.tostring(transform(self._doc))
 148
 149     def clean(self):
 150         def recursively_empty(e):
 151             if e.text:
 152                 return False
 153             return all((recursively_empty(c) for c in e.iterchildren()))
 154
 155         context = etree.iterwalk(self._doc)
 156         for action, elem in context:
 157             parent = elem.getparent()
 158             if recursively_empty(elem):
 159                 parent.remove(elem)