fd822ce8fe2cf6058fafa7528c56e0af0e38e0c8
[treecutter.git] / treecutter / docbook.py
1 #!/usr/bin/python
2
3 import os
4 import subprocess
5
6 from lxml import etree
7 from lxml.builder import ElementMaker
8
9 from pkg_resources import resource_filename, resource_listdir
10 from time import time
11
12 import treecutter.constants as const
13 from treecutter.image import Image
14
15 class Docbook():
16     """Class representing a docbook document"""
17     def __init__(self,filename):
18         self._filename = filename
19         self._doc = etree.parse(self._filename)
20         self._dirname = os.path.dirname(self._filename)
21
22     def title(self):
23         t = self._doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH)
24         if t:
25             t = unicode(t[0].text)
26         ta = self._doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH)
27         if ta:
28            ta = unicode(ta[0].text)
29         return (t, ta)
30
31     def expand_imageobjects(self):
32         db = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP)
33         images  = self._doc.xpath(u"//db:imageobject/db:imagedata[@fileref]",namespaces=const.XPATH)
34         for io in images:
35             image = Image(io)
36             link = db.link(image.infostr(),**{const.XLINK+"href": f})
37             io = db.imageobject(
38                 db.imagedata(fileref=image.format(800,600), width=str(800), depth=str(600)),
39                 db.caption(db.para(image.caption())))
40
41     def parse_xincludes(self):
42         cwd = os.getcwd()
43         for c in self._doc.xpath(u"//xi:include[@parse='text']",namespaces=const.XPATH):
44             href = c.get('href')
45             alang = c.get('accept-language')
46             xpointer = c.get('xpointer')
47             (p, ext) = os.path.splitext(href)
48             if ext in const.valid_scripts:
49                 exe = []
50                 script = os.path.join(os.path.abspath(self._dirname)+'/'+href)
51                 if os.path.isfile(script):
52                     exe.append(script)
53                 else:
54                     if href in resource_listdir('xinclude', ''):
55                         script = resource_filename('xinclude', href)
56                         exe.append(script)
57                     else:
58                         print "Script "+href+" in "+self._filename+" missing"
59                 if alang:
60                     exe.append("lang="+alang)
61                 if xpointer:
62                     exe.append("xptr="+xpointer)
63                 print "  executing %15s" % (href),
64                 ts = time()
65                 os.chdir(self._dirname)
66                 xml = subprocess.Popen(exe,stdout=subprocess.PIPE,
67                                        stderr=subprocess.PIPE)
68                 (stdout, stderr) = xml.communicate()
69                 #print xml.returnvalue
70                 if stderr:
71                     print " ".join(exe)+" ERROR : [ "+stderr+" ]"
72                     exit
73                 os.chdir(cwd)
74                 te = time()
75                 print " [%5.2f s]  (%s)" % (round(te-ts,2),xpointer)
76                 xstr = etree.fromstring(stdout)
77 # inserting the generated code and remove the xinclude reference
78                 idp = c.getparent()
79                 idp.insert(idp.index(c)+1,xstr)
80                 idp.remove(c)
81
82     def collect_links(self):
83         res = []
84         for r in self._doc.xpath(u"//db:link[@xlink:href]",namespaces=const.XPATH):
85             rf = os.path.join(self._dirname,r.get(const.XLINK+'href'))
86             if os.path.isfile(rf):
87                 if r.get('security')=='encrypt':
88                     with open(rf, 'rb') as f:
89                         gpg = gnupg.GPG()
90                         status = gpg.encrypt_file(
91                         f, None, passphrase=getpass.getpass(rf+' password:'), symmetric=True,
92                         output=rf+'.gpg')
93                     r.set(const.XLINK+'href', r.get(const.XLINK+'href')+'.gpg')
94                     rf=rf+'.gpg'
95                 res.append(rf)
96         return res
97
98     def collect_images(self):
99         res = []
100         for i in self._doc.xpath(u"//db:imagedata[@fileref]",namespaces=const.XPATH):
101             im = os.path.join(self._dirname,i.get('fileref'))
102             if os.path.isfile(im):
103                 res.append(im)
104             else:
105                 print "WARNING: File "+im+" is missing!"
106         return res
107
108     def collect_forms(self):
109         res = []
110         for i in self._doc.xpath(u"//html:form[@action]",namespaces=const.XPATH):
111             pyscript = re.split('\.py',i.get('action'),1)[0]+'.py'
112             im = os.path.join(self._dirname,pyscript)
113             if os.path.isfile(im):
114                 res.append(im)
115         return res
116
117     def tostring(self):
118         return etree.tostring(self._doc,encoding='UTF-8',pretty_print=False)
119
120     def xslt(self,transform):
121         return etree.tostring(transform(self._doc))
122
123     def clean(self):
124         def recursively_empty(e):
125             if e.text:
126                 return False
127             return all((recursively_empty(c) for c in e.iterchildren()))
128
129         context = etree.iterwalk(self._doc)
130         for action, elem in context:
131             parent = elem.getparent()
132             if recursively_empty(elem):
133                 parent.remove(elem)