a143487f7c071a2d8ed23805f56797bcac88beaa
[treecutter.git] / treecutter / docbook.py
1 #!/usr/bin/python
2
3 import os
4 import subprocess
5
6 from lxml import etree
7 from lxml.builder import ElementMaker
8
9 from pkg_resources import resource_filename, resource_listdir
10 from time import time
11
12 import treecutter.constants as const
13 from treecutter.image import Image
14
15 class Docbook():
16     """Class representing a docbook document"""
17     def __init__(self,filename):
18         self._filename = filename
19         self._doc = etree.parse(self._filename)
20         self._dirname = os.path.dirname(self._filename)
21
22     def title(self):
23         t = self._doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH)
24         if t:
25             t = unicode(t[0].text)
26         ta = self._doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH)
27         if ta:
28            ta = unicode(ta[0].text)
29         return (t, ta)
30
31     def expand_imageobjects(self):
32         cwd = os.getcwd()
33         db = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP)
34         images  = self._doc.xpath(u"//db:imageobject/db:imagedata[@fileref]",namespaces=const.XPATH)
35         for i in images:
36             os.chdir(self._dirname)
37             im = i.get('fileref')
38             img = Image(im)
39             caption = db.caption()
40             for p in img.caption().split('\n\n'):
41                 caption.append(db.para(p))
42             link = db.para(db.link(img.infostr(),
43                                    **{const.XLINK+"href": img.filename()}))
44             caption.append(link)
45             mo = db.mediaobject(db.imageobject(
46                 db.imagedata(fileref=img.resize(800,600))),caption)
47             iop = i.getparent()
48             mop = iop.getparent()
49             mopp = mop.getparent()
50             mopp.insert(mopp.index(mop)+1,mo)
51             mopp.remove(mop)
52             os.chdir(cwd)
53
54
55     def parse_xincludes(self):
56         cwd = os.getcwd()
57         for c in self._doc.xpath(u"//xi:include[@parse='text']",namespaces=const.XPATH):
58             href = c.get('href')
59             alang = c.get('accept-language')
60             xpointer = c.get('xpointer')
61             (p, ext) = os.path.splitext(href)
62             if ext in const.valid_scripts:
63                 exe = []
64                 script = os.path.join(os.path.abspath(self._dirname)+'/'+href)
65                 if os.path.isfile(script):
66                     exe.append(script)
67                 else:
68                     if href in resource_listdir('xinclude', ''):
69                         script = resource_filename('xinclude', href)
70                         exe.append(script)
71                     else:
72                         print "Script "+href+" in "+self._filename+" missing"
73                 if alang:
74                     exe.append("lang="+alang)
75                 if xpointer:
76                     exe.append("xptr="+xpointer)
77                 print "  executing %15s" % (href),
78                 ts = time()
79                 os.chdir(self._dirname)
80                 xml = subprocess.Popen(exe,stdout=subprocess.PIPE,
81                                        stderr=subprocess.PIPE)
82                 (stdout, stderr) = xml.communicate()
83                 #print xml.returnvalue
84                 if stderr:
85                     print " ".join(exe)+" ERROR : [ "+stderr+" ]"
86                     exit
87                 os.chdir(cwd)
88                 te = time()
89                 print " [%5.2f s]  (%s)" % (round(te-ts,2),xpointer)
90                 xstr = etree.fromstring(stdout)
91 # inserting the generated code and remove the xinclude reference
92                 idp = c.getparent()
93                 idp.insert(idp.index(c)+1,xstr)
94                 idp.remove(c)
95
96     def collect_links(self):
97         res = []
98         for r in self._doc.xpath(u"//db:link[@xlink:href]",namespaces=const.XPATH):
99             rf = os.path.join(self._dirname,r.get(const.XLINK+'href'))
100             if os.path.isfile(rf):
101                 if r.get('security')=='encrypt':
102                     with open(rf, 'rb') as f:
103                         gpg = gnupg.GPG()
104                         status = gpg.encrypt_file(
105                         f, None, passphrase=getpass.getpass(rf+' password:'), symmetric=True,
106                         output=rf+'.gpg')
107                     r.set(const.XLINK+'href', r.get(const.XLINK+'href')+'.gpg')
108                     rf=rf+'.gpg'
109                 res.append(rf)
110         return res
111
112     def collect_images(self):
113         res = []
114         for i in self._doc.xpath(u"//db:imagedata[@fileref]",namespaces=const.XPATH):
115             im = os.path.join(self._dirname,i.get('fileref'))
116             if os.path.isfile(im):
117                 res.append(im)
118             else:
119                 print "WARNING: File "+im+" is missing!"
120         return res
121
122     def collect_forms(self):
123         res = []
124         for i in self._doc.xpath(u"//html:form[@action]",namespaces=const.XPATH):
125             pyscript = re.split('\.py',i.get('action'),1)[0]+'.py'
126             im = os.path.join(self._dirname,pyscript)
127             if os.path.isfile(im):
128                 res.append(im)
129         return res
130
131     def tostring(self):
132         return etree.tostring(self._doc,encoding='UTF-8',pretty_print=False)
133
134     def xslt(self,transform):
135         return etree.tostring(transform(self._doc))
136
137     def clean(self):
138         def recursively_empty(e):
139             if e.text:
140                 return False
141             return all((recursively_empty(c) for c in e.iterchildren()))
142
143         context = etree.iterwalk(self._doc)
144         for action, elem in context:
145             parent = elem.getparent()
146             if recursively_empty(elem):
147                 parent.remove(elem)