size: printing size of style
[treecutter.git] / treecutter / docbook.py
1 #!/usr/bin/python
2
3 import os
4 import subprocess
5 import re
6
7 from lxml import etree
8 from lxml.builder import ElementMaker
9
10 from pkg_resources import resource_filename, resource_listdir
11 from time import time
12
13 import treecutter.constants as const
14 from treecutter.image import Image
15 #from treecutter.tools import warning
16
17 class Docbook():
18     """Class representing a docbook document"""
19     def __init__(self,filename):
20         self._filename = filename
21         self._doc = etree.parse(self._filename)
22         self._dirname = os.path.dirname(self._filename)
23
24     def title(self):
25         t = self._doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH)
26         if t:
27             t = unicode(t[0].text)
28         ta = self._doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH)
29         if ta:
30            ta = unicode(ta[0].text)
31         return (t, ta)
32
33     def status(self):
34         status = self._doc.xpath(u'/db:article[@status]',namespaces=const.XPATH)
35         if status:
36             return unicode(status[0].get('status'))
37         return None
38
39     def role(self):
40         art = self._doc.xpath(u'/db:article[@role]',namespaces=const.XPATH)
41         if art:
42             return unicode(art[0].get('role'))
43         return 'index'
44
45     def userlevel(self):
46         lvl = self._doc.xpath(u'/db:article[@userlevel]',namespaces=const.XPATH)
47         if lvl:
48             lvlstr = unicode(lvl[0].get('userlevel'))
49             return {
50                 'Level 1': 1,
51                 'Level 2': 2,
52                 'Level 3': 3,
53                 'Level 4': 4,
54                 'Level 5': 5,
55             }.get(lvlstr, 0)
56         return 0
57
58     def expand_imageobjects(self):
59         cwd = os.getcwd()
60         db = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP)
61         images  = self._doc.xpath(u"//db:imageobject/db:imagedata[@fileref]",namespaces=const.XPATH)
62         for i in images:
63             os.chdir(self._dirname)
64             im = i.get('fileref')
65             img = Image(im)
66             caption = db.caption()
67             for p in img.caption().split('\n\n'):
68                 caption.append(db.para(p))
69             link = db.para(db.link(img.infostr(),
70                                    **{const.XLINK+"href": img.filename()}))
71             caption.append(link)
72             mo = db.mediaobject(db.imageobject(
73                 db.imagedata(fileref=img.resize(800,600))),caption)
74             iop = i.getparent()
75             mop = iop.getparent()
76             mopp = mop.getparent()
77             mopp.insert(mopp.index(mop)+1,mo)
78             mopp.remove(mop)
79             os.chdir(cwd)
80
81
82     def parse_xincludes(self):
83         cwd = os.getcwd()
84         for c in self._doc.xpath(u"//xi:include[@parse='text']",namespaces=const.XPATH):
85             href = c.get('href')
86             alang = c.get('accept-language')
87             xpointer = c.get('xpointer')
88             (p, ext) = os.path.splitext(href)
89             if ext in const.valid_scripts:
90                 exe = []
91                 script = os.path.join(os.path.abspath(self._dirname)+'/'+href)
92                 if os.path.isfile(script) and os.access(script, os.X_OK):
93                     exe.append(script)
94                 else:
95                     if href in resource_listdir('xinclude', ''):
96                         script = resource_filename('xinclude', href)
97                         exe.append(script)
98                     else:
99                         print "Script "+href+" in "+self._filename+" missing"
100                 if alang:
101                     exe.append("lang="+alang)
102                 if xpointer:
103                     exe.append("xptr="+xpointer)
104                 if exe == []:
105                     continue
106                 print "  executing %15s" % (href),
107                 ts = time()
108                 os.chdir(self._dirname)
109                 xml = subprocess.Popen(exe,stdout=subprocess.PIPE,
110                                        stderr=subprocess.PIPE)
111                 (stdout, stderr) = xml.communicate()
112                 #print xml.returnvalue
113 #                if stderr:
114 #                    warning("%s : %s" % (" ".join(exe),stderr))
115 #                    warning(stdout)
116 #                    exit
117                 os.chdir(cwd)
118                 te = time()
119                 print " [%5.2f s]  (%s)" % (round(te-ts,2),xpointer)
120                 xstr = etree.fromstring(stdout)
121 # inserting the generated code and remove the xinclude reference
122                 idp = c.getparent()
123                 idp.insert(idp.index(c)+1,xstr)
124                 idp.remove(c)
125
126     def xinclude(self):
127         self._doc.xinclude()
128
129     def collect_links(self):
130         res = []
131         for r in self._doc.xpath(u"//db:link[@xlink:href]",namespaces=const.XPATH):
132             rf = os.path.join(self._dirname,r.get(const.XLINK+'href'))
133             if os.path.isfile(rf):
134                 if r.get('security')=='encrypt':
135                     with open(rf, 'rb') as f:
136                         gpg = gnupg.GPG()
137                         status = gpg.encrypt_file(
138                         f, None, passphrase=getpass.getpass(rf+' password:'), symmetric=True,
139                         output=rf+'.gpg')
140                     r.set(const.XLINK+'href', r.get(const.XLINK+'href')+'.gpg')
141                     rf=rf+'.gpg'
142                 res.append(rf)
143         return res
144
145     def collect_images(self):
146         res = []
147         for i in self._doc.xpath(u"//db:imagedata[@fileref]",namespaces=const.XPATH):
148             im = os.path.join(self._dirname,i.get('fileref'))
149             if os.path.isfile(im):
150                 res.append(im)
151             else:
152                 print "WARNING: File "+im+" is missing!"
153         return res
154
155     def collect_videos(self):
156         res = []
157         for i in self._doc.xpath(u"//db:videodata[@fileref]",namespaces=const.XPATH):
158             im = os.path.join(self._dirname,i.get('fileref'))
159             if os.path.isfile(im):
160                 res.append(im)
161             else:
162                 print "WARNING: File "+im+" is missing!"
163         return res
164
165     def collect_forms(self):
166         res = []
167         for i in self._doc.xpath(u"//html:form[@action]",namespaces=const.XPATH):
168             pyscript = re.split('\.py',i.get('action'),1)[0]+'.py'
169             im = os.path.join(self._dirname,pyscript)
170             if os.path.isfile(im):
171                 res.append(im)
172         return res
173
174     def tostring(self):
175         return etree.tostring(self._doc,encoding='UTF-8',pretty_print=False)
176
177     def xslt(self,transform):
178         return etree.tostring(transform(self._doc))
179
180     def clean(self):
181         def recursively_empty(e):
182             if e.text:
183                 return False
184             return all((recursively_empty(c) for c in e.iterchildren()))
185
186         context = etree.iterwalk(self._doc)
187         for action, elem in context:
188             parent = elem.getparent()
189             if recursively_empty(elem):
190                 parent.remove(elem)