table: adding link support in simple tables
[treecutter.git] / xinclude / table.py
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3
4 import sys
5 import re
6 import codecs
7
8 from urlparse import urlparse
9 from email.utils import parseaddr
10 from lxml import etree
11 from lxml.builder import ElementMaker
12 from treecutter import constants as const
13
def append_text(tree, text):
    """Append *text* after everything currently inside *tree*.

    In the ElementTree/lxml data model, character data that follows a child
    element is stored on that child's ``tail``; only the character data
    before the first child lives on the parent's ``text``.  The text is
    therefore added to the tail of the last child when *tree* has children,
    and to ``tree.text`` otherwise.

    Returns *tree* so that calls can be chained.
    """
    # len()/indexing replace the deprecated getchildren(), which also built
    # a full list copy of the children just to look at the last one.
    if len(tree):
        last = tree[-1]
        if last.tail is None:
            last.tail = text
        else:
            last.tail += text
    elif tree.text is None:
        tree.text = text
    else:
        tree.text += text
    return tree
27
# Inline link markup recognised inside a table cell: ``URL|title``.
_LINK_RE = re.compile(r"(?P<url>https?://[^ ]+)\|(?P<title>[\w\-\.]+)")


def _append_cell_text(ent, db, text):
    """Append *text* to *ent*, turning ``Name <email>`` items into addresses.

    *text* is split on commas.  Every item that ``parseaddr`` resolves to
    both a display name and an e-mail address becomes an ``address``
    element; every other item is appended verbatim.  The separating commas
    are re-inserted between items.
    """
    parts = text.split(',')
    last = len(parts) - 1
    for idx, part in enumerate(parts):
        name, email = parseaddr(part)
        # First word is the first name, the remainder (if any) the surname.
        # Splitting with maxsplit avoids an IndexError on one-word names and
        # keeps multi-word surnames intact.
        names = name.split(None, 1)
        if names and email:
            person = db.personname(db.firstname(names[0]))
            if len(names) > 1:
                person.append(db.surname(names[1]))
            ent.append(db.address(person, db.email(email)))
        else:
            append_text(ent, part)
        if idx < last:
            append_text(ent, ',')


def linkify(text):
    """Build a centred table ``entry`` element from the cell string *text*.

    Two kinds of inline markup are recognised:

    * ``http(s)://...|title`` becomes a ``link`` element whose text is
      *title* and whose ``const.XLINK + "href"`` attribute is the URL;
    * comma separated ``Name <email>`` items become ``address`` elements.

    Everything else is kept as plain text, in its original position.  The
    link markup itself is consumed: it is not repeated as text, and it is
    not subjected to the comma splitting (URLs may contain commas).

    Returns the new ``entry`` element.
    """
    db = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP)
    ent = db.entry(align="center")
    pos = 0
    for match in _LINK_RE.finditer(text):
        if match.start() > pos:
            _append_cell_text(ent, db, text[pos:match.start()])
        ent.append(db.link(match.group('title'),
                           **{const.XLINK + "href": match.group('url')}))
        pos = match.end()
    # Trailing text after the last link, or the whole cell when there was
    # no link at all (an empty cell still gets an empty text node).
    if pos == 0 or pos < len(text):
        _append_cell_text(ent, db, text[pos:])
    return ent
47
class Table(object):
    """A tab-separated text file rendered as a DocBook table.

    Each non-blank line of the file is one table row; cells are separated
    by one or more tab characters.  If the first cell of the first row
    starts with ``*``, that row is used as the table header (the ``*`` is
    dropped from the output).
    """

    def __init__(self, tablefile, title):
        """Remember the path of the table file and the table title."""
        self.tablefile = tablefile
        self.title = title
        # Parsed rows, filled by parse(); each row is a list of cell strings.
        self.cols = []

    def parse(self):
        """Read ``self.tablefile`` (UTF-8) into ``self.cols``.

        Blank lines are skipped.  Calling this method again re-reads the
        file instead of appending the rows a second time.
        """
        rows = []
        # The context manager closes the file even if decoding fails.
        with codecs.open(self.tablefile, encoding='utf-8') as f:
            for line in f:
                stripped = line.rstrip()
                if not stripped:
                    continue
                rows.append(re.split(r'\t+', stripped))
        self.cols = rows

    @staticmethod
    def _db_row(db, cells):
        """Return a ``row`` element with one ``linkify`` entry per cell."""
        row = db.row()
        for cell in cells:
            row.append(linkify(cell))
        return row

    def db_xml(self):
        """Return the parsed rows as a DocBook ``table`` element.

        The column count is taken from the first row.  ``self.cols`` is
        left untouched, so the method can be called repeatedly.  A
        ``thead`` is emitted only when the first row is marked as a header.
        """
        db = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP)
        # Work on copies so that stripping the header marker and removing
        # the header row do not alter the parsed data.
        rows = [list(r) for r in self.cols]
        nrcol = str(len(rows[0])) if rows else '0'
        group = db.tgroup(cols=nrcol, colsep='1', rowsep='1', align='left')
        if rows and rows[0] and rows[0][0].startswith('*'):
            header = rows.pop(0)
            header[0] = header[0][1:]
            group.append(db.thead(self._db_row(db, header)))
        body = db.tbody()
        for r in rows:
            body.append(self._db_row(db, r))
        group.append(body)
        return db.table(db.title(self.title), group, frame='all')
82
if __name__ == "__main__":
    # Arguments are key=value pairs:  lang=LANG  "xptr=TABLEFILE|TITLE"
    lang = None        # accepted, but not used by this script
    argument = None
    for arg in sys.argv[1:]:
        # Split on the first '=' only, so values may contain '=' themselves.
        key, _, value = arg.partition("=")
        if key == "lang":
            lang = value
        elif key == "xptr":
            argument = value.decode('utf-8')

    # Fail with a readable message instead of a NameError/ValueError
    # traceback when xptr is missing or lacks the '|' separator.
    if argument is None or '|' not in argument:
        sys.stderr.write('usage: %s [lang=LANG] "xptr=TABLEFILE|TITLE"\n'
                         % sys.argv[0])
        sys.exit(1)

    # Only the first '|' separates file and title; the title may contain more.
    (tablefile, title) = argument.split('|', 1)
    tab = Table(tablefile, title)
    tab.parse()
    txml = tab.db_xml()

    sys.stdout.write(etree.tostring(txml, encoding='UTF-8', pretty_print=False))