ab6642717fcdf4bda3004c7ac6a32aaf09a78d5f
[treecutter.git] / xinclude / table.py
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3
4 import sys
5 import re
6 import codecs
7
8 from urlparse import urlparse
9 from lxml import etree
10 from lxml.builder import ElementMaker
11 from treecutter import constants as const
12
def linkify(text):
    """Convert a ``URL|title`` string into a link element.

    *text* is searched for an ``http://`` or ``https://`` URL followed by
    ``|`` and a title made of word characters, ``-`` and ``.``.  When such
    a pair is found, a ``link`` element (built in the ``const.DB_NS``
    namespace) is returned with the title as its content and the URL in
    the ``const.XLINK + "href"`` attribute; text surrounding the match is
    not carried over.  When nothing matches, *text* is returned unchanged.
    """
    maker = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP)
    match = re.search(r"(?P<url>https?://[^ ]+)\|(?P<title>[\w\-\.]+)", text)
    if not match:
        return text
    href_attr = {const.XLINK+"href": match.group("url")}
    return maker.link(match.group("title"), **href_attr)
20
class Table(object):
    """A tab-separated text file rendered as a table element tree.

    ``tablefile`` is the path of a UTF-8 text file with one table row per
    line and cells separated by one or more tab characters.  ``title`` is
    used as the table title.  If the first cell of the first row starts
    with ``*``, that row is emitted as the table header.
    """

    def __init__(self, tablefile, title):
        self.tablefile = tablefile
        self.title = title
        # List of rows; each row is the list of cell strings of one line.
        self.cols = []

    def parse(self):
        """Read ``self.tablefile`` and append one row per line to ``self.cols``.

        Trailing whitespace (including the newline) is stripped from each
        line before it is split on runs of tab characters.
        """
        # The context manager closes the file even if decoding fails midway.
        with codecs.open(self.tablefile, encoding='utf-8') as f:
            for line in f:
                self.cols.append(re.split(r'\t+', line.rstrip()))

    def db_xml(self):
        """Build and return the ``table`` element for the parsed rows.

        The number of columns is taken from the first row.  ``self.cols``
        is left unmodified, so the method can be called repeatedly.

        Raises:
            IndexError: if ``self.cols`` is empty (nothing was parsed).
        """
        db = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP)
        rows = self.cols
        first = rows[0]
        nrcol = str(len(first))

        group_children = []
        # startswith() also copes with an empty first cell.
        if first[0].startswith('*'):
            # Work on a copy so the '*' marker stays in self.cols.
            header = list(first)
            header[0] = header[0][1:]
            head_row = db.row()
            for cell in header:
                head_row.append(db.entry(linkify(cell), align="center"))
            group_children.append(db.thead(head_row))
            body_rows = rows[1:]
        else:
            # No header marker: every row belongs to the body and no
            # thead element is emitted.
            body_rows = rows

        body = db.tbody()
        for cells in body_rows:
            row = db.row()
            for cell in cells:
                row.append(db.entry(linkify(cell)))
            body.append(row)
        group_children.append(body)

        tab = db.table(db.title(self.title),
                       db.tgroup(*group_children, cols=nrcol,
                                 colsep='1', rowsep='1', align='left'),
                       frame='all')
        return tab
55
if __name__ == "__main__":
    # Command-line arguments are key=value pairs:
    #   xptr=<tablefile>|<title>   (required)
    #   lang=<value>               (accepted, currently not used below)
    lang = None
    argument = None
    for arg in sys.argv[1:]:
        # partition() splits on the first '=' only, so values may
        # themselves contain '=' and a bare key does not raise.
        key, sep, value = arg.partition("=")
        if not sep:
            continue
        if key == "lang":
            lang = value
        if key == "xptr":
            # Byte strings (Python 2 argv) are decoded; text is kept as is.
            if isinstance(value, bytes):
                value = value.decode('utf-8')
            argument = value

    if not argument or '|' not in argument:
        sys.exit("usage: %s xptr='<tablefile>|<title>' [lang=<lang>]"
                 % sys.argv[0])

    # Split on the first '|' only so the title may contain '|'.
    (tablefile, title) = argument.split('|', 1)
    tab = Table(tablefile, title)
    tab.parse()
    txml = tab.db_xml()

    sys.stdout.write(etree.tostring(txml, encoding='UTF-8', pretty_print=False))