xinclude/table.py

   1 #!/usr/bin/python
   2 # -*- coding: utf-8 -*-
   3
   4 import sys
   5 import re
   6 import codecs
   7
   8 from urlparse import urlparse
   9 from email.utils import parseaddr
  10 from lxml import etree
  11 from lxml.builder import ElementMaker
  12 from treecutter import constants as const
  13
  14 def append_text(tree, text):
  15     children = tree.getchildren()
  16     if children:
  17         if children[-1].tail is None:
  18             children[-1].tail = text
  19         else:
  20             children[-1].tail += text
  21     else:
  22         if tree.text is None:
  23             tree.text = text
  24         else:
  25             tree.text += text
  26     return tree
  27
  28 def linkify(text):
  29     db = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP)
  30     ent = db.entry(align="center")
  31     r = re.search(r"(?P<url>https?://[^ ]+)\|(?P<title>[\w\-\.]+)", text)
  32     if r:
  33         rep = r.groups(r.group(1))
  34         ent.append(db.link(rep[1],**{const.XLINK+"href": rep[0]}))
  35     ts = text.split(',')
  36     c = 0
  37     for t in ts:
  38         c = c + 1
  39         n = parseaddr(t)
  40         if n[0] != '' and n[1] != '':
  41             ent.append(db.address(db.personname(db.firstname(n[0].split(' ')[0]), db.surname(n[0].split(' ')[1])),db.email(n[1])))
  42         else:
  43             append_text(ent,t)
  44         if c<len(ts):
  45             append_text(ent,',')
  46     return ent
  47
  48 class Table(object):
  49     def __init__(self, tablefile, title):
  50         self.tablefile  = tablefile
  51         self.title = title
  52         self.cols = []
  53
  54     def parse(self):
  55         f = codecs.open(self.tablefile, encoding='utf-8')
  56         for line in f:
  57             c = re.split(r'\t+', line.rstrip())
  58             self.cols.append(c)
  59
  60     def db_xml(self):
  61         db = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP)
  62         cols = self.cols
  63         nrcol = str(len(cols[0]))
  64         if cols[0][0][0] == '*':
  65             cols[0][0] = cols[0][0][1:]
  66             h = cols.pop(0)
  67             row = db.row()
  68             for e in h:
  69                 row.append(linkify(e))
  70             head = db.thead(row)
  71         body = db.tbody()
  72         for r in cols:
  73             row = db.row()
  74             body.append(row)
  75             for e in r:
  76                 row.append(linkify(e))
  77         tab = db.table(db.title(self.title),
  78                        db.tgroup(head,body,cols=nrcol,
  79                                  colsep='1',rowsep='1',align='left'),
  80                        frame='all')
  81         return tab
  82
  83 if __name__ == "__main__":
  84     for arg in sys.argv[1:]:
  85         al = arg.split("=")
  86         if al[0] == "lang":
  87             lang = al[1]
  88         if al[0] == "xptr":
  89             argument = al[1].decode('utf-8')
  90
  91     (tablefile, title) = argument.split('|')
  92     tab = Table(tablefile,title)
  93     tab.parse()
  94     txml = tab.db_xml()
  95
  96     sys.stdout.write(etree.tostring(txml,encoding='UTF-8',pretty_print=False))