xinclude/table.py

   1 #!/usr/bin/python
   2 # -*- coding: utf-8 -*-
   3
   4 import sys
   5 import re
   6 import codecs
   7
   8 from urlparse import urlparse
   9 from lxml import etree
  10 from lxml.builder import ElementMaker
  11 from treecutter import constants as const
  12
  13 def linkify(text):
  14     db = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP)
  15     r = re.search(r"(?P<url>https?://[^ ]+)\|(?P<title>[\w\-\.]+)", text)
  16     if r:
  17         rep = r.groups(r.group(1))
  18         text = db.link(rep[1],**{const.XLINK+"href": rep[0]})
  19     return text
  20
  21 class Table(object):
  22     def __init__(self, tablefile, title):
  23         self.tablefile  = tablefile
  24         self.title = title
  25         self.cols = []
  26
  27     def parse(self):
  28         f = codecs.open(self.tablefile, encoding='utf-8')
  29         for line in f:
  30             c = re.split(r'\t+', line.rstrip())
  31             self.cols.append(c)
  32
  33     def db_xml(self):
  34         db = ElementMaker(namespace=const.DB_NS, nsmap=const.NSMAP)
  35         cols = self.cols
  36         nrcol = str(len(cols[0]))
  37         if cols[0][0][0] == '*':
  38             cols[0][0] = cols[0][0][1:]
  39             h = cols.pop(0)
  40             row = db.row()
  41             for e in h:
  42                 row.append(db.entry(linkify(e), align="center"))
  43             head = db.thead(row)
  44         body = db.tbody()
  45         for r in cols:
  46             row = db.row()
  47             body.append(row)
  48             for e in r:
  49                 row.append(db.entry(linkify(e)))
  50         tab = db.table(db.title(self.title),
  51                        db.tgroup(head,body,cols=nrcol,
  52                                  colsep='1',rowsep='1',align='left'),
  53                        frame='all')
  54         return tab
  55
  56 if __name__ == "__main__":
  57     for arg in sys.argv[1:]:
  58         al = arg.split("=")
  59         if al[0] == "lang":
  60             lang = al[1]
  61         if al[0] == "xptr":
  62             argument = al[1].decode('utf-8')
  63
  64     (tablefile, title) = argument.split('|')
  65     tab = Table(tablefile,title)
  66     tab.parse()
  67     txml = tab.db_xml()
  68
  69     sys.stdout.write(etree.tostring(txml,encoding='UTF-8',pretty_print=False))