import sys
import re
+import codecs
from urlparse import urlparse
from lxml import etree
self.cols = []
def parse(self):
    """Read the table file and append one list of fields per line to ``self.cols``.

    The file named by ``self.tablefile`` is opened with UTF-8 decoding.
    Each line has its trailing whitespace removed and is then split on
    runs of one or more tab characters, so spaces inside a field are
    preserved and consecutive tabs do not produce empty fields. A line
    that is empty after stripping contributes ``['']``.

    Results are appended to the existing ``self.cols`` list; nothing is
    returned.
    """
    # The context manager closes the handle even if decoding or splitting
    # raises part-way through the file.
    with codecs.open(self.tablefile, encoding='utf-8') as f:
        for line in f:
            # rstrip() drops the newline (and any trailing tabs) before the
            # split, so the last field carries no line terminator.
            self.cols.append(re.split(r'\t+', line.rstrip()))
def db_xml(self):