#!/usr/bin/python
import os
import fnmatch
-from amara import bindery
-import treecutter.const as const
+from lxml import etree
+import treecutter.constants as const
+import re
class Directory():
"""Class containing the state of the directory with articles"""
def __init__(self):
- self._cwd = '.'
+ self._cwd = u'.'
self._tree = []
+ self._basepath = re.compile('[/\w\._-]*/[\w-]+',re.UNICODE)
def scan(self):
    """Walk the working directory and record a link for every article.

    Every ``*.xml`` file found below ``self._cwd`` is parsed.  A file is
    treated as an article only when it has both a ``db:title`` and a
    ``db:titleabbrev`` element under ``/db:article/db:info``.  For each
    article the path, minus its extension(s), is turned into a link and
    appended to ``self._tree``; a last path component that is exactly
    ``index`` is dropped so the link points at the directory itself.

    Files whose path is not accepted by ``self._basepath`` are skipped.
    """
    for dirname, _dirnames, filenames in os.walk(self._cwd):
        for filename in filenames:
            if not fnmatch.fnmatch(filename, '*.xml'):
                continue
            file_ = os.path.join(dirname, filename)
            doc = etree.parse(file_)
            # const.XPATH is presumably the prefix -> namespace mapping
            # that binds 'db' -- confirm against treecutter.constants.
            title = doc.xpath(u'/db:article/db:info/db:title',
                              namespaces=const.XPATH)
            menu = doc.xpath(u'/db:article/db:info/db:titleabbrev',
                             namespaces=const.XPATH)
            if not (title and menu):
                continue
            matched = self._basepath.match(file_)
            if matched is None:
                # match() returns None for paths the pattern rejects;
                # skip the file rather than fail on None.group().
                continue
            base = matched.group()
            # Strip only a final 'index' component.  A blanket
            # str.replace() would also mangle names that merely contain
            # the word, e.g. './indexing/intro' -> './ing/intro'.
            if base.endswith('/index'):
                base = base[:-len('index')]
            # Drop the first character: the leading '.' when the walk
            # starts from the default working directory '.'.
            self._tree.append(base[1:])
def set(self):