5 import treecutter.constants as const
9 """Class containing the state of the directory with articles"""
# Compiled pattern that is later applied with .match() to a file path.
# It accepts a run of '/', word characters, '.', '_' or '-', followed by a
# '/' and one or more word characters or '-'.
# NOTE(review): the pattern is written as a non-raw string that contains
# backslash escapes (\w, \.); a raw string literal is the usual form for
# regular expressions -- confirm and convert if appropriate.
13 self._basepath = re.compile('[/\w\._-]*/[\w-]+',re.UNICODE)
# Walk the directory tree rooted at self._cwd and record one link per
# '*.xml' file found, appending it to self._tree.
16 for dirname, dirnames, filenames in os.walk(self._cwd):
17 for filename in filenames:
# Only files whose name matches the '*.xml' glob are considered.
18 if fnmatch.fnmatch(filename, '*.xml'):
19 file_ = os.path.join(dirname,filename)
# Parse the XML file, then look up the title and titleabbrev elements under
# /db:article/db:info, resolving the 'db' prefix through const.XPATH.
20 doc = etree.parse(file_)
21 title = doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH)
22 menu = doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH)
# NOTE(review): title and menu are not read by any line visible here, and the
# embedded numbering jumps from 22 to 24 -- confirm whether a statement that
# uses them (for example a guard) belongs at this point.
# Match the joined path against self._basepath and keep the matched text.
# NOTE(review): .group() is called directly on the match result; re's
# match() returns None when the pattern does not match, which would raise
# AttributeError here -- confirm every walked path can match.
24 base = self._basepath.match(file_).group()
# str.replace removes every occurrence of 'index' in the matched path, not
# only a trailing one; [1:] then drops the first character of the result.
# NOTE(review): presumably the first character is the leading '.' of a
# relative path -- verify against the value of self._cwd.
25 link = base.replace('index','')[1:]
26 self._tree.append(link)
# Return the entries of self._tree as a set, so repeated links collapse into
# a single element and the original list order is not preserved.
29 return set(self._tree)