xml: change from amara to lxml
[treecutter.git] / treecutter / directory.py
1 #!/usr/bin/python
2 import os
3 import fnmatch
4 from lxml import etree
5 import treecutter.constants as const
6
class Directory():
    """Collect the site links of the article XML files below a directory.

    ``scan()`` walks the tree rooted at ``self._cwd`` and records one link
    per qualifying ``*.xml`` file in ``self._tree``; ``set()`` returns the
    recorded links with duplicates removed.
    """

    def __init__(self):
        # Root of the walk; links are expressed relative to this directory.
        self._cwd = '.'
        # Links found so far, in discovery order (may contain duplicates).
        self._tree = []

    def _link_for(self, file_):
        """Return the link for the file path *file_* found below ``self._cwd``.

        The link is the path relative to the scan root, with a leading
        separator, with everything from the first dot of the file name
        removed, and with a file stem of exactly ``index`` dropped so the
        link ends at the containing directory, e.g.::

            ./about/index.en.xml    ->  /about/
            ./about/contact.en.xml  ->  /about/contact
        """
        relative = os.sep + os.path.relpath(file_, self._cwd)
        folder, name = os.path.split(relative)
        # Only the file name is cut at its first dot; dots in directory
        # names are part of the link and must be kept.
        stem = name.split('.')[0]
        # Only a file called exactly 'index' maps to its directory; names
        # that merely contain 'index' (e.g. 'indexing') are left alone.
        if stem == 'index':
            stem = ''
        return os.path.join(folder, stem)

    def scan(self):
        """Walk ``self._cwd`` and record the link of every article found.

        Each ``*.xml`` file is parsed with ``etree.parse``. A file counts
        as an article when both ``/db:article/db:info/db:title`` and
        ``/db:article/db:info/db:titleabbrev`` match (prefixes resolved
        through ``const.XPATH``). Its link is appended to ``self._tree``.

        Exceptions raised by ``etree.parse`` (for example on a malformed
        file) are not caught here and propagate to the caller.
        """
        for dirname, dirnames, filenames in os.walk(self._cwd):
            for filename in fnmatch.filter(filenames, '*.xml'):
                file_ = os.path.join(dirname, filename)
                doc = etree.parse(file_)
                title = doc.xpath(u'/db:article/db:info/db:title',
                                  namespaces=const.XPATH)
                menu = doc.xpath(u'/db:article/db:info/db:titleabbrev',
                                 namespaces=const.XPATH)
                if title and menu:
                    self._tree.append(self._link_for(file_))

    def set(self):
        """Return the recorded links as a set (duplicates removed)."""
        return set(self._tree)