path: adding support for . in directories
[treecutter.git] / treecutter / directory.py
1 #!/usr/bin/python
2 import os
3 import fnmatch
4 from lxml import etree
5 import treecutter.constants as const
6 import re
7
class Directory():
    """Collect the site links of the article files found below a directory.

    The walk starts at ``self._cwd`` (the current directory).  Every XML
    file that carries both a title and a titleabbrev is turned into a link
    and accumulated in ``self._tree``.
    """

    def __init__(self):
        # Root of the walk.  Paths produced by os.walk start with this '.',
        # which is dropped again when a link is built.
        self._cwd = u'.'
        # Links in discovery order.  Several files can map to the same link
        # (e.g. index.en.xml and index.sv.xml), so duplicates are possible.
        self._tree = []
        # Matches a path up to the leading word characters of its last
        # component, i.e. without the '.lang.xml' style suffixes.  Raw
        # string so that '\w' and '\.' reach the regex engine untouched
        # instead of being (invalid) string escapes.
        self._basepath = re.compile(r'[/\w\._-]*/\w+', re.UNICODE)

    def _link_for(self, path):
        """Return the link for the file at *path*, or None if it has none.

        *path* is expected to start with the '.' of ``self._cwd``; that
        first character is removed from the result.  A final component
        named exactly 'index' is dropped so that './about/index.en.xml'
        yields '/about/'.  Any other occurrence of the text 'index' in the
        path (a directory called 'indexing', a file called 'myindex') is
        left untouched.
        """
        found = self._basepath.match(path)
        if found is None:
            # The path does not look like '<dirs>/<word>...' (for instance
            # the file name starts with a non-word character).
            return None
        base = found.group()
        # The pattern requires a '/', so the separator is always present.
        head, sep, leaf = base.rpartition('/')
        if leaf == 'index':
            base = head + sep
        return base[1:]

    def scan(self):
        """Walk the tree below ``self._cwd`` and record one link per article.

        Every '*.xml' file is parsed with lxml.  Files that lack either
        ``/db:article/db:info/db:title`` or ``.../db:titleabbrev`` are
        ignored (the ``db`` prefix is resolved through ``const.XPATH``,
        presumably the DocBook namespace).  Files whose path cannot be
        turned into a link are skipped as well.  Parse errors raised by
        lxml are not caught here.
        """
        for dirname, dirnames, filenames in os.walk(self._cwd):
            for filename in filenames:
                if not fnmatch.fnmatch(filename, '*.xml'):
                    continue
                file_ = os.path.join(dirname, filename)
                doc = etree.parse(file_)
                title = doc.xpath(u'/db:article/db:info/db:title',
                                  namespaces=const.XPATH)
                menu = doc.xpath(u'/db:article/db:info/db:titleabbrev',
                                 namespaces=const.XPATH)
                if not (title and menu):
                    continue
                link = self._link_for(file_)
                if link is not None:
                    self._tree.append(link)

    def set(self):
        """Return the links collected so far as a set (duplicates removed)."""
        return set(self._tree)