page.py: added import re
[treecutter.git] / treecutter / directory.py
1 #!/usr/bin/python
2 import os
3 import fnmatch
4 from amara import bindery
5 import treecutter.constants as const
6
class Directory(object):
    """Collect the site links of the article XML files below a directory.

    ``scan()`` walks the tree rooted at ``self._cwd`` and records one link
    per qualifying ``*.xml`` file; ``set()`` returns the distinct links.
    """

    def __init__(self):
        # Root of the walk; links are expressed relative to this directory.
        self._cwd = '.'
        # Links in discovery order. The same link can be appended more than
        # once (e.g. for page.en.xml and page.sv.xml); set() removes repeats.
        self._tree = []

    @staticmethod
    def _link_for(cwd, dirname, filename):
        """Return the link for ``filename`` found in ``dirname`` under ``cwd``.

        The link is the directory relative to ``cwd`` (with a leading
        separator) followed by the part of the filename before its first
        dot. A stem of exactly ``index`` is dropped, so ``index.en.xml``
        maps to the directory link itself:

            ('.', '.',         'index.en.xml') -> '/'
            ('.', './foo',     'index.en.xml') -> '/foo/'
            ('.', './foo/bar', 'page.sv.xml')  -> '/foo/bar/page'

        Only the filename is cut at a dot and only a whole ``index`` stem is
        removed, so directory names containing dots or the text ``index``
        (``./v1.0``, ``./reindexing``) are kept intact.
        """
        rel_dir = os.path.relpath(dirname, cwd)
        if rel_dir == os.curdir:
            prefix = os.sep
        else:
            prefix = os.sep + rel_dir + os.sep
        # Everything after the first dot (language code, extension) is
        # discarded, so all variants of one page share the same link.
        stem = filename.split('.', 1)[0]
        if stem == 'index':
            stem = ''
        return prefix + stem

    def scan(self):
        """Walk ``self._cwd`` and append a link for every article found.

        A file qualifies when its name matches ``*.xml`` and the parsed
        document yields a result for both the ``db:title`` and the
        ``db:titleabbrev`` selection under ``/db:article/db:info``.
        """
        for dirname, _subdirs, filenames in os.walk(self._cwd):
            for filename in fnmatch.filter(filenames, '*.xml'):
                file_ = os.path.join(dirname, filename)
                doc = bindery.parse(file_, prefixes=const.PREFIXES)
                title = doc.xml_select(u'/db:article/db:info/db:title')
                menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
                # Skip documents missing either element.
                if not (title and menu):
                    continue
                self._tree.append(
                    self._link_for(self._cwd, dirname, filename))

    def set(self):
        """Return the distinct links collected so far as a ``set``."""
        return set(self._tree)