sitemap/resource: adding minus in path, adding video
[treecutter.git] / treecutter / directory.py
index dc60c379718e4a016c5fecc55eeb75610c5def31..1b7a3e9f3ee6562ff9bfedd51ea0bdac409b1d7c 100644 (file)
@@ -1,26 +1,28 @@
 #!/usr/bin/python
 import os
 import fnmatch
-from amara import bindery
+from lxml import etree
 import treecutter.constants as const
+import re
 
 class Directory():
     """Class containing the state of the directory with articles"""
     def __init__(self):
-        self._cwd = '.'
+        self._cwd = u'.'
         self._tree = []
+        self._basepath = re.compile('[/\w\._-]*/[\w-]+',re.UNICODE)
 
     def scan(self):
         for dirname, dirnames, filenames in os.walk(self._cwd):
             for filename in filenames:
                 if fnmatch.fnmatch(filename, '*.xml'):
                     file_ = os.path.join(dirname,filename)
-                    doc = bindery.parse(file_, prefixes=const.PREFIXES)
-                    title = doc.xml_select(u'/db:article/db:info/db:title')
-                    menu  = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
+                    doc = etree.parse(file_)
+                    title = doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH)
+                    menu  = doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH)
                     if title and menu:
-                        base = file_.split('.')[1]
-                        link = base.replace('index','')
+                        base = self._basepath.match(file_).group()
+                        link = base.replace('index','')[1:]
                         self._tree.append(link)
 
     def set(self):