sitemap/resource: allowing minus ('-') characters in paths, adding video

[treecutter.git] / treecutter / directory.py
diff --git a/treecutter/directory.py b/treecutter/directory.py

index dc60c379718e4a016c5fecc55eeb75610c5def31..1b7a3e9f3ee6562ff9bfedd51ea0bdac409b1d7c 100644 (file)
--- a/treecutter/directory.py
+++ b/treecutter/directory.py
@@ -1,26 +1,28 @@
  #!/usr/bin/python
  import os
  import fnmatch
-from amara import bindery
+from lxml import etree
  import treecutter.constants as const
+import re
  
  class Directory():
      """Class containing the state of the directory with articles"""
      def __init__(self):
-        self._cwd = '.'
+        self._cwd = u'.'
          self._tree = []
+        self._basepath = re.compile('[/\w\._-]*/[\w-]+',re.UNICODE)
  
      def scan(self):
          for dirname, dirnames, filenames in os.walk(self._cwd):
              for filename in filenames:
                  if fnmatch.fnmatch(filename, '*.xml'):
                      file_ = os.path.join(dirname,filename)
-                    doc = bindery.parse(file_, prefixes=const.PREFIXES)
-                    title = doc.xml_select(u'/db:article/db:info/db:title')
-                    menu  = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
+                    doc = etree.parse(file_)
+                    title = doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH)
+                    menu  = doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH)
                      if title and menu:
-                        base = file_.split('.')[1]
-                        link = base.replace('index','')
+                        base = self._basepath.match(file_).group()
+                        link = base.replace('index','')[1:]
                          self._tree.append(link)
  
      def set(self):