import fnmatch
 from lxml import etree
 import treecutter.constants as const
+import re
 
 class Directory():
     """Class containing the state of the directory with articles"""
     def __init__(self):
         self._cwd = u'.'
         self._tree = []
+        self._basepath = re.compile('[/\w\._-]*/\w+',re.UNICODE)
 
     def scan(self):
         for dirname, dirnames, filenames in os.walk(self._cwd):
                     title = doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH)
                     menu  = doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH)
                     if title and menu:
-                        base = file_.split('.')[1]
-                        link = base.replace('index','')
+                        base = self._basepath.match(file_).group()
+                        link = base.replace('index','')[1:]
                         self._tree.append(link)
 
     def set(self):
 
 #!/usr/bin/python
+import re
 import glob
 from treecutter.page import Page
 
         self._link = link
         # find the representations of the link.
         self._pages = []
+        self._langregexp = re.compile('.*\.(\w\w)\.xml')
         path = link
         if self._link[-1] == '/':
             path = path+'index'
 
     def _scan_languages(self,path):
         lang = []
-        for l in  glob.glob('.'+path+'*'):
-            ls = l.split('.')
-            if len(ls) > 3 and ls[3] == 'xml':
-                lang.append((ls[2],l))
+        for l in  glob.glob('.'+path+'*.xml'):
+            langcode = self._langregexp.search(l).group(1)
+            lang.append((langcode,l))
         return lang
 
     def link(self):
 
 import codecs
 import re
 import shutil
+import sys
 import gettext
 import tempfile
 from lxml import etree
     # representing the text on the site.
     # A link can have several pages in different languages.
     def add_link(self, link):
-        tokens = filter(None,re.split(r'(^/[\w:-]*$|^/[\w:-]*/|[\w:-]*/)',link,flags=re.UNICODE))
+        tokens = filter(None,re.split(r'(^/[\w\.:-]*$|^/[\w\.:-]*/|[\w\.:-]*/)',link,flags=re.UNICODE))
         self._tree.add(tokens,Link(link))
 
     def write_map(self):