This was done because directory names containing ':' are not allowed
on Windows, and switching to '.' caused problems with the links, which
rely on the index.langcode.xml format.
import fnmatch
from lxml import etree
import treecutter.constants as const
import fnmatch
from lxml import etree
import treecutter.constants as const
class Directory():
"""Class containing the state of the directory with articles"""
def __init__(self):
self._cwd = u'.'
self._tree = []
class Directory():
"""Class containing the state of the directory with articles"""
def __init__(self):
self._cwd = u'.'
self._tree = []
+ self._basepath = re.compile('[/\w\._-]*/\w+',re.UNICODE)
def scan(self):
for dirname, dirnames, filenames in os.walk(self._cwd):
def scan(self):
for dirname, dirnames, filenames in os.walk(self._cwd):
title = doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH)
menu = doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH)
if title and menu:
title = doc.xpath(u'/db:article/db:info/db:title',namespaces=const.XPATH)
menu = doc.xpath(u'/db:article/db:info/db:titleabbrev',namespaces=const.XPATH)
if title and menu:
- base = file_.split('.')[1]
- link = base.replace('index','')
+ base = self._basepath.match(file_).group()
+ link = base.replace('index','')[1:]
self._tree.append(link)
def set(self):
self._tree.append(link)
def set(self):
import glob
from treecutter.page import Page
import glob
from treecutter.page import Page
self._link = link
# find the representations of the link.
self._pages = []
self._link = link
# find the representations of the link.
self._pages = []
+ self._langregexp = re.compile('.*\.(\w\w)\.xml')
path = link
if self._link[-1] == '/':
path = path+'index'
path = link
if self._link[-1] == '/':
path = path+'index'
def _scan_languages(self,path):
lang = []
def _scan_languages(self,path):
lang = []
- for l in glob.glob('.'+path+'*'):
- ls = l.split('.')
- if len(ls) > 3 and ls[3] == 'xml':
- lang.append((ls[2],l))
+ for l in glob.glob('.'+path+'*.xml'):
+ langcode = self._langregexp.search(l).group(1)
+ lang.append((langcode,l))
return lang
def link(self):
return lang
def link(self):
import codecs
import re
import shutil
import codecs
import re
import shutil
import gettext
import tempfile
from lxml import etree
import gettext
import tempfile
from lxml import etree
# representing the text on the site.
# A link can have several pages in different languages.
def add_link(self, link):
# representing the text on the site.
# A link can have several pages in different languages.
def add_link(self, link):
- tokens = filter(None,re.split(r'(^/[\w:-]*$|^/[\w:-]*/|[\w:-]*/)',link,flags=re.UNICODE))
+ tokens = filter(None,re.split(r'(^/[\w\.:-]*$|^/[\w\.:-]*/|[\w\.:-]*/)',link,flags=re.UNICODE))
self._tree.add(tokens,Link(link))
def write_map(self):
self._tree.add(tokens,Link(link))
def write_map(self):