Initial refactoring to use objects. Simple trie implementation to better
author Fredrik Unger <fred@tree.se>
Sun, 3 Apr 2011 19:26:57 +0000 (21:26 +0200)
committer Fredrik Unger <fred@tree.se>
Sun, 3 Apr 2011 19:26:57 +0000 (21:26 +0200)
support the sitemap structure. Initial tests of the objects but
still keeping the initial working code.

src/tree-cutter.py

index 351f5d468f0a10f7b18fc6ecf4d67f7de14843fa..97ac6a2a65f15fdb7bf00c3fa09ba7c12b3f7ff2 100755 (executable)
@@ -9,6 +9,7 @@ import errno
 import time
 import argparse
 import shutil
+import pygraphviz as pgv
 from amara import bindery
 from amara.xslt import transform
 from Cheetah.Template import Template
@@ -41,6 +42,125 @@ def publish(src,target):
     if retcode:
         print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
 
+
+# XML namespace prefix -> namespace URI.  Passed as ``prefixes=`` to
+# bindery.parse() so XPath expressions can use the short prefixes.
+PREFIXES = {
+    u'db': u'http://docbook.org/ns/docbook',
+    u'xi': u'http://www.w3.org/2001/XInclude',
+    u'xl': u'http://www.w3.org/1999/xlink',
+}
+
+class Directory():
+    """Class containing the state of the directory with articles.
+
+    Walks the working directory for XML article files and records the
+    site link of every article found.
+    """
+    def __init__(self):
+        self._cwd = '.'
+        self._tree = []
+
+    def scan(self):
+        """Collect the link of every article below the working directory.
+
+        A '*.xml' file counts as an article when the parsed document has
+        both /db:article/db:info/db:title and .../db:titleabbrev.  Its
+        link is the file path relative to the working directory without
+        the extension; a file named 'index' maps to its directory, so
+        './foo/index.xml' yields '/foo/' and './bar.xml' yields '/bar'.
+        Links are appended to the internal list; nothing is returned.
+        """
+        for dirname, _dirnames, filenames in os.walk(self._cwd):
+            for filename in filenames:
+                if not fnmatch.fnmatch(filename, '*.xml'):
+                    continue
+                file_ = os.path.join(dirname, filename)
+                doc = bindery.parse(file_, prefixes=PREFIXES)
+                title = doc.xml_select(u'/db:article/db:info/db:title')
+                menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
+                if not (title and menu):
+                    continue
+                # Strip only the real extension: splitting on the first
+                # '.' would cut paths such as './v1.2/page.xml' short.
+                root = os.path.splitext(file_)[0]
+                # os.walk() prefixes every path with self._cwd; drop it
+                # so the link starts at the site root.
+                link = root[len(self._cwd):]
+                # Only a file that is literally named 'index' collapses
+                # to its directory; 'index' elsewhere in the path stays.
+                if os.path.basename(root) == 'index':
+                    link = link[:-len('index')]
+                self._tree.append(link)
+
+    def set(self):
+        """Return the collected links as a set (duplicates removed)."""
+        return set(self._tree)
+
+class Page():
+    """A single web page of the site, identified by its link."""
+
+    def __init__(self, link):
+        # The script value and resource list start out empty; only the
+        # link is supplied by the caller.
+        self._script = 0
+        self._resources = []
+        self._link = link
+
+    def link(self):
+        """Return the link this page was created with."""
+        return self._link
+
+class Node():
+    """One element of a Trie: a key token, a value and child nodes."""
+    def __init__(self,token,value):
+        self._token = token
+        self._value = value
+        self._children = []
+
+    def token(self):
+        """Return the key token this node stands for."""
+        return self._token
+
+    def value(self):
+        """Return the stored content (None for a placeholder node)."""
+        return self._value
+
+    def set_value(self, value):
+        """Replace the stored content."""
+        self._value = value
+
+    def children(self):
+        """Return the mutable list of child nodes."""
+        return self._children
+
+class Trie():
+    """Tree keyed by token sequences; each level is a list of Nodes."""
+    def __init__(self):
+        self._root = []
+
+    def _find(self, trie, token):
+        """Return the node in the list `trie` carrying `token`, or None."""
+        for node in trie:
+            if node.token() == token:
+                return node
+        return None
+
+    def _add(self, trie, key, content):
+        """Store `content` under the token list `key`, starting at `trie`.
+
+        Missing intermediate nodes are created as placeholders with a
+        value of None, so a child can be inserted before its parent.
+        Adding a key that already exists replaces that node's value
+        instead of appending a duplicate sibling.  `key` is not
+        modified; an empty key is ignored.
+        """
+        if not key:
+            return
+        # Walk (and build where needed) the path for all but the last token.
+        for token in key[:-1]:
+            node = self._find(trie, token)
+            if node is None:
+                node = Node(token, None)
+                trie.append(node)
+            trie = node.children()
+        # The last token is the leaf that carries the content.
+        leaf = self._find(trie, key[-1])
+        if leaf is None:
+            trie.append(Node(key[-1], content))
+        else:
+            leaf.set_value(content)
+
+    def add(self,key, content):
+        """Insert `content` under `key`, an iterable of tokens."""
+        # Copy so the caller's sequence is left alone and any iterable works.
+        self._add(self._root, list(key), content)
+
+    def _graph(self, trie, G):
+        """Add the nodes in `trie` and all parent->child edges to `G`."""
+        for node in trie:
+            G.add_node(node.token())
+            for ch in node.children():
+                G.add_edge(node.token(), ch.token())
+            # Recurse once per node, not once per child, so each subtree
+            # is visited a single time.
+            self._graph(node.children(), G)
+
+    def graph(self):
+        """Print the trie as a directed graph rooted at "sitemap"."""
+        G = pgv.AGraph(directed=True)
+        G.add_node("sitemap")
+        for ch in self._root:
+            G.add_edge("sitemap",ch.token())
+        self._graph(self._root, G)
+#        G.layout('dot')
+#        G.draw('g.png')
+        print(G.string())
+
+class Sitemap():
+    """Class keeping the internal site structure.
+
+    Holds the flat list of pages plus a Trie of the same pages keyed by
+    the path segments of their links.
+    """
+    def __init__(self):
+        self._file = 'sitemap.txt'
+        self._pages = []
+        self._tree = Trie()
+
+    def add_page(self, link):
+        """Create a Page for `link` and register it in the list and trie."""
+        page = Page(link)
+        self._pages.append(page)
+        # The capturing group makes re.split() keep the matched path
+        # segments: '/a/b/c' -> ['', '/a/', '', 'b/', 'c'].  Empty pieces
+        # are dropped, leaving ['/a/', 'b/', 'c'] as the trie key.
+        pieces = re.split(r'(^/\w*/|\w*/)', link)
+        tokens = [piece for piece in pieces if piece]
+        self._tree.add(tokens, page)
+
+    def read_map(self):
+        """Load the whitespace-separated links stored in the sitemap file.
+
+        If the file cannot be opened or read, an informational message
+        is printed and the sitemap is left unchanged.
+        """
+        try:
+            # 'with' closes the file even when read() fails.
+            with open(self._file) as f:
+                sml = f.read().split()
+        except IOError:
+            print('INFO: Could not read sitemap.txt - one will be created')
+            return
+        for line in sml:
+            self.add_page(line)
+
+    def set(self):
+        """Return the links of all pages as a set."""
+        return set(page.link() for page in self._pages)
+
+    def pages(self):
+        """Return the list of Page objects in insertion order."""
+        return self._pages
+
+    def graph(self):
+        """Print the page trie as a graph (delegates to Trie.graph)."""
+        self._tree.graph()
+
 def generateSitemap():
     sitemap = []
     try:
@@ -223,6 +343,22 @@ def createSitemap(sitemap):
     out.write(str(template))
     out.close()
 
+# Compare the articles found on disk with the stored sitemap, report
+# pages that disappeared and add the ones that are new.
+dir_ = Directory()
+sitemap = Sitemap()
+
+dir_.scan()
+sitemap.read_map()
+
+# Take both snapshots once, before the sitemap is modified below.
+on_disk = dir_.set()
+in_map = sitemap.set()
+missing = on_disk - in_map
+removed = in_map - on_disk
+
+# Sorted for stable output; it also lists a parent such as '/a/' before
+# its children such as '/a/b/'.
+for page in sorted(removed):
+    # Report the individual link; concatenating the whole set with a
+    # string raises TypeError.
+    print(page+' pages missing!!')
+
+for page in sorted(missing):
+    print('adding missing page '+page)
+    sitemap.add_page(page)
+
+sitemap.graph()
 
 
 sitemap = generateSitemap()