import time
import argparse
import shutil
+import pygraphviz as pgv
from amara import bindery
from amara.xslt import transform
from Cheetah.Template import Template
if retcode:
print 'Error: '+' '.join(cmd)+' Returncode ['+str(retcode)+']'
+
# Namespace prefixes handed to bindery.parse() so that the XPath expressions
# used in this file can refer to elements as db:..., xi:... and xl:...
PREFIXES = {
    u'db': u'http://docbook.org/ns/docbook',
    u'xi': u'http://www.w3.org/2001/XInclude',
    u'xl': u'http://www.w3.org/1999/xlink',
}
+
class Directory(object):
    """Class containing the state of the directory with articles.

    A page is any ``*.xml`` file below the working directory whose
    ``/db:article/db:info`` contains both a ``db:title`` and a
    ``db:titleabbrev`` element.
    """

    def __init__(self):
        self._cwd = '.'     # root directory that scan() walks
        self._tree = []     # links of the pages found so far

    def _link(self, dirname, filename):
        """Return the site link for *filename* located in *dirname*.

        The link is the directory path relative to the scan root, written
        with ``/`` separators and a leading ``/``, followed by the file name
        up to its first dot.  A file whose name before the first dot is
        exactly ``index`` maps to the directory itself (``/foo/``).

        Only the file name is cut at a dot and only an exact ``index`` name
        is dropped, so directories such as ``v1.2`` or ``reindex`` keep
        their full name in the link.
        """
        rel_dir = os.path.relpath(dirname, self._cwd)
        parts = [] if rel_dir == os.curdir else rel_dir.split(os.sep)
        # Everything after the first dot is treated as extension, so
        # 'index.en.xml' and 'index.xml' yield the same link.
        stem = filename.split('.')[0]
        if stem == 'index':
            stem = ''
        return '/' + ''.join(part + '/' for part in parts) + stem

    def scan(self):
        """Walk the directory and record the link of every article found.

        Links are appended to the internal list; use set() to read them.
        """
        for dirname, _dirnames, filenames in os.walk(self._cwd):
            for filename in filenames:
                if not fnmatch.fnmatch(filename, '*.xml'):
                    continue
                file_ = os.path.join(dirname, filename)
                doc = bindery.parse(file_, prefixes=PREFIXES)
                title = doc.xml_select(u'/db:article/db:info/db:title')
                menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
                # Files lacking either element are not treated as pages.
                if title and menu:
                    self._tree.append(self._link(dirname, filename))

    def set(self):
        """Return the links collected by scan() as a set."""
        return set(self._tree)
+
class Page(object):
    """Class representing a webpage on the site."""

    def __init__(self, link):
        """Create a page for *link*, the site-relative address of the page."""
        self._link = link
        # NOTE(review): _resources and _script are initialised here but not
        # used by the code visible in this part of the file.
        self._resources = []
        self._script = 0

    def link(self):
        """Return the link this page was created with."""
        return self._link
+
class Node(object):
    """One entry of the Trie: a token, its payload and its child nodes."""

    def __init__(self, token, value):
        self._token = token
        self._value = value
        self._children = []

    def token(self):
        """Return the key token this node stands for."""
        return self._token

    def value(self):
        """Return the content stored at this node."""
        return self._value

    def children(self):
        """Return the (mutable) list of child nodes."""
        return self._children


class Trie(object):
    """Tree of Node objects addressed by a sequence of tokens."""

    def __init__(self):
        self._root = []

    def _add(self, trie, key, content):
        """Insert *content* at the path *key* below the node list *trie*.

        *key* is not modified.  An empty key is ignored.  Path tokens that
        have no node yet get a placeholder node whose value is ``None``, so
        content is kept even when it is added before its parent.  When the
        content for such a placeholder arrives later, the placeholder is
        replaced by a real node that keeps the children already collected.
        """
        tokens = list(key)
        if not tokens:
            return
        level = trie
        # Walk (and create where needed) every node above the leaf.
        for token in tokens[:-1]:
            node = next((n for n in level if n.token() == token), None)
            if node is None:
                node = Node(token, None)
                level.append(node)
            level = node.children()
        leaf = tokens[-1]
        for index, node in enumerate(level):
            if node.token() == leaf and node.value() is None:
                filled = Node(leaf, content)
                filled.children().extend(node.children())
                level[index] = filled
                return
        level.append(Node(leaf, content))

    def add(self, key, content):
        """Store *content* under *key*, an iterable of tokens."""
        self._add(self._root, key, content)

    def _graph(self, trie, G):
        """Add the nodes in *trie*, and the edges to their children, to *G*."""
        # NOTE(review): graph nodes are named by token only, so equal tokens
        # in different branches refer to the same graph node.
        for node in trie:
            G.add_node(node.token())
            for child in node.children():
                G.add_edge(node.token(), child.token())
            self._graph(node.children(), G)

    def graph(self):
        """Build a directed graph rooted at "sitemap" and print G.string()."""
        G = pgv.AGraph(directed=True)
        G.add_node("sitemap")
        for child in self._root:
            G.add_edge("sitemap", child.token())
        self._graph(self._root, G)
        # Left disabled in the original; would render the graph to g.png:
        # G.layout('dot')
        # G.draw('g.png')
        print(G.string())
+
class Sitemap(object):
    """Class keeping the internal site structure."""

    def __init__(self):
        self._file = 'sitemap.txt'   # file read by read_map()
        self._pages = []             # Page objects in insertion order
        self._tree = Trie()          # the same pages keyed by link tokens

    def add_page(self, link):
        """Create a Page for *link* and register it in the list and the tree.

        The link is split into tokens that each end in ``/`` (the first one
        keeps its leading ``/``) plus whatever follows the last ``/``;
        for example ``/foo/bar`` becomes ``['/foo/', 'bar']``.
        """
        page = Page(link)
        self._pages.append(page)
        # re.split leaves empty strings around the captured separators;
        # they are discarded here.
        tokens = [tok for tok in re.split(r'(^/\w*/|\w*/)', link) if tok]
        self._tree.add(tokens, page)

    def read_map(self):
        """Load the links from the sitemap file and add a page for each.

        The file content is split on whitespace.  If the file cannot be
        read, an informational message is printed and no pages are added.
        """
        try:
            with open(self._file) as f:
                links = f.read().split()
        except IOError:
            print('INFO: Could not read sitemap.txt - one will be created')
            return
        for link in links:
            self.add_page(link)

    def set(self):
        """Return the links of all registered pages as a set."""
        return set(page.link() for page in self._pages)

    def pages(self):
        """Return the list of registered Page objects."""
        return self._pages

    def graph(self):
        """Output the graph of the page tree (delegates to the Trie)."""
        self._tree.graph()
+
def generateSitemap():
sitemap = []
try:
out.write(str(template))
out.close()
# Compare the articles found on disk with the pages listed in the sitemap
# file, report the differences and bring the sitemap up to date.
dir_ = Directory()
sitemap = Sitemap()

dir_.scan()
sitemap.read_map()

missing = dir_.set() - sitemap.set()    # on disk, not yet in the sitemap
removed = sitemap.set() - dir_.set()    # in the sitemap, gone from disk
for page in sorted(removed):
    print(page + ' pages missing!!')

# Sorted so that a parent link such as '/foo/' is added before '/foo/bar'.
for page in sorted(missing):
    print('adding missing page ' + page)
    sitemap.add_page(page)

sitemap.graph()
sitemap = generateSitemap()