# Build `sitemap`: a list of dicts, one per page, each holding at least a
# 'link' key.  Links already listed in sitemap.txt are loaded first (in file
# order); every DocBook article found below the current directory then either
# fills in the matching entry or is appended as a new one.  Finally the link
# list is written back to sitemap.txt.
sitemap = []
try:
    # `with` guarantees the handle is closed even if read() fails.
    with open('sitemap.txt') as sfile:
        flist = sfile.read().split()
except IOError:
    print('Sitemap missing - generating one.')
else:
    sitemap.extend(dict(link=f) for f in flist)

# Namespace prefixes for the XPath selections; loop-invariant, so built once.
ns_prefixes = {u'db': u'http://docbook.org/ns/docbook',
               u'xi': u'http://www.w3.org/2001/XInclude',
               u'xl': u'http://www.w3.org/1999/xlink'}

for dirname, dirnames, filenames in os.walk('.'):
    for filename in filenames:
        if not fnmatch.fnmatch(filename, '*.xml'):
            continue

        xfile = os.path.join(dirname, filename)
        doc = bindery.parse(xfile, prefixes=ns_prefixes)
        title = doc.xml_select(u'/db:article/db:info/db:title')
        menu = doc.xml_select(u'/db:article/db:info/db:titleabbrev')
        # Only articles carrying both a title and a titleabbrev become pages.
        if not (title and menu):
            continue

        code = doc.xml_select(u"//xi:include[@parse='text']")
        resource = doc.xml_select(u"//db:link[@xl:href]")
        image = doc.xml_select(u"//db:imagedata[@fileref]")

        # exe is 1 when at least one text-included file has an extension
        # listed in valid_scripts, otherwise 0.
        exe = 0
        if any(os.path.splitext(c.href)[1] in valid_scripts for c in code):
            exe = 1

        # Derive the link from the path without its extension.  os.walk('.')
        # roots every path at '.', so the first character is dropped.
        # splitext() only removes the final extension, so dots elsewhere in
        # the path (e.g. a 'v1.2' directory) no longer truncate the link.
        base = os.path.splitext(xfile)[0][1:]
        # Only a trailing 'index' file name collapses to its directory;
        # 'index' occurring elsewhere in the path is left untouched.
        if os.path.basename(base) == 'index':
            link = base[:-len('index')]
        else:
            link = base
        # Number of non-empty pieces produced by the split -- presumably the
        # depth of the page in the site tree (TODO confirm with consumers).
        level = len([part
                     for part in re.split(r'(^/\w*/|\w*/)', link)
                     if part])

        # Collect referenced links first, then images, keeping only those
        # that resolve to an existing file relative to this directory.
        refs = [r.href for r in resource] + [i.fileref for i in image]
        res = []
        for ref in refs:
            target = os.path.join(dirname, ref)
            if os.path.isfile(target):
                res.append(target)

        page = dict(title=unicode(doc.article.info.title),
                    menu=unicode(doc.article.info.titleabbrev),
                    # Swap only the extension; 'xml' inside the file name
                    # itself must survive.
                    output=os.path.join(
                        dirname, os.path.splitext(filename)[0] + '.html'),
                    exe=exe,
                    file=xfile,
                    res=res,
                    level=level)

        # Update every existing entry with this link; append when none match.
        found = 0
        for entry in sitemap:
            if entry['link'] == link:
                found = 1
                entry.update(page)
        if not found:
            print("adding "+link+" to sitemap")
            dd = dict(link=link)
            dd.update(page)
            sitemap.append(dd)

# Persist the (possibly extended) list of links, one per line.
with open('sitemap.txt', 'w') as sfile:
    for entry in sitemap:
        sfile.write(entry['link'] + '\n')