treecutter/sitemap.py

   1 #!/usr/bin/python
   2 import os
   3 import re
   4 import shutil
   5 import gettext
   6 import tempfile
   7 from amara import bindery
   8 from time import time
   9 from treecutter.trie import Trie
  10 from treecutter.link import Link
  11 from treecutter.tools import ssh_cmd, publish, mkdir_p
  12
  13 class Sitemap():
  14     """Class keeping the internal site structure"""
  15     def __init__(self):
  16         self._file = 'sitemap.txt'
  17         self._tree = Trie()
  18         self._sitelang = set()
  19         self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
  20         self._tranlang = {}
  21         self._tmptarget = tempfile.mkdtemp()+'/'
  22
  23     # The sitemap uses a trie structure to keep track of links
  24     # A link represents the path to the document and the link
  25     # representing the text on the site.
  26     # A link can have several pages in different languages.
  27     def add_link(self, link):
  28         tokens = filter(None,re.split(r'(^/[\w-]*/|[\w-]*/)',link))
  29         self._tree.add(tokens,Link(link))
  30
  31     def write_map(self):
  32         f = open(self._file,'w')
  33         f.write('\n'.join(link.link() for link in self._tree))
  34         f.close()
  35
  36     def read_map(self):
  37         try:
  38             f = open(self._file)
  39             sml = f.read().split()
  40             f.close()
  41             for line in sml:
  42                 self.add_link(line)
  43         except IOError, what_error:
  44             print 'INFO: Could not read sitemap.txt - one will be created'
  45
  46     # Create a set of the current tree for comparison with the
  47     # directory scan
  48     def set(self):
  49         return set(link.link() for link in self._tree)
  50
  51     # Main driver in the application processing the documents
  52     # in the collected sitemap
  53     def process(self, style):
  54         t1 = time()
  55         print "Prepareing the input"
  56         for link in self._tree:
  57             link.prepare()
  58         t2 = time()
  59         print "Prepare  [%5.2f s]" % (round(t2-t1,2))
  60         for link in self._tree:
  61             self._sitelang = self._sitelang.union(set(link.languages()))
  62         for tran in self._sitelang:
  63             if tran != 'en':
  64                 self._tranlang[tran] = gettext.translation('iso_639_3',
  65                                                            languages=[tran])
  66         t3 = time()
  67         print "Language [%5.2f s]" % (round(t3-t2,2))
  68         for link in self._tree:
  69             link.render(style)
  70         t4 = time()
  71         print "Render   [%5.2f s]" % (round(t4-t3,2))
  72         for link in self._tree:
  73             link.template(self, style, self._tmptarget)
  74         t5 = time()
  75         print "Template [%5.2f s]" % (round(t5-t4,2))
  76         t6 = time()
  77         res = set()
  78         # Collect all files used by the documents
  79         for link in self._tree:
  80             res = res.union(link.resources())
  81         for f in res:
  82             outfile = self._tmptarget+f
  83             mkdir_p(os.path.dirname(outfile))
  84             shutil.copyfile(f,outfile)
  85         print "Resources[%5.2f s]" % (round(t6-t5,2))
  86         # TODO: Improve the sitemap, it is a page that is generated from
  87         #       the ground up and added a bit adhoc.
  88         sitmaplink = Link('/sitemap')
  89         for l in self._sitelang:
  90             sitmaplink.add_page((l,'/sitemap.'+l+'.xml'))
  91         for l in self._sitelang:
  92             sitmaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap"))
  93             sitmaplink.page(l).template(self,style,self._tmptarget)
  94         t7 = time()
  95         print "Sitemap  [%5.2f s]" % (round(t7-t6,2))
  96
  97     def graph(self):
  98         self._tree.graph()
  99
 100     def gen_menu(self,lang,page,cssclass):
 101         return self._tree.menu(lang,page,cssclass)
 102
 103     def lang_menu(self,lang,link):
 104         html = "<ul>"
 105         for l in link.languages():
 106             isoxml = u"//iso_639_3_entry[@*='"+l+"']"
 107             ln = self._isocode.xml_select(isoxml)[0].name
 108             if lang != 'en':
 109                 ln = self._tranlang[lang].gettext(ln)
 110             p = link.link()
 111             if p[-1] == '/':
 112                 p = p +'index'
 113             p = p+'.'+l
 114             html += '<li><a href="%s" hreflang="%s">%s</a></li>' % (p, l, ln)
 115         html += "</ul>"
 116         return html
 117
 118     def publish(self,output,style):
 119         ssh_cmd(output,"mkdir -p")
 120         publish(self._tmptarget, output)
 121         for res in ["css","images","js","fonts","favicon.ico"]:
 122             if (os.path.exists(style+res)):
 123                 publish(style+res, output)
 124         ssh_cmd(output,"chmod a+rx")