xml: change from amara to lxml

[treecutter.git] / treecutter / sitemap.py
diff --git a/treecutter/sitemap.py b/treecutter/sitemap.py

index 7bd81e9a1097da7bb0041c8c56b8cf6e94a4b32c..a8612f23b9ba9b1c1955bd370af771b59a008b9f 100644 (file)
--- a/treecutter/sitemap.py
+++ b/treecutter/sitemap.py
@@ -1,11 +1,13 @@
  #!/usr/bin/python
  import os
  import re
-import time
  import shutil
  import gettext
  import tempfile
-from amara import bindery
+from lxml import etree
+from lxml.builder import ElementMaker
+from time import time
+from treecutter import constants as const
  from treecutter.trie import Trie
  from treecutter.link import Link
  from treecutter.tools import ssh_cmd, publish, mkdir_p
@@ -16,10 +18,14 @@ class Sitemap():
          self._file = 'sitemap.txt'
          self._tree = Trie()
          self._sitelang = set()
-        self._isocode = bindery.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
+        self._isocode = etree.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
          self._tranlang = {}
          self._tmptarget = tempfile.mkdtemp()+'/'
  
+    # The sitemap uses a trie structure to keep track of links.
+    # A link represents the path to the document as well as the
+    # link text shown for it on the site.
+    # A link can have several pages in different languages.
      def add_link(self, link):
          tokens = filter(None,re.split(r'(^/[\w-]*/|[\w-]*/)',link))
          self._tree.add(tokens,Link(link))
@@ -39,14 +45,19 @@ class Sitemap():
          except IOError, what_error:
              print 'INFO: Could not read sitemap.txt - one will be created'
  
+    # Create a set of the current tree for comparison with the
+    # directory scan
      def set(self):
          return set(link.link() for link in self._tree)
  
+    # Main driver in the application processing the documents
+    # in the collected sitemap
      def process(self, style):
-        t1 = time.time()
+        t1 = time()
+        print "Prepareing the input"
          for link in self._tree:
              link.prepare()
-        t2 = time.time()
+        t2 = time()
          print "Prepare  [%5.2f s]" % (round(t2-t1,2))
          for link in self._tree:
              self._sitelang = self._sitelang.union(set(link.languages()))
@@ -54,19 +65,19 @@ class Sitemap():
              if tran != 'en':
                  self._tranlang[tran] = gettext.translation('iso_639_3',
                                                             languages=[tran])
-        t3 = time.time()
+        t3 = time()
          print "Language [%5.2f s]" % (round(t3-t2,2))
          for link in self._tree:
              link.render(style)
-        t4 = time.time()
+        t4 = time()
          print "Render   [%5.2f s]" % (round(t4-t3,2))
          for link in self._tree:
              link.template(self, style, self._tmptarget)
-        t5 = time.time()
+        t5 = time()
          print "Template [%5.2f s]" % (round(t5-t4,2))
-        t6 = time.time()
+        t6 = time()
          res = set()
-        cwd = os.getcwd()
+        # Collect all files used by the documents
          for link in self._tree:
              res = res.union(link.resources())
          for f in res:
@@ -74,13 +85,15 @@ class Sitemap():
              mkdir_p(os.path.dirname(outfile))
              shutil.copyfile(f,outfile)
          print "Resources[%5.2f s]" % (round(t6-t5,2))
+        # TODO: Improve the sitemap; it is a page that is generated from
+        #       the ground up and added in a somewhat ad hoc way.
          sitmaplink = Link('/sitemap')
          for l in self._sitelang:
              sitmaplink.add_page((l,'/sitemap.'+l+'.xml'))
          for l in self._sitelang:
              sitmaplink.page(l).set_article(self.gen_menu(l,None,"tree sitemap"))
              sitmaplink.page(l).template(self,style,self._tmptarget)
-        t7 = time.time()
+        t7 = time()
          print "Sitemap  [%5.2f s]" % (round(t7-t6,2))
  
      def graph(self):
@@ -90,24 +103,25 @@ class Sitemap():
          return self._tree.menu(lang,page,cssclass)
  
      def lang_menu(self,lang,link):
-        html = "<ul>"
+        html = ElementMaker(namespace=const.HTML_NS)
+        menu = html.ul()
          for l in link.languages():
              isoxml = u"//iso_639_3_entry[@*='"+l+"']"
-            ln = self._isocode.xml_select(isoxml)[0].name
+            ln = self._isocode.xpath(isoxml)[0].get('name')
              if lang != 'en':
                  ln = self._tranlang[lang].gettext(ln)
              p = link.link()
              if p[-1] == '/':
                  p = p +'index'
              p = p+'.'+l
-            html += '<li><a href="%s" hreflang="%s">%s</a></li>' % (p, l, ln)
-        html += "</ul>"
-        return html
+            li = html.li(html.a(ln,href=p,hreflang=l))
+            menu.append(li)
+        return etree.tostring(menu,encoding='UTF-8',pretty_print=False)
  
      def publish(self,output,style):
          ssh_cmd(output,"mkdir -p")
          publish(self._tmptarget, output)
-        for res in ["css","images","js","favicon.ico"]:
+        for res in ["css","images","js","fonts","favicon.ico"]:
              if (os.path.exists(style+res)):
                  publish(style+res, output)
          ssh_cmd(output,"chmod a+rx")