11e300eda9fbc0ff0d8ac6c4e718dfdb27e576dc
[treecutter.git] / treecutter / sitemap.py
1 #!/usr/bin/python
2 import os
3 import codecs
4 import re
5 import shutil
6 import sys
7 import gettext
8 import tempfile
9 from lxml import etree
10 from lxml.builder import ElementMaker
11 from time import time
12 from treecutter import constants as const
13 from treecutter.trie import Trie
14 from treecutter.link import Link
15 from treecutter.tools import ssh_cmd, publish, mkdir_p
16
class Sitemap(object):
    """Keep the internal site structure and drive the site build.

    The sitemap stores every link of the site in a trie.  A link
    represents the path to a document and can have several pages, one
    per language.

    The constructor reads three attributes from ``args``:

    * ``output`` -- publish target handed to ``ssh_cmd`` and ``publish``.
    * ``style``  -- prefix of the style directory; it is concatenated
      directly with file names, so it must end with a path separator.
    * ``subdir`` -- prefix put in front of generated hrefs.
    """

    # Splits a link such as '/a/b/c' into the trie tokens '/a/', 'b/', 'c'.
    # Compiled once because add_link() runs for every link of the site.
    _TOKEN_RE = re.compile(r'(^/[\w\.:-]*$|^/[\w\.:-]*/|[\w\.:-]*/)',
                           re.UNICODE)

    def __init__(self, args):
        self._output = args.output
        self._style = args.style
        self._subdir = args.subdir
        self._file = 'sitemap.txt'
        self._tree = Trie()
        self._sitelang = set()
        self._isocode = etree.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
        self._tranlang = {}
        # Staging directory for the generated site.
        # NOTE(review): nothing in this class removes it again.
        self._tmptarget = tempfile.mkdtemp()+'/'

    @staticmethod
    def _tokenize(link):
        """Return the list of non-empty trie tokens for the path ``link``."""
        return [token for token in Sitemap._TOKEN_RE.split(link) if token]

    # The sitemap uses a trie structure to keep track of links.
    # A link represents the path to the document and the link
    # representing the text on the site.
    # A link can have several pages in different languages.
    def add_link(self, link):
        """Insert the path ``link`` into the trie as a new ``Link``."""
        self._tree.add(self._tokenize(link), Link(link))

    def write_map(self):
        """Write all link paths, one per line, to the sitemap file (UTF-8)."""
        # Build the content first so that a failure while collecting the
        # links does not leave a truncated sitemap file behind.
        content = '\n'.join(link.link() for link in self._tree)
        with codecs.open(self._file, 'w', 'utf-8') as f:
            f.write(content)

    def read_map(self):
        """Load the links stored in the sitemap file into the trie.

        A missing or unreadable file is not an error: an informational
        message is printed and the trie is left untouched.
        """
        try:
            with codecs.open(self._file, 'r', 'utf-8') as f:
                links = f.read().split()
        except IOError:
            print('INFO: Could not read sitemap.txt - one will be created')
            return
        # Kept outside the try block so that errors raised while adding a
        # link are not mistaken for a missing sitemap file.
        for line in links:
            self.add_link(line)

    # Create a set of the current tree for comparison with the
    # directory scan
    def set(self):
        """Return the set of all link paths currently in the trie."""
        return {link.link() for link in self._tree}

    # Main driver in the application processing the documents
    # in the collected sitemap
    def process(self):
        """Run every build stage over the collected links.

        Stages, each followed by a timing line on stdout: prepare the
        links, collect the site languages and their translations, render
        with the xhtml5 stylesheet, apply the templates, copy the
        referenced resources into the staging directory and finally
        generate the '/sitemap' page for every language.
        """
        t1 = time()
        print("Prepareing the input")
        for link in self._tree:
            link.prepare()
        t2 = time()
        print("Prepare  [%5.2f s]" % (round(t2-t1, 2)))
        for link in self._tree:
            self._sitelang.update(link.languages())
        for tran in self._sitelang:
            if tran != 'en':
                # fallback=True: a language without an iso_639_3 catalogue
                # gets untranslated names instead of aborting the build.
                self._tranlang[tran] = gettext.translation('iso_639_3',
                                                           languages=[tran],
                                                           fallback=True)
        t3 = time()
        print("Language [%5.2f s]" % (round(t3-t2, 2)))
        transform = {}
        transform['xhtml5'] = etree.XSLT(
            etree.parse(self._style+"docbook.xhtml5.xsl"))
        for link in self._tree:
            link.render(transform)
        t4 = time()
        print("Render   [%5.2f s]" % (round(t4-t3, 2)))
        for link in self._tree:
            link.template(self, self._style, self._tmptarget, self._subdir)
        t5 = time()
        print("Template [%5.2f s]" % (round(t5-t4, 2)))
        # Collect all files used by the documents
        res = set()
        for link in self._tree:
            res.update(link.resources())
        for f in res:
            outfile = self._tmptarget+f
            mkdir_p(os.path.dirname(outfile))
            shutil.copyfile(f, outfile)
        # Take the timestamp after the copy so that the line below reports
        # the time actually spent on resources.
        t6 = time()
        print("Resources[%5.2f s]" % (round(t6-t5, 2)))
        # TODO: Improve the sitemap, it is a page that is generated from
        #       the ground up and added a bit ad hoc.
        sitemap_link = Link('/sitemap')
        # Register all language pages first, then fill and template them.
        for l in self._sitelang:
            sitemap_link.add_page((l, '/sitemap.'+l+'.xml'))
        for l in self._sitelang:
            page = sitemap_link.page(l)
            page.set_article(self.gen_menu(l, None, "tree sitemap"))
            page.template(self, self._style, self._tmptarget, self._subdir)
        t7 = time()
        print("Sitemap  [%5.2f s]" % (round(t7-t6, 2)))

    def graph(self):
        """Delegate to the trie's ``graph()``."""
        self._tree.graph()

    def gen_menu(self, lang, page, cssclass):
        """Return the trie's menu for ``lang``, ``page`` and ``cssclass``."""
        return self._tree.menu(lang, page, cssclass, self._subdir)

    def lang_menu(self, lang, link):
        """Return a serialized ``<ul>`` linking to every language of ``link``.

        Each entry is labelled with the ISO 639-3 name of the language,
        translated into ``lang`` unless ``lang`` is 'en', and points to
        ``<subdir><path>.<code>`` ('index' is appended to paths ending
        in '/').  A code missing from the ISO table is labelled with the
        code itself.
        """
        html = ElementMaker()
        menu = html.ul()
        text_type = type(u'')  # unicode on Python 2, str on Python 3
        for code in link.languages():
            # Pass the code as an XPath variable instead of splicing it
            # into the expression, so quoting can never break the query.
            entries = self._isocode.xpath(u"//iso_639_3_entry[@*=$code]",
                                          code=code)
            name = entries[0].get('name') if entries else code
            if lang != 'en':
                name = self._tranlang[lang].gettext(name)
            # Only byte strings need decoding; text passes through as is.
            if isinstance(name, bytes):
                name = name.decode('utf-8')
            path = text_type(link.link())
            if path.endswith(u'/'):
                path = path + u'index'
            path = path + u'.' + code
            menu.append(html.li(html.a(name,
                                       href=self._subdir+path,
                                       hreflang=code)))
        return etree.tostring(menu, encoding='UTF-8', pretty_print=False)

    def publish(self):
        """Publish the staged site and the style resources to the output."""
        ssh_cmd(self._output, "mkdir -p")
        publish(self._tmptarget, self._output)
        # Only the style resources that actually exist are published.
        for res in ["stylesheets", "images", "js", "fonts", "favicon.ico"]:
            if os.path.exists(self._style+res):
                publish(self._style+res, self._output)
        ssh_cmd(self._output, "chmod a+rx")