347440b777eb8418158ed90d21ceaa0ac139b4fd
[treecutter.git] / treecutter / sitemap.py
1 #!/usr/bin/python
2 import os
3 import codecs
4 import re
5 import shutil
6 import sys
7 import gettext
8 import tempfile
9 from lxml import etree
10 from lxml.builder import ElementMaker
11 from time import time
12 from treecutter import constants as const
13 from treecutter.trie import Trie
14 from treecutter.link import Link
15 from treecutter.tools import ssh_cmd, publish, mkdir_p,get_folder_size,sizeof_fmt
16
17
class Sitemap(object):
    """Keep the internal site structure and drive the site build.

    Links are stored in a Trie (``self._tree``).  The pages are built into
    a temporary directory (``self._tmptarget``) and later pushed to
    ``self._output`` by :meth:`publish`.
    """

    def __init__(self, args):
        """Create an empty sitemap.

        ``args`` must provide the attributes ``output``, ``style`` and
        ``subdir``.  ``style`` is concatenated directly with file names
        (e.g. ``style + "docbook.xhtml5.xsl"``), so it is expected to end
        with a path separator.
        """
        self._output = args.output
        self._style = args.style
        self._subdir = args.subdir
        self._file = 'sitemap.txt'
        self._tree = Trie()
        self._sitelang = set()
        self._isocode = etree.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
        self._tranlang = {}
        # Build area; mkdtemp() returns a path without trailing slash and
        # the rest of the class appends relative file names to it.
        self._tmptarget = tempfile.mkdtemp()+'/'

    # The sitemap uses a trie structure to keep track of links.
    # A link represents the path to the document as it appears on the
    # site.  A link can have several pages in different languages.
    def add_link(self, link):
        """Split ``link`` into path tokens and store it in the trie."""
        pattern = r'(^/[\w\.:-]*$|^/[\w\.:-]*/|[\w\.:-]*/)'
        # re.split() yields empty strings between adjacent matches;
        # only the non-empty tokens are used as trie keys.
        tokens = [tok for tok in re.split(pattern, link, flags=re.UNICODE)
                  if tok]
        self._tree.add(tokens, Link(link))

    def write_map(self):
        """Write all links, one per line, to the sitemap file (UTF-8)."""
        content = '\n'.join(link.link() for link in self._tree)
        # 'with' guarantees the handle is closed even if write() fails.
        with codecs.open(self._file, 'w', 'utf-8') as f:
            f.write(content)

    def read_map(self):
        """Load links from the sitemap file into the trie.

        A missing or unreadable file is not an error: an informational
        message is printed and the sitemap is left unchanged.
        """
        try:
            with codecs.open(self._file, 'r', 'utf-8') as f:
                sml = f.read().split()
        except IOError:
            print('INFO: Could not read sitemap.txt - one will be created')
            return
        for line in sml:
            self.add_link(line)

    # Create a set of the current tree for comparison with the
    # directory scan
    def set(self):
        """Return the links of the tree as a set of strings."""
        return set(link.link() for link in self._tree)

    def linklist(self):
        """Return the links of the tree as a list, in tree iteration order."""
        return [link.link() for link in self._tree]

    # Main driver in the application processing the documents
    # in the collected sitemap
    def process(self):
        """Build the whole site into the temporary target directory.

        Runs the phases prepare, language setup, render, template,
        resource copy and sitemap-page generation, printing the wall-clock
        time spent in each phase.
        """
        t1 = time()
        for link in self._tree:
            link.prepare()
        t2 = time()
        print("Prepare  [%5.2f s]" % (round(t2-t1, 2)))

        # Collect every language used on the site and load a translation
        # catalogue of language names for each one except English.
        for link in self._tree:
            self._sitelang.update(link.languages())
        for tran in self._sitelang:
            if tran != 'en':
                self._tranlang[tran] = gettext.translation('iso_639_3',
                                                           languages=[tran])
        t3 = time()
        print("Language [%5.2f s]" % (round(t3-t2, 2)))

        transform = {}
        transform['xhtml5'] = etree.XSLT(
            etree.parse(self._style+"docbook.xhtml5.xsl"))
        for link in self._tree:
            link.render(transform)
        t4 = time()
        print("Render   [%5.2f s]" % (round(t4-t3, 2)))

        for link in self._tree:
            link.template(self, self._style, self._tmptarget, self._subdir)
        t5 = time()
        print("Template [%5.2f s]" % (round(t5-t4, 2)))

        # Collect all files used by the documents and copy them next to
        # the generated pages.
        res = set()
        for link in self._tree:
            res.update(link.resources())
        for f in res:
            outfile = self._tmptarget+f
            mkdir_p(os.path.dirname(outfile))
            shutil.copyfile(f, outfile)
        # The timestamp must be taken after the copy; sampling it before
        # the loop reported ~0 s here and charged the copy to "Sitemap".
        t6 = time()
        print("Resources[%5.2f s]" % (round(t6-t5, 2)))

        # TODO: Improve the sitemap, it is a page that is generated from
        #       the ground up and added a bit adhoc.
        sitmaplink = Link('/sitemap')
        # Register the pages for all languages first, then fill and
        # template them in a second pass.
        for l in self._sitelang:
            sitmaplink.add_page((l, '/sitemap.'+l+'.xml'))
        for l in self._sitelang:
            sitmaplink.page(l).set_article(
                self.gen_menu(l, None, "tree sitemap"))
            sitmaplink.page(l).template(self, self._style, self._tmptarget,
                                        self._subdir)
        t7 = time()
        print("Sitemap  [%5.2f s]" % (round(t7-t6, 2)))

    def graph(self):
        """Delegate to the trie's ``graph()`` method."""
        self._tree.graph()

    def gen_menu(self, lang, page, cssclass):
        """Return the trie's menu for ``lang``, ``page`` and ``cssclass``."""
        return self._tree.menu(lang, page, cssclass, self._subdir)

    def lang_menu(self, lang, link):
        """Return a serialised ``<ul>`` linking to every language of ``link``.

        ``lang`` is the language of the page the menu is rendered on; the
        language names are translated into it unless it is ``'en'``.
        The result is the UTF-8 encoded output of ``etree.tostring``.
        """
        html = ElementMaker()
        menu = html.ul()
        for l in link.languages():
            # Match the code against any attribute of the ISO 639-3 entry.
            isoxml = u"//iso_639_3_entry[@*='"+l+"']"
            ln = self._isocode.xpath(isoxml)[0].get('name')
            if lang != 'en':
                ln = self._tranlang[lang].gettext(ln)
            # Only byte strings need decoding.  Calling decode() on a value
            # that is already unicode (presumably what lxml returns for
            # non-ASCII names on Python 2) triggers an implicit ASCII
            # encode and fails.
            if isinstance(ln, bytes):
                ln = ln.decode('utf-8')
            p = unicode(link.link())
            if p[-1] == u'/':
                p = p + u'index'
            p = p+u'.'+l
            li = html.li(html.a(ln,
                                href=self._subdir+p, hreflang=l))
            menu.append(li)
        return etree.tostring(menu, encoding='UTF-8', pretty_print=False)

    def publish(self):
        """Upload the built site and the style resources to the output."""
        print("Size [ %7s ]" % (sizeof_fmt(get_folder_size(self._tmptarget))))
        ssh_cmd(self._output, "mkdir -p")
        publish(self._tmptarget, self._output)
        # Style assets are optional; publish only those that exist.
        for res in ["stylesheets", "images", "js", "fonts", "favicon.ico"]:
            if os.path.exists(self._style+res):
                publish(self._style+res, self._output)
        ssh_cmd(self._output, "chmod a+rx")
139                 publish(self._style+res, self._output)
140         ssh_cmd(self._output,"chmod a+rx")