sitemap: adding size calculation
[treecutter.git] / treecutter / sitemap.py
1 #!/usr/bin/python
2 import os
3 import codecs
4 import re
5 import shutil
6 import sys
7 import gettext
8 import tempfile
9 from lxml import etree
10 from lxml.builder import ElementMaker
11 from time import time
12 from treecutter import constants as const
13 from treecutter.trie import Trie
14 from treecutter.link import Link
15 from treecutter.tools import ssh_cmd, publish, mkdir_p,get_folder_size,sizeof_fmt
16
17
class Sitemap(object):
    """Class keeping the internal site structure.

    The links of the site are stored in a ``Trie``.  The class reads and
    writes the plain-text sitemap file, drives the prepare / render /
    template steps over every link, and publishes the files that were
    staged in a temporary directory.
    """

    # Pattern used to cut a link into path tokens.  The capturing group makes
    # re.split() keep the matched pieces, e.g. '/a/b' -> '/a/', 'b'.
    _TOKEN_RE = re.compile(r'(^/[\w\.:-]*$|^/[\w\.:-]*/|[\w\.:-]*/)',
                           flags=re.UNICODE)

    def __init__(self, args):
        """Set up an empty sitemap.

        ``args`` must provide the attributes ``output``, ``style`` and
        ``subdir``; they are stored as-is and later concatenated with file
        names, so ``style`` is expected to end with a path separator.
        """
        self._output = args.output
        self._style = args.style
        self._subdir = args.subdir
        self._file = 'sitemap.txt'
        self._tree = Trie()
        # Languages found on the site; filled in by process().
        self._sitelang = set()
        self._isocode = etree.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
        # gettext translations of the language names, keyed by language code.
        self._tranlang = {}
        # Staging directory for the generated site.  NOTE(review): nothing in
        # this class removes it again - confirm whether cleanup happens
        # elsewhere.
        self._tmptarget = tempfile.mkdtemp()+'/'

    @staticmethod
    def _tokenize(link):
        """Return the non-empty path tokens of ``link`` as a list."""
        # A list (not a lazy filter object) so the result is the same
        # sequence type on every Python version.
        return [token for token in Sitemap._TOKEN_RE.split(link) if token]

    # The sitemap uses a trie structure to keep track of links
    # A link represents the path to the document and the link
    # representing the text on the site.
    # A link can have several pages in different languages.
    def add_link(self, link):
        """Insert ``link`` (a path string) into the trie as a ``Link``."""
        self._tree.add(self._tokenize(link), Link(link))

    def write_map(self):
        """Write all links of the tree to the sitemap file, one per line."""
        # Build the content first so a failure while collecting the links
        # does not leave a truncated sitemap file behind.
        content = '\n'.join(link.link() for link in self._tree)
        with codecs.open(self._file, 'w', 'utf-8') as f:
            f.write(content)

    def read_map(self):
        """Load the links listed in the sitemap file into the tree.

        A sitemap file that cannot be read is not an error: a notice is
        printed and the tree is left unchanged.
        """
        try:
            with codecs.open(self._file, 'r', 'utf-8') as f:
                entries = f.read().split()
        except IOError:
            print('INFO: Could not read sitemap.txt - one will be created')
            return
        for entry in entries:
            self.add_link(entry)

    # Create a set of the current tree for comparison with the
    # directory scan
    def set(self):
        """Return the link paths of the tree as a set."""
        return set(link.link() for link in self._tree)

    # Main driver in the application processing the documents
    # in the collected sitemap
    def process(self):
        """Run every processing step over the tree and print step timings.

        Steps, in order: prepare, language collection, render, template,
        resource copy, sitemap page generation.
        """
        t1 = time()
        print("Prepareing the input")
        for link in self._tree:
            link.prepare()
        t2 = time()
        print("Prepare  [%5.2f s]" % (round(t2-t1, 2)))
        for link in self._tree:
            self._sitelang.update(link.languages())
        # English names come straight from the ISO code file, so only the
        # other languages need a translation catalogue.
        for tran in self._sitelang:
            if tran != 'en':
                self._tranlang[tran] = gettext.translation('iso_639_3',
                                                           languages=[tran])
        t3 = time()
        print("Language [%5.2f s]" % (round(t3-t2, 2)))
        transform = {
            'xhtml5': etree.XSLT(etree.parse(self._style+"docbook.xhtml5.xsl")),
        }
        for link in self._tree:
            link.render(transform)
        t4 = time()
        print("Render   [%5.2f s]" % (round(t4-t3, 2)))
        for link in self._tree:
            link.template(self, self._style, self._tmptarget, self._subdir)
        t5 = time()
        print("Template [%5.2f s]" % (round(t5-t4, 2)))
        self._copy_resources()
        # Timestamp taken after the copy so the copy time is reported under
        # "Resources" and not added to the "Sitemap" step.
        t6 = time()
        print("Resources[%5.2f s]" % (round(t6-t5, 2)))
        self._build_sitemap_page()
        t7 = time()
        print("Sitemap  [%5.2f s]" % (round(t7-t6, 2)))

    def _copy_resources(self):
        """Copy all files used by the documents into the staging directory."""
        resources = set()
        for link in self._tree:
            resources.update(link.resources())
        for source in resources:
            outfile = self._tmptarget+source
            mkdir_p(os.path.dirname(outfile))
            shutil.copyfile(source, outfile)

    def _build_sitemap_page(self):
        """Generate the '/sitemap' page for every site language."""
        # TODO: Improve the sitemap, it is a page that is generated from
        #       the ground up and added a bit adhoc.
        sitemaplink = Link('/sitemap')
        # Register the pages of all languages before any of them is
        # templated, same order as before.
        for lang in self._sitelang:
            sitemaplink.add_page((lang, '/sitemap.'+lang+'.xml'))
        for lang in self._sitelang:
            page = sitemaplink.page(lang)
            page.set_article(self.gen_menu(lang, None, "tree sitemap"))
            page.template(self, self._style, self._tmptarget, self._subdir)

    def graph(self):
        """Delegate to the trie's ``graph()``."""
        self._tree.graph()

    def gen_menu(self, lang, page, cssclass):
        """Return the menu the trie builds for ``lang``, ``page`` and
        ``cssclass`` below the configured sub directory."""
        return self._tree.menu(lang, page, cssclass, self._subdir)

    def lang_menu(self, lang, link):
        """Return a serialized ``<ul>`` linking to every language of ``link``.

        The language names are looked up in the ISO 639-3 code file and,
        unless ``lang`` is 'en', translated into ``lang``.
        """
        html = ElementMaker()
        menu = html.ul()
        # The target path is the same for every language, build it once.
        # (unicode is the Python 2 builtin; this module targets Python 2.)
        target = unicode(link.link())
        if target.endswith(u'/'):
            target = target + u'index'
        for code in link.languages():
            isoxml = u"//iso_639_3_entry[@*='"+code+"']"
            name = self._isocode.xpath(isoxml)[0].get('name')
            if lang != 'en':
                name = self._tranlang[lang].gettext(name)
            href = self._subdir+target+u'.'+code
            menu.append(html.li(html.a(name.decode('utf-8'),
                                       href=href, hreflang=code)))
        return etree.tostring(menu, encoding='UTF-8', pretty_print=False)

    def publish(self):
        """Print the size of the staged site and publish it to the output."""
        print("Size [ %7s ]" % (sizeof_fmt(get_folder_size(self._tmptarget))))
        ssh_cmd(self._output, "mkdir -p")
        publish(self._tmptarget, self._output)
        # Style assets are optional; publish only those that exist.
        for res in ["stylesheets", "images", "js", "fonts", "favicon.ico"]:
            if os.path.exists(self._style+res):
                publish(self._style+res, self._output)
        ssh_cmd(self._output, "chmod a+rx")