size: printing size of style
[treecutter.git] / treecutter / sitemap.py
1 #!/usr/bin/python
2 import os
3 import codecs
4 import re
5 import shutil
6 import sys
7 import gettext
8 import tempfile
9 from lxml import etree
10 from lxml.builder import ElementMaker
11 from time import time
12 from treecutter import constants as const
13 from treecutter.trie import Trie
14 from treecutter.link import Link
15 from treecutter.tools import ssh_cmd, publish, mkdir_p,get_folder_size,sizeof_fmt
16
17
class Sitemap():
    """Class keeping the internal site structure.

    The links of the site are stored in a Trie.  process() runs every
    link through the prepare/render/template phases using a temporary
    directory as target, and publish() sends that directory to the
    configured output.

    NOTE: the code targets Python 2 (it relies on unicode() and on
    the ugettext() method of the gettext translations).
    """
    def __init__(self,args):
        """Create an empty sitemap.

        args -- object providing the attributes `output`, `style` and
                `subdir` (presumably the parsed command line; verify
                against the caller).
        """
        self._output = args.output
        self._style = args.style
        self._subdir = args.subdir
        self._file = 'sitemap.txt'
        self._tree = Trie()
        self._sitelang = set()
        # ISO 639-3 table, queried by lang_menu() for language names.
        self._isocode = etree.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
        # Language code -> gettext translation, filled in by process().
        self._tranlang = {}
        # Scratch directory used as target while processing; the trailing
        # slash lets relative paths be appended by plain concatenation.
        self._tmptarget = tempfile.mkdtemp()+'/'

    # The sitemap uses a trie structure to keep track of links
    # A link represents the path to the document and the link
    # representing the text on the site.
    # A link can have several pages in different languages.
    def add_link(self, link):
        """Split the path `link` into tokens and add a Link for it to the trie."""
        parts = re.split(r'(^/[\w\.:-]*$|^/[\w\.:-]*/|[\w\.:-]*/)',
                         link, flags=re.UNICODE)
        # re.split() returns empty strings around the captured pieces;
        # only the non-empty tokens are used as trie keys.
        tokens = [part for part in parts if part]
        self._tree.add(tokens,Link(link))

    def write_map(self):
        """Write the path of every link to the sitemap file, one per line.

        The file is written UTF-8 encoded.  The text is assembled before
        the file is opened so that a failure while collecting the links
        does not leave a truncated file behind.
        """
        s = '\n'.join(link.link() for link in self._tree)
        with codecs.open(self._file,'w','utf-8') as f:
            f.write(s)

    def read_map(self):
        """Load the links listed in the sitemap file into the trie.

        The file content is split on whitespace and every resulting
        token is added with add_link().  An IOError (for example a
        missing file) is reported as an informational message and is
        not propagated.
        """
        try:
            with codecs.open(self._file, 'r', 'utf-8') as f:
                sml = f.read().split()
            for line in sml:
                self.add_link(line)
        except IOError:
            print('INFO: Could not read sitemap.txt - one will be created')

    # Create a set of the current tree for comparison with the
    # directory scan
    def set(self):
        """Return the paths of all links in the trie as a set."""
        return set(link.link() for link in self._tree)

    def linklist(self):
        """Return the paths of all links in the trie as a list, in trie order."""
        return [link.link() for link in self._tree]

    # Main driver in the application processing the documents
    # in the collected sitemap
    def process(self):
        """Run all processing phases and print the time spent in each.

        The phases are: prepare every link, collect the site languages
        and their translations, render, apply the templates, copy the
        resources used by the documents and finally generate the
        sitemap page for every language.
        """
        t1 = time()
        for link in self._tree:
            link.prepare()
        t2 = time()
        print("Prepare  [%5.2f s]" % (round(t2-t1,2)))
        for link in self._tree:
            self._sitelang.update(link.languages())
        for tran in self._sitelang:
            # lang_menu() uses the untranslated names for 'en', so no
            # translation is loaded for it.
            if tran != 'en':
                self._tranlang[tran] = gettext.translation('iso_639_3',
                                                           languages=[tran])
        t3 = time()
        print("Language [%5.2f s]" % (round(t3-t2,2)))
        transform = {}
        transform['xhtml5'] = etree.XSLT(etree.parse(self._style+"docbook.xhtml5.xsl"))
        for link in self._tree:
            link.render(transform)
        t4 = time()
        print("Render   [%5.2f s]" % (round(t4-t3,2)))
        for link in self._tree:
            link.template(self, self._style, self._tmptarget,self._subdir)
        t5 = time()
        print("Template [%5.2f s]" % (round(t5-t4,2)))
        res = set()
        # Collect all files used by the documents
        for link in self._tree:
            res.update(link.resources())
        for f in res:
            outfile = self._tmptarget+f
            mkdir_p(os.path.dirname(outfile))
            shutil.copyfile(f,outfile)
        # The timestamp has to be taken after the copy, otherwise the
        # copy time is reported as part of the sitemap phase.
        t6 = time()
        print("Resources[%5.2f s]" % (round(t6-t5,2)))
        # TODO: Improve the sitemap, it is a page that is generated from
        #       the ground up and added a bit adhoc.
        sitmaplink = Link('/sitemap')
        # All language pages are registered before any of them is
        # templated (order kept from the original implementation).
        for l in self._sitelang:
            sitmaplink.add_page((l,'/sitemap.'+l+'.xml'))
        for l in self._sitelang:
            txtmenu = self.gen_menu(l,None,"tree sitemap")
            sitmaplink.page(l).set_article(txtmenu)
            with open(self._tmptarget+'sitemap.'+l+'.txt', "w") as f:
                f.write(txtmenu)
            sitmaplink.page(l).template(self,self._style,self._tmptarget,self._subdir)
        t7 = time()
        print("Sitemap  [%5.2f s]" % (round(t7-t6,2)))

    def graph(self):
        """Delegate to the graph() method of the trie."""
        self._tree.graph()

    def gen_menu(self,lang,page,cssclass):
        """Return the menu produced by the trie for `lang`, `page` and `cssclass`.

        The configured subdir is passed along to the trie.
        """
        return self._tree.menu(lang,page,cssclass,self._subdir)

    def lang_menu(self,lang,link):
        """Return a <ul> with one entry per language available for `link`.

        lang -- language the menu is shown in; the language names are
                translated through the matching gettext translation
                unless it is 'en'.
        link -- link whose languages() and link() are used to build
                the entries.

        Returns the serialized list as a unicode string.
        """
        html = ElementMaker()
        menu = html.ul()
        for code in link.languages():
            isoxml = u"//iso_639_3_entry[@*='"+code+"']"
            ln = self._isocode.xpath(isoxml)[0].get('name')
            if lang != 'en':
                ln = self._tranlang[lang].ugettext(ln)
            p = unicode(link.link())
            # A path ending in '/' refers to the index document of that
            # folder; endswith() also copes with an empty path.
            if p.endswith(u'/'):
                p = p +u'index'
            p = p+u'.'+code
            li = html.li(html.a(ln,
                                href=self._subdir+p,hreflang=code))
            menu.append(li)
        return etree.tostring(menu,encoding='unicode',pretty_print=False)

    def publish(self):
        """Publish the generated site and the style resources to the output.

        Prints the size of everything that is published.  The style
        resources are only published when they exist below the style
        directory.
        """
        print("Size [ %7s ]" % (sizeof_fmt(get_folder_size(self._tmptarget))))
        ssh_cmd(self._output,"mkdir -p")
        publish(self._tmptarget, self._output)
        for res in ["stylesheets","images","js","fonts","favicon.ico"]:
            source = self._style+res
            if os.path.exists(source):
                print("Size [ %7s ]" % (sizeof_fmt(get_folder_size(source))))
                publish(source, self._output)
        ssh_cmd(self._output,"chmod a+rx")