10 from lxml.builder import ElementMaker
12 from treecutter import constants as const
13 from treecutter.trie import Trie
14 from treecutter.link import Link
15 from treecutter.tools import ssh_cmd, publish, mkdir_p,get_folder_size,sizeof_fmt
19 """Class keeping the internal site structure"""
20 def __init__(self,args):
21 self._output = args.output
22 self._style = args.style
23 self._subdir = args.subdir
24 self._file = 'sitemap.txt'
26 self._sitelang = set()
27 self._isocode = etree.parse('/usr/share/xml/iso-codes/iso_639_3.xml')
29 self._tmptarget = tempfile.mkdtemp()+'/'
31 # The sitemap uses a trie structure to keep track of links.
32 # A link represents both the path to the document and the link
33 # text shown on the site.
34 # A link can have several pages in different languages.
def add_link(self, link):
    """Register *link* in the sitemap trie.

    The link string is cut into path tokens with a regular expression:
    a leading ``/name/`` (or a lone ``/name``) piece, then each
    following ``name/`` piece, with any trailing remainder kept as the
    last token.  Empty pieces are dropped.  The resulting tokens are
    the key under which a ``Link`` built from the full string is added
    to ``self._tree``.
    """
    # The capture group makes re.split keep the matched pieces, so the
    # path components themselves end up in the result list.
    pattern = r'(^/[\w\.:-]*$|^/[\w\.:-]*/|[\w\.:-]*/)'
    pieces = re.split(pattern, link, flags=re.UNICODE)
    # Discard the empty strings re.split leaves between adjacent matches.
    tokens = filter(None, pieces)
    self._tree.add(tokens, Link(link))
40 f = codecs.open(self._file,'w','utf-8')
41 s = '\n'.join(link.link() for link in self._tree)
47 f = codecs.open(self._file, 'r', 'utf-8')
48 sml = f.read().split()
52 except IOError, what_error:
53 print 'INFO: Could not read sitemap.txt - one will be created'
55 # Create a set of the current tree for comparison with the
58 return set(link.link() for link in self._tree)
61 return [link.link() for link in self._tree]
63 # Main driver in the application processing the documents
64 # in the collected sitemap
67 for link in self._tree:
70 print "Prepare [%5.2f s]" % (round(t2-t1,2))
71 for link in self._tree:
72 self._sitelang = self._sitelang.union(set(link.languages()))
73 for tran in self._sitelang:
75 self._tranlang[tran] = gettext.translation('iso_639_3',
78 print "Language [%5.2f s]" % (round(t3-t2,2))
80 transform['xhtml5'] = etree.XSLT(etree.parse(self._style+"docbook.xhtml5.xsl"))
81 for link in self._tree:
82 link.render(transform)
84 print "Render [%5.2f s]" % (round(t4-t3,2))
85 for link in self._tree:
86 link.template(self, self._style, self._tmptarget,self._subdir)
88 print "Template [%5.2f s]" % (round(t5-t4,2))
91 # Collect all files used by the documents
92 for link in self._tree:
93 res = res.union(link.resources())
95 outfile = self._tmptarget+f
96 mkdir_p(os.path.dirname(outfile))
97 shutil.copyfile(f,outfile)
98 print "Resources[%5.2f s]" % (round(t6-t5,2))
99 # TODO: Improve the sitemap, it is a page that is generated from
100 # the ground up and added a bit adhoc.
101 sitmaplink = Link('/sitemap')
102 for l in self._sitelang:
103 sitmaplink.add_page((l,'/sitemap.'+l+'.xml'))
104 for l in self._sitelang:
105 txtmenu = self.gen_menu(l,None,"tree sitemap")
106 sitmaplink.page(l).set_article(txtmenu)
107 f = open(self._tmptarget+'sitemap.'+l+'.txt', "w")
110 sitmaplink.page(l).template(self,self._style,self._tmptarget,self._subdir)
112 print "Sitemap [%5.2f s]" % (round(t7-t6,2))
def gen_menu(self, lang, page, cssclass):
    """Return the menu produced by the link trie for *lang*.

    This is a thin wrapper: it forwards ``lang``, ``page`` and
    ``cssclass`` unchanged to ``self._tree.menu`` and appends the
    site's ``self._subdir`` as the fourth argument.  Whatever the trie
    returns is handed back to the caller as-is.

    ``page`` may be ``None``; the sitemap page generation in this file
    calls it that way.
    """
    trie = self._tree
    subdir = self._subdir
    return trie.menu(lang, page, cssclass, subdir)
120 def lang_menu(self,lang,link):
121 html = ElementMaker()
123 for l in link.languages():
124 isoxml = u"//iso_639_3_entry[@*='"+l+"']"
125 ln = self._isocode.xpath(isoxml)[0].get('name')
127 ln = self._tranlang[lang].ugettext(ln)
128 p = unicode(link.link())
132 li = html.li(html.a(ln,
133 href=self._subdir+p,hreflang=l))
135 # print type(etree.tostring(menu,encoding='unicode',pretty_print=False))
136 return etree.tostring(menu,encoding='unicode',pretty_print=False)
139 print "Size [ %7s ]" % (sizeof_fmt(get_folder_size(self._tmptarget)))
140 ssh_cmd(self._output,"mkdir -p")
141 publish(self._tmptarget, self._output)
142 for res in ["stylesheets","images","js","fonts","favicon.ico"]:
143 if (os.path.exists(self._style+res)):
144 print "Size [ %7s ]" % (sizeof_fmt(get_folder_size(self._style+res)))
145 publish(self._style+res, self._output)
146 ssh_cmd(self._output,"chmod a+rx")