The branch, master has been updated
via 6b264aa231acdb6fd34c8ec7e5b56b2ff70b7c95 (commit)
from 1058f8ab69ac33a70dd4ade7d58fb6e520328c9a (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 6b264aa231acdb6fd34c8ec7e5b56b2ff70b7c95
Author: Aaron Swartz <m...@aaronsw.com>
Date: Thu Jul 30 23:37:26 2009 -0400
add sitemap generator
-----------------------------------------------------------------------
Summary of changes:
utils/sitemap.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 58 insertions(+), 0 deletions(-)
create mode 100644 utils/sitemap.py
diff --git a/utils/sitemap.py b/utils/sitemap.py
new file mode 100644
index 0000000..5121a16
--- /dev/null
+++ b/utils/sitemap.py
@@ -0,0 +1,58 @@
+"""Script to generate XML sitemap of openlibrary.org website.
+"""
+
+import web
+import os
+import itertools
+import datetime
+
+import webapp
+from index import getindex
+
+# web.py template source for one sitemap file: a <urlset> holding one <url>
+# entry per item, with each item appended to the http://watchdog.net prefix.
+t_sitemap = """$def with (items)
+<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+ $for i in items:
+ <url>
+ <loc>http://watchdog.net$i</loc>
+ </url>
+</urlset>
+"""
+
+# web.py template source for the sitemap index: one <sitemap> entry per name,
+# pointing at /static/sitemaps/sitemap_<name>.xml.gz on watchdog.net, all
+# sharing the same <lastmod> timestamp.
+t_siteindex = """$def with (names, timestamp)
+<?xml version="1.0" encoding="UTF-8"?>
+<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+ $for x in names:
+ <sitemap>
+ <loc>http://watchdog.net/static/sitemaps/sitemap_${x}.xml.gz</loc>
+ <lastmod>$timestamp</lastmod>
+ </sitemap>
+</sitemapindex>
+"""
+
+# Build callable template objects from the sources above; make_siteindex()
+# calls them with the arguments named in each template's `$def with` line.
+sitemap = web.template.Template(t_sitemap)
+siteindex = web.template.Template(t_siteindex)
+
+def write(path, text):
+ from gzip import open as gzopen
+ print 'writing', path, text.count('\n')
+ f = gzopen(path, 'w')
+ f.write(text)
+ f.close()
+
+def make_siteindex():
+ groups = web.group(getindex(webapp.app), 50000)
+
+ if not os.path.exists('sitemaps'):
+ os.mkdir('sitemaps')
+
+ for i, x in enumerate(groups):
+ write("sitemaps/sitemap_%04d.xml.gz" % i, str(sitemap(x)))
+ names = ["%04d" % j for j in range(i)]
+ timestamp = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
+ index = siteindex(names, timestamp)
+ write("sitemaps/siteindex.xml.gz", index)
+
+# Generate the sitemaps only when executed as a script, not on import.
+if __name__ == "__main__":
+ make_siteindex()
+
hooks/post-receive
--
watchdog