The branch, master, has been updated
via a749d12d9dbf698e9b1167a5ff64d2c326002a95 (commit)
from e5c3ae3351b605ba3189edc4f1bbca56a62b4075 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit a749d12d9dbf698e9b1167a5ff64d2c326002a95
Author: Aaron Swartz <m...@aaronsw.com>
Date: Sun Aug 2 11:03:08 2009 -0400
don't gzip sitemaps, quote sitemap urls, do uniques thru sort -u, add caching code
-----------------------------------------------------------------------
Summary of changes:
utils/sitemap.py | 28 +++++++++++++++++++---------
webapp.py | 12 +++++++++++-
2 files changed, 30 insertions(+), 10 deletions(-)
diff --git a/utils/sitemap.py b/utils/sitemap.py
index 5db4a49..a7819ec 100644
--- a/utils/sitemap.py
+++ b/utils/sitemap.py
@@ -5,6 +5,7 @@ import web
import os
import itertools
import datetime
+import urllib
import webapp
from index import getindex
@@ -29,7 +30,7 @@ t_siteindex = """$def with (names, timestamp)
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
$for x in names:
<sitemap>
- <loc>http://watchdog.net/static/sitemaps/sitemap_${x}.xml.gz</loc>
+ <loc>http://watchdog.net/static/sitemaps/sitemap_${x}.xml</loc>
<lastmod>$timestamp</lastmod>
</sitemap>
</sitemapindex>
@@ -39,25 +40,34 @@ sitemap = web.template.Template(t_sitemap, filter=web.websafe)
siteindex = web.template.Template(t_siteindex, filter=web.websafe)
def write(path, text):
- from gzip import open as gzopen
print 'writing', path, text.count('\n')
- f = gzopen(path, 'w')
+ f = file(path, 'w')
f.write(text)
f.close()
-def make_siteindex():
- groups = web.group(uniq(getindex(webapp.app)), 50000)
+def make_siteindex(urls):
+ groups = web.group(urls, 50000)
if not os.path.exists('sitemaps'):
os.mkdir('sitemaps')
for i, x in enumerate(groups):
- write("sitemaps/sitemap_%04d.xml.gz" % i, str(sitemap(x)))
+ write("sitemaps/sitemap_%04d.xml" % i, str(sitemap(x)))
names = ["%04d" % j for j in range(i)]
timestamp = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
index = siteindex(names, timestamp)
- write("sitemaps/siteindex.xml.gz", str(index))
-
+ write("sitemaps/siteindex.xml", str(index))
+
+def write_urls():
+ fh = file('urls.txt', 'w')
+ for line in getindex(webapp.app):
+ fh.write(urllib.quote(line.encode('utf8')) + '\n')
+
+ fh.close()
+
+
if __name__ == "__main__":
- make_siteindex()
+ #write_urls()
+ # sort -u urls.txt > urls.uniq.txt
+ make_siteindex(x.strip() for x in file('urls.uniq.txt'))
diff --git a/webapp.py b/webapp.py
index 4879172..fb7b162 100755
--- a/webapp.py
+++ b/webapp.py
@@ -378,6 +378,11 @@ class occupation:
committees = committees_by_occupation(occupation, 5)
return render.occupation(candidates, committees, occupation)
+def cache_occupation(occupation):
+ candidates = list(candidates_by_occupation(occupation))
+ committees = list(committees_by_occupation(occupation))
+ pickle.dump((candidates, committees), file(config.cache_dir + '/occupation/' + occupation, 'w'))
+
class occupation_candidates:
#index done in occupation
def GET(self, occupation):
@@ -924,4 +929,9 @@ app.notfound = notfound
if production_mode:
pass#app.internalerror = web.emailerrors(config.send_errors_to, internalerror)
-if __name__ == "__main__": app.run()
+if __name__ == "__main__":
+ import sys
+ if sys.argv[1] == 'cache':
+ cache_occupation(sys.argv[2])
+ else:
+ app.run()
hooks/post-receive
--
watchdog