[SCM] watchdog branch, master, updated. a749d12d9dbf698e9b1167a5ff64d2c326002a95

1 view
Skip to first unread message

aaronsw

unread,
Aug 2, 2009, 11:03:12 AM (8/2/09)
to watchdo...@googlegroups.com
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "watchdog".

The branch, master has been updated
via a749d12d9dbf698e9b1167a5ff64d2c326002a95 (commit)
from e5c3ae3351b605ba3189edc4f1bbca56a62b4075 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit a749d12d9dbf698e9b1167a5ff64d2c326002a95
Author: Aaron Swartz <m...@aaronsw.com>
Date: Sun Aug 2 11:03:08 2009 -0400

don't gzip sitemaps, quote sitemap urls, do uniques thru sort -u, add caching code

-----------------------------------------------------------------------

Summary of changes:
utils/sitemap.py | 28 +++++++++++++++++++---------
webapp.py | 12 +++++++++++-
2 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/utils/sitemap.py b/utils/sitemap.py
index 5db4a49..a7819ec 100644
--- a/utils/sitemap.py
+++ b/utils/sitemap.py
@@ -5,6 +5,7 @@ import web
import os
import itertools
import datetime
+import urllib

import webapp
from index import getindex
@@ -29,7 +30,7 @@ t_siteindex = """$def with (names, timestamp)
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
$for x in names:
<sitemap>
- <loc>http://watchdog.net/static/sitemaps/sitemap_${x}.xml.gz</loc>
+ <loc>http://watchdog.net/static/sitemaps/sitemap_${x}.xml</loc>
<lastmod>$timestamp</lastmod>
</sitemap>
</sitemapindex>
@@ -39,25 +40,34 @@ sitemap = web.template.Template(t_sitemap, filter=web.websafe)
siteindex = web.template.Template(t_siteindex, filter=web.websafe)

def write(path, text):
- from gzip import open as gzopen
print 'writing', path, text.count('\n')
- f = gzopen(path, 'w')
+ f = file(path, 'w')
f.write(text)
f.close()

-def make_siteindex():
- groups = web.group(uniq(getindex(webapp.app)), 50000)
+def make_siteindex(urls):
+ groups = web.group(urls, 50000)

if not os.path.exists('sitemaps'):
os.mkdir('sitemaps')

for i, x in enumerate(groups):
- write("sitemaps/sitemap_%04d.xml.gz" % i, str(sitemap(x)))
+ write("sitemaps/sitemap_%04d.xml" % i, str(sitemap(x)))

names = ["%04d" % j for j in range(i)]
timestamp = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
index = siteindex(names, timestamp)
- write("sitemaps/siteindex.xml.gz", str(index))
-
+ write("sitemaps/siteindex.xml", str(index))
+
+def write_urls():
+ fh = file('urls.txt', 'w')
+ for line in getindex(webapp.app):
+ fh.write(urllib.quote(line.encode('utf8')) + '\n')
+
+ fh.close()
+
+
if __name__ == "__main__":
- make_siteindex()
+ #write_urls()
+ # sort -u urls.txt > urls.uniq.txt
+ make_siteindex(x.strip() for x in file('urls.uniq.txt'))
diff --git a/webapp.py b/webapp.py
index 4879172..fb7b162 100755
--- a/webapp.py
+++ b/webapp.py
@@ -378,6 +378,11 @@ class occupation:
committees = committees_by_occupation(occupation, 5)
return render.occupation(candidates, committees, occupation)

+def cache_occupation(occupation):
+ candidates = list(candidates_by_occupation(occupation))
+ committees = list(committees_by_occupation(occupation))
+ pickle.dump((candidates, committees), file(config.cache_dir + '/occupation/' + occupation, 'w'))
+
class occupation_candidates:
#index done in occupation
def GET(self, occupation):
@@ -924,4 +929,9 @@ app.notfound = notfound
if production_mode:
pass#app.internalerror = web.emailerrors(config.send_errors_to, internalerror)

-if __name__ == "__main__": app.run()
+if __name__ == "__main__":
+ import sys
+ if sys.argv[1] == 'cache':
+ cache_occupation(sys.argv[2])
+ else:
+ app.run()


hooks/post-receive
--
watchdog

Reply all
Reply to author
Forward
0 new messages