[SCM] watchdog branch, master, updated. 8b378cf82015dc26fe3ea00cbac8acea581802e0

1 view
Skip to first unread message

aaronsw

unread,
Aug 2, 2009, 11:18:40 AM (8/2/09)
to watchdo...@googlegroups.com
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "watchdog".

The branch, master has been updated
via 8b378cf82015dc26fe3ea00cbac8acea581802e0 (commit)
via 61eda462d54d5fb5794d5716f7ea1f268dc4e5ae (commit)
from a749d12d9dbf698e9b1167a5ff64d2c326002a95 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 8b378cf82015dc26fe3ea00cbac8acea581802e0
Author: Aaron Swartz <m...@aaronsw.com>
Date: Sun Aug 2 11:18:36 2009 -0400

smarter quoting, faster writing

commit 61eda462d54d5fb5794d5716f7ea1f268dc4e5ae
Author: Aaron Swartz <m...@aaronsw.com>
Date: Sun Aug 2 11:09:57 2009 -0400

readd gzip

-----------------------------------------------------------------------

Summary of changes:
utils/sitemap.py | 20 +++++++++++++-------
webapp.py | 10 +++++-----
2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/utils/sitemap.py b/utils/sitemap.py
index a7819ec..58434d0 100644
--- a/utils/sitemap.py
+++ b/utils/sitemap.py
@@ -30,7 +30,7 @@ t_siteindex = """$def with (names, timestamp)
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
$for x in names:
<sitemap>
- <loc>http://watchdog.net/static/sitemaps/sitemap_${x}.xml</loc>
+ <loc>http://watchdog.net/static/sitemaps/sitemap_${x}.xml.gz</loc>
<lastmod>$timestamp</lastmod>
</sitemap>
</sitemapindex>
@@ -40,8 +40,9 @@ sitemap = web.template.Template(t_sitemap, filter=web.websafe)
siteindex = web.template.Template(t_siteindex, filter=web.websafe)

def write(path, text):
+ from gzip import open as gzopen
print 'writing', path, text.count('\n')
- f = file(path, 'w')
+ f = gzopen(path, 'w')
f.write(text)
f.close()

@@ -52,21 +53,26 @@ def make_siteindex(urls):
os.mkdir('sitemaps')

for i, x in enumerate(groups):
- write("sitemaps/sitemap_%04d.xml" % i, str(sitemap(x)))
+ sitemap_lines = [
+ '<?xml version="1.0" encoding="UTF-8"?>',
+ '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">']
+ for item in x:
+ sitemap_lines.append('<url><loc>http://watchdog.net%s</loc></url>' % item)
+ sitemap_lines.append('</urlset>')
+ write("sitemaps/sitemap_%04d.xml.gz" % i, '\n'.join(sitemap_lines)))

names = ["%04d" % j for j in range(i)]
timestamp = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
index = siteindex(names, timestamp)
- write("sitemaps/siteindex.xml", str(index))
+ write("sitemaps/siteindex.xml.gz", str(index))

def write_urls():
fh = file('urls.txt', 'w')
for line in getindex(webapp.app):
- fh.write(urllib.quote(line.encode('utf8')) + '\n')
-
+ fh.write(line + '\n')
+
fh.close()

-
if __name__ == "__main__":
#write_urls()
# sort -u urls.txt > urls.uniq.txt
diff --git a/webapp.py b/webapp.py
index fb7b162..5824e3b 100755
--- a/webapp.py
+++ b/webapp.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-import re, sys
+import re, sys, urllib
import web

from utils import zip2rep, simplegraphs, apipublish, users, writerep, se, wyrapp, api
@@ -330,7 +330,7 @@ class contributor:
names = name.lower().split(', ')
if len(names) > 1:
return '_'.join(names[1].split() + [names[0]])
- return name
+ return urllib.quote(name)
return ('/contrib/%s/%s' % (c.zip, format(c.name)) \
for c in db.select('contribution', what='zip, name'))

@@ -365,7 +365,7 @@ class occupation:
#/occupation/<occupation>, /occupation/<occupation>/candidates, /occupation/<occupation>/committees
occupations = (c.occupation.lower() \
for c in db.query('select distinct occupation from contribution'))
- return (('/occupation/%s' % o, '/occupation/%s/candidates' % o, '/occupation/%s/committees' % o) \
+ return (('/occupation/%s' % urllib.quote(o), '/occupation/%s/candidates' % urllib.quote(o), '/occupation/%s/committees' % urllib.quote(o)) \
for o in occupations if o)

def GET(self, occupation):
@@ -404,7 +404,7 @@ class occupation_committees:
class contributions:
"""from a corp to a pol"""
def index(self):
- return ('/contrib/?from=%s&to=%s' % (c.frm, c.to) \
+ return ('/contrib/?from=%s&to=%s' % (urllib.quote(c.frm), urllib.quote(c.to)) \
for c in db.query("""SELECT cn.employer_stem as frm, p.id as to
FROM contribution cn, committee cm, politician_fec_ids pfi, politician p
WHERE cn.recipient_id = cm.id AND cm.candidate_id = pfi.fec_id
@@ -434,7 +434,7 @@ class contributions:
class employer:
def index(self):
#'/empl/(.*?)%s?'
- return ('/empl/%s' % (c.employer_stem) \
+ return ('/empl/%s' % (urllib.quote(c.employer_stem)) \
for c in db.query('select distinct(employer_stem) from contribution'))

def GET(self, corp_id, format=None):


hooks/post-receive
--
watchdog

Reply all
Reply to author
Forward
0 new messages