The branch, master, has been updated
via e5c3ae3351b605ba3189edc4f1bbca56a62b4075 (commit)
from c075d4ba4bb40cbdd95ba2770ae8ff42baab1140 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
- Log -----------------------------------------------------------------
commit e5c3ae3351b605ba3189edc4f1bbca56a62b4075
Author: Aaron Swartz <m...@aaronsw.com>
Date: Fri Jul 31 01:40:25 2009 -0400
fix bugs: dupe urls, crashers
-----------------------------------------------------------------------
Summary of changes:
utils/sitemap.py | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/utils/sitemap.py b/utils/sitemap.py
index 1dfc4c0..5db4a49 100644
--- a/utils/sitemap.py
+++ b/utils/sitemap.py
@@ -9,13 +9,18 @@ import datetime
import webapp
from index import getindex
+def uniq(iterator):
+ seen = set()
+ for item in iterator:
+ if item in seen: continue
+ seen.add(item)
+ yield item
+
t_sitemap = """$def with (items)
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
$for i in items:
- <url>
- <loc>http://watchdog.net$i</loc>
- </url>
+ <url><loc>http://watchdog.net$i</loc></url>
</urlset>
"""
@@ -30,8 +35,8 @@ t_siteindex = """$def with (names, timestamp)
</sitemapindex>
"""
-sitemap = web.template.Template(t_sitemap)
-siteindex = web.template.Template(t_siteindex)
+sitemap = web.template.Template(t_sitemap, filter=web.websafe)
+siteindex = web.template.Template(t_siteindex, filter=web.websafe)
def write(path, text):
from gzip import open as gzopen
@@ -41,7 +46,7 @@ def write(path, text):
f.close()
def make_siteindex():
- groups = web.group(getindex(webapp.app), 50000)
+ groups = web.group(uniq(getindex(webapp.app)), 50000)
if not os.path.exists('sitemaps'):
os.mkdir('sitemaps')
hooks/post-receive
--
watchdog