[SCM] watchdog branch, master, updated. 71e2b95973b8b0f742f2744b724044815c51a6a6

1 view
Skip to first unread message

aaronsw

unread,
Aug 13, 2009, 3:37:34 PM (8/13/09)
to watchdo...@googlegroups.com
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "watchdog".

The branch, master has been updated
via 71e2b95973b8b0f742f2744b724044815c51a6a6 (commit)
from 8b378cf82015dc26fe3ea00cbac8acea581802e0 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 71e2b95973b8b0f742f2744b724044815c51a6a6
Author: Aaron Swartz <m...@aaronsw.com>
Date: Thu Aug 13 15:37:19 2009 -0400

add support for exempt organizations (tx Noufal Ibrahim)

-----------------------------------------------------------------------

Summary of changes:
import/parse/fixed_width.py | 10 ++++++----
import/parse/irs_eo.py | 22 ++++++++++++++--------
schema.py | 30 ++++++++++++++++++++++++++++++
static/css/style.css | 1 +
templates/exempt_org.html | 21 +++++++++++++++++++++
utils/helpers.py | 12 ++++++++++++
utils/sitemap.py | 2 +-
vendor/smartersql.py | 1 +
webapp.py | 18 ++++++++++++++++--
9 files changed, 102 insertions(+), 15 deletions(-)
create mode 100644 templates/exempt_org.html

diff --git a/import/parse/fixed_width.py b/import/parse/fixed_width.py
index 791f7b1..21fc8fe 100644
--- a/import/parse/fixed_width.py
+++ b/import/parse/fixed_width.py
@@ -30,6 +30,7 @@ def string(s):

def state(s):
s = string(s)
+ if s == '.': return None
assert s == s.upper()
assert s.isalpha()
return s
@@ -89,7 +90,7 @@ FIELD_TYP = 2

## The functions you might want to call

-def parse_line(linedef, line):
+def parse_line(linedef, line, debug=False):
out = storage()
n = 0
for (k, l, t) in linedef:
@@ -98,8 +99,9 @@ def parse_line(linedef, line):
if k is None:
t(line[n:n+l])
else:
+ if debug: print k, repr(line[n:n+l]),
out[k] = t(line[n:n+l])
- #print k, repr(line[n:n+l])
+ if debug: print repr(out[k])
if l > 0: n += l
return out

@@ -112,7 +114,7 @@ def get_len(filedef):
else:
return sum(line[FIELD_LEN] for line in filedef)

-def parse_file(filedef, fh, f_whichdef=None):
+def parse_file(filedef, fh, f_whichdef=None, debug=False):
linelen = get_len(filedef)
if isinstance(filedef, dict):
if not f_whichdef: f_whichdef = lambda x: x[0]
@@ -120,4 +122,4 @@ def parse_file(filedef, fh, f_whichdef=None):
f_whichdef = lambda x: slice(None, None)
for line in iter(lambda: fh.read(linelen), ''):
if line.replace('\x00', '').strip():
- yield parse_line(filedef[f_whichdef(line)], line)
+ yield parse_line(filedef[f_whichdef(line)], line, debug=debug)
diff --git a/import/parse/irs_eo.py b/import/parse/irs_eo.py
index a4ec2b7..7744326 100644
--- a/import/parse/irs_eo.py
+++ b/import/parse/irs_eo.py
@@ -1,17 +1,18 @@
+import glob, itertools
from fixed_width import integer, string, date, filler, parse_file, enum, state, digits

def integer2(s): return integer(s[-1] + s[:-1])

def_eo = [
- ('_type', 0, lambda s: 'Exempt Organization'),
+# ('_type', 0, lambda s: 'Exempt Organization'),
('ein', 9-0, digits),
('primary_name', 79-9, string),
('careof_name', 114-79, string),
('street', 149-114, string),
('city', 171-149, string),
('state', 173-171, state),
- ('zip', 183-173, digits),
- ('group_exemption_num', 187-183, integer),
+ ('zip', 183-173, string),
+ ('group_exemption_num', 187-183, string),
('subsection_code', 189-187, string),
('affiliation', 1, enum),
('classification_code', 194-190, string),
@@ -34,11 +35,16 @@ def_eo = [
('ntee_code', 282-278, string),
('sort_name', 318-282, string),
(None, 2, filler('\r\n'))
-
]

+def parse():
+ return itertools.chain(*[parse_file(def_eo, file(fn)) for fn in glob.glob('../data/crawl/irs/eo/*.LST')])
+
if __name__ == "__main__":
- import glob
- import tools
- for fn in glob.glob('../data/crawl/irs/eo/*.LST'):
- tools.export(parse_file(def_eo, file(fn)))
+ import sys
+ if 'load' in sys.argv:
+ from settings import db
+ db.multiple_insert("exempt_org", parse(), seqname=False)
+ else:
+ import tools
+ tools.export(parse())
diff --git a/schema.py b/schema.py
index c78e339..d9f95ae 100644
--- a/schema.py
+++ b/schema.py
@@ -619,6 +619,36 @@ class Past_Elections(sql.Table):
pct_votes_received = sql.Percentage()
expenditure = sql.Dollars()

+class Exempt_Org(sql.Table):
+ ein = sql.Integer(primary=True)
+ primary_name = sql.String()
+ careof_name = sql.String()
+ street = sql.String()
+ city = sql.String()
+ state = sql.String(2)
+ zip = sql.String()
+ group_exemption_num = sql.String()
+ subsection_code = sql.String()
+ affiliation = sql.String()
+ classification_code = sql.String()
+ ruling_date = sql.String()
+ deductibility_code = sql.String()
+ foundation_code = sql.String()
+ activity_code = sql.String()
+ organization_code = sql.String()
+ exempt_org_status_code = sql.String()
+ advance_ruling_expiration = sql.String()
+ tax_period = sql.String()
+ asset_code = sql.String()
+ income_code = sql.String()
+ filing_requirement_code = sql.String()
+ accounting_period = sql.String()
+ asset_amt = sql.BigInteger()
+ income_amt = sql.BigInteger()
+ form_990_revenue_amt = sql.BigInteger()
+ ntee_code = sql.String()
+ sort_name = sql.String()
+
def init():
db.query("CREATE VIEW census AS select * from census_meta NATURAL JOIN census_data")
db.query("CREATE INDEX contribution_recipient_id_idx ON contribution (recipient_id)")
diff --git a/static/css/style.css b/static/css/style.css
index 54b617d..98037f1 100644
--- a/static/css/style.css
+++ b/static/css/style.css
@@ -313,3 +313,4 @@ h2.repred { color: #EE4400; margin-bottom: 6px; }
td.rule {border-top: 1px solid #ccc; border-bottom: 1px solid #ccc; }
td.rule-below { border-bottom: 1px solid #ccc; }

+th, td { vertical-align: top; }
diff --git a/templates/exempt_org.html b/templates/exempt_org.html
new file mode 100644
index 0000000..fb5af54
--- /dev/null
+++ b/templates/exempt_org.html
@@ -0,0 +1,21 @@
+$def with (org)
+
+$var width: 90%
+$var color: white
+$var title: $org.primary_name.title()
+
+<h1>$org.primary_name.title()</h1>
+
+<table>
+ <tr><th>EIN</th><td>$org.ein</td></tr>
+ <tr><th>IRS approved on</th><td>$org.ruling_date</td></tr>
+ <tr><th>Last filed</th><td>$org.tax_period</td></tr>
+ <tr><th>Contact</th><td>$org.careof_name.title()</td></tr>
+ <tr><th>Address</th><td>$org.street<br />$org.city, $org.state $org.zip</td></tr>
+ <tr><th>Filing month</th><td>$org.accounting_period</td></tr>
+ <tr><th>Assets</th><td>\$$commify(org.asset_amt)</td></tr>
+ <tr><th>Income</th><td>\$$commify(org.income_amt)</td></tr>
+ <tr><th>Revenue</th><td>\$$commify(org.form_990_revenue_amt)</td></tr>
+ <tr><th>AKA</th><td>$org.sort_name</td></tr>
+</table>
+
diff --git a/utils/helpers.py b/utils/helpers.py
index fcd5430..a72c417 100644
--- a/utils/helpers.py
+++ b/utils/helpers.py
@@ -6,6 +6,18 @@ import web
from config import secret_key
from settings import db

+def urlify(s):
+ """
+ >>> urlify("What the !@#$%^ is going on here!?")
+ 'what-the--is-going-on-here'
+ """
+ s = s.lower()
+ out = []
+ for k in s:
+ if k == " ": out.append('-')
+ elif k.isalpha() or k.isdigit(): out.append(k)
+ return ''.join(out)
+
def encrypt(msg, key=None):
return hmac.new(key or secret_key, msg).hexdigest()

diff --git a/utils/sitemap.py b/utils/sitemap.py
index 58434d0..2590f5e 100644
--- a/utils/sitemap.py
+++ b/utils/sitemap.py
@@ -59,7 +59,7 @@ def make_siteindex(urls):
for item in x:
sitemap_lines.append('<url><loc>http://watchdog.net%s</loc></url>' % item)
sitemap_lines.append('</urlset>')
- write("sitemaps/sitemap_%04d.xml.gz" % i, '\n'.join(sitemap_lines)))
+ write("sitemaps/sitemap_%04d.xml.gz" % i, '\n'.join(sitemap_lines))

names = ["%04d" % j for j in range(i)]
timestamp = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
diff --git a/vendor/smartersql.py b/vendor/smartersql.py
index 03f9da0..aee8ff1 100644
--- a/vendor/smartersql.py
+++ b/vendor/smartersql.py
@@ -240,6 +240,7 @@ class Float(Column):

class Serial(Integer): sql_type = 'serial'
class Int2(Integer): sql_type = 'int2'
+class BigInteger(Integer): sql_type = 'bigint'
class Date(Column): sql_type = 'date'

class Year(Integer): pass
diff --git a/webapp.py b/webapp.py
index 5824e3b..ac16d9b 100755
--- a/webapp.py
+++ b/webapp.py
@@ -3,7 +3,7 @@
import re, sys, urllib
import web

-from utils import zip2rep, simplegraphs, apipublish, users, writerep, se, wyrapp, api
+from utils import zip2rep, simplegraphs, apipublish, users, writerep, se, wyrapp, api, helpers
import blog
import petition
import settings
@@ -49,6 +49,7 @@ urls = (
r'/lob/o/?(.*?)', 'lob_org',
r'/lob/pa/?(.*?)', 'lob_pac',
r'/lob/pe/?(.*?)', 'lob_person',
+ r'/ein/(\d+)(/.*)?', 'ein',
r'/writerep', wyrapp.app,
r'/api', api.app,
r'/about(/?)', 'about',
@@ -671,6 +672,19 @@ class lob_person:
if not p: raise web.notfound()
return render.lob_person(p, limit)

+class ein:
+ def index(self):
+ pass #@@
+
+ def GET(self, ein, slug=None):
+ try:
+ p = schema.Exempt_Org.select(where='ein=$ein', vars=locals())[0]
+ except IndexError:
+ raise web.notfound()
+ if slug != '/' + helpers.urlify(p.primary_name):
+ raise web.redirect('/ein/%s/%s' % (ein, helpers.urlify(p.primary_name)))
+ return render.exempt_org(p)
+
class politician_introduced:
def index(self):
#/p/(.*?)/introduced
@@ -931,7 +945,7 @@ if production_mode:

if __name__ == "__main__":
import sys
- if sys.argv[1] == 'cache':
+ if len(sys.argv) > 1 and sys.argv[1] == 'cache':
cache_occupation(sys.argv[2])
else:
app.run()


hooks/post-receive
--
watchdog

Reply all
Reply to author
Forward
0 new messages