The branch, master, has been updated
via 71e2b95973b8b0f742f2744b724044815c51a6a6 (commit)
from 8b378cf82015dc26fe3ea00cbac8acea581802e0 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
- Log -----------------------------------------------------------------
commit 71e2b95973b8b0f742f2744b724044815c51a6a6
Author: Aaron Swartz <m...@aaronsw.com>
Date: Thu Aug 13 15:37:19 2009 -0400
add support for exempt organizations (tx Noufal Ibrahim)
-----------------------------------------------------------------------
Summary of changes:
import/parse/fixed_width.py | 10 ++++++----
import/parse/irs_eo.py | 22 ++++++++++++++--------
schema.py | 30 ++++++++++++++++++++++++++++++
static/css/style.css | 1 +
templates/exempt_org.html | 21 +++++++++++++++++++++
utils/helpers.py | 12 ++++++++++++
utils/sitemap.py | 2 +-
vendor/smartersql.py | 1 +
webapp.py | 18 ++++++++++++++++--
9 files changed, 102 insertions(+), 15 deletions(-)
create mode 100644 templates/exempt_org.html
diff --git a/import/parse/fixed_width.py b/import/parse/fixed_width.py
index 791f7b1..21fc8fe 100644
--- a/import/parse/fixed_width.py
+++ b/import/parse/fixed_width.py
@@ -30,6 +30,7 @@ def string(s):
def state(s):
s = string(s)
+ if s == '.': return None
assert s == s.upper()
assert s.isalpha()
return s
@@ -89,7 +90,7 @@ FIELD_TYP = 2
## The functions you might want to call
-def parse_line(linedef, line):
+def parse_line(linedef, line, debug=False):
out = storage()
n = 0
for (k, l, t) in linedef:
@@ -98,8 +99,9 @@ def parse_line(linedef, line):
if k is None:
t(line[n:n+l])
else:
+ if debug: print k, repr(line[n:n+l]),
out[k] = t(line[n:n+l])
- #print k, repr(line[n:n+l])
+ if debug: print repr(out[k])
if l > 0: n += l
return out
@@ -112,7 +114,7 @@ def get_len(filedef):
else:
return sum(line[FIELD_LEN] for line in filedef)
-def parse_file(filedef, fh, f_whichdef=None):
+def parse_file(filedef, fh, f_whichdef=None, debug=False):
linelen = get_len(filedef)
if isinstance(filedef, dict):
if not f_whichdef: f_whichdef = lambda x: x[0]
@@ -120,4 +122,4 @@ def parse_file(filedef, fh, f_whichdef=None):
f_whichdef = lambda x: slice(None, None)
for line in iter(lambda: fh.read(linelen), ''):
if line.replace('\x00', '').strip():
- yield parse_line(filedef[f_whichdef(line)], line)
+ yield parse_line(filedef[f_whichdef(line)], line, debug=debug)
diff --git a/import/parse/irs_eo.py b/import/parse/irs_eo.py
index a4ec2b7..7744326 100644
--- a/import/parse/irs_eo.py
+++ b/import/parse/irs_eo.py
@@ -1,17 +1,18 @@
+import glob, itertools
from fixed_width import integer, string, date, filler, parse_file, enum, state, digits
def integer2(s): return integer(s[-1] + s[:-1])
def_eo = [
- ('_type', 0, lambda s: 'Exempt Organization'),
+# ('_type', 0, lambda s: 'Exempt Organization'),
('ein', 9-0, digits),
('primary_name', 79-9, string),
('careof_name', 114-79, string),
('street', 149-114, string),
('city', 171-149, string),
('state', 173-171, state),
- ('zip', 183-173, digits),
- ('group_exemption_num', 187-183, integer),
+ ('zip', 183-173, string),
+ ('group_exemption_num', 187-183, string),
('subsection_code', 189-187, string),
('affiliation', 1, enum),
('classification_code', 194-190, string),
@@ -34,11 +35,16 @@ def_eo = [
('ntee_code', 282-278, string),
('sort_name', 318-282, string),
(None, 2, filler('\r\n'))
-
]
+def parse():
+ return itertools.chain(*[parse_file(def_eo, file(fn)) for fn in glob.glob('../data/crawl/irs/eo/*.LST')])
+
if __name__ == "__main__":
- import glob
- import tools
- for fn in glob.glob('../data/crawl/irs/eo/*.LST'):
- tools.export(parse_file(def_eo, file(fn)))
+ import sys
+ if 'load' in sys.argv:
+ from settings import db
+ db.multiple_insert("exempt_org", parse(), seqname=False)
+ else:
+ import tools
+ tools.export(parse())
diff --git a/schema.py b/schema.py
index c78e339..d9f95ae 100644
--- a/schema.py
+++ b/schema.py
@@ -619,6 +619,36 @@ class Past_Elections(sql.Table):
pct_votes_received = sql.Percentage()
expenditure = sql.Dollars()
+class Exempt_Org(sql.Table):
+ ein = sql.Integer(primary=True)
+ primary_name = sql.String()
+ careof_name = sql.String()
+ street = sql.String()
+ city = sql.String()
+ state = sql.String(2)
+ zip = sql.String()
+ group_exemption_num = sql.String()
+ subsection_code = sql.String()
+ affiliation = sql.String()
+ classification_code = sql.String()
+ ruling_date = sql.String()
+ deductibility_code = sql.String()
+ foundation_code = sql.String()
+ activity_code = sql.String()
+ organization_code = sql.String()
+ exempt_org_status_code = sql.String()
+ advance_ruling_expiration = sql.String()
+ tax_period = sql.String()
+ asset_code = sql.String()
+ income_code = sql.String()
+ filing_requirement_code = sql.String()
+ accounting_period = sql.String()
+ asset_amt = sql.BigInteger()
+ income_amt = sql.BigInteger()
+ form_990_revenue_amt = sql.BigInteger()
+ ntee_code = sql.String()
+ sort_name = sql.String()
+
def init():
db.query("CREATE VIEW census AS select * from census_meta NATURAL JOIN census_data")
db.query("CREATE INDEX contribution_recipient_id_idx ON contribution (recipient_id)")
diff --git a/static/css/style.css b/static/css/style.css
index 54b617d..98037f1 100644
--- a/static/css/style.css
+++ b/static/css/style.css
@@ -313,3 +313,4 @@ h2.repred { color: #EE4400; margin-bottom: 6px; }
td.rule {border-top: 1px solid #ccc; border-bottom: 1px solid #ccc; }
td.rule-below { border-bottom: 1px solid #ccc; }
+th, td { vertical-align: top; }
diff --git a/templates/exempt_org.html b/templates/exempt_org.html
new file mode 100644
index 0000000..fb5af54
--- /dev/null
+++ b/templates/exempt_org.html
@@ -0,0 +1,21 @@
+$def with (org)
+
+$var width: 90%
+$var color: white
+$var title: $org.primary_name.title()
+
+<h1>$org.primary_name.title()</h1>
+
+<table>
+ <tr><th>EIN</th><td>$org.ein</td></tr>
+ <tr><th>IRS approved on</th><td>$org.ruling_date</td></tr>
+ <tr><th>Last filed</th><td>$org.tax_period</td></tr>
+ <tr><th>Contact</th><td>$org.careof_name.title()</td></tr>
+ <tr><th>Address</th><td>$org.street<br />$org.city, $org.state $org.zip</td></tr>
+ <tr><th>Filing month</th><td>$org.accounting_period</td></tr>
+ <tr><th>Assets</th><td>\$$commify(org.asset_amt)</td></tr>
+ <tr><th>Income</th><td>\$$commify(org.income_amt)</td></tr>
+ <tr><th>Revenue</th><td>\$$commify(org.form_990_revenue_amt)</td></tr>
+ <tr><th>AKA</th><td>$org.sort_name</td></tr>
+</table>
+
diff --git a/utils/helpers.py b/utils/helpers.py
index fcd5430..a72c417 100644
--- a/utils/helpers.py
+++ b/utils/helpers.py
@@ -6,6 +6,18 @@ import web
from config import secret_key
from settings import db
+def urlify(s):
+ """
+ >>> urlify("What the !@#$%^ is going on here!?")
+ 'what-the--is-going-on-here'
+ """
+ s = s.lower()
+ out = []
+ for k in s:
+ if k == " ": out.append('-')
+ elif k.isalpha() or k.isdigit(): out.append(k)
+ return ''.join(out)
+
def encrypt(msg, key=None):
return hmac.new(key or secret_key, msg).hexdigest()
diff --git a/utils/sitemap.py b/utils/sitemap.py
index 58434d0..2590f5e 100644
--- a/utils/sitemap.py
+++ b/utils/sitemap.py
@@ -59,7 +59,7 @@ def make_siteindex(urls):
for item in x:
sitemap_lines.append('<url><loc>http://watchdog.net%s</loc></url>' % item)
sitemap_lines.append('</urlset>')
- write("sitemaps/sitemap_%04d.xml.gz" % i, '\n'.join(sitemap_lines)))
+ write("sitemaps/sitemap_%04d.xml.gz" % i, '\n'.join(sitemap_lines))
names = ["%04d" % j for j in range(i)]
timestamp = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
diff --git a/vendor/smartersql.py b/vendor/smartersql.py
index 03f9da0..aee8ff1 100644
--- a/vendor/smartersql.py
+++ b/vendor/smartersql.py
@@ -240,6 +240,7 @@ class Float(Column):
class Serial(Integer): sql_type = 'serial'
class Int2(Integer): sql_type = 'int2'
+class BigInteger(Integer): sql_type = 'bigint'
class Date(Column): sql_type = 'date'
class Year(Integer): pass
diff --git a/webapp.py b/webapp.py
index 5824e3b..ac16d9b 100755
--- a/webapp.py
+++ b/webapp.py
@@ -3,7 +3,7 @@
import re, sys, urllib
import web
-from utils import zip2rep, simplegraphs, apipublish, users, writerep, se, wyrapp, api
+from utils import zip2rep, simplegraphs, apipublish, users, writerep, se, wyrapp, api, helpers
import blog
import petition
import settings
@@ -49,6 +49,7 @@ urls = (
r'/lob/o/?(.*?)', 'lob_org',
r'/lob/pa/?(.*?)', 'lob_pac',
r'/lob/pe/?(.*?)', 'lob_person',
+ r'/ein/(\d+)(/.*)?', 'ein',
r'/writerep', wyrapp.app,
r'/api', api.app,
r'/about(/?)', 'about',
@@ -671,6 +672,19 @@ class lob_person:
if not p: raise web.notfound()
return render.lob_person(p, limit)
+class ein:
+ def index(self):
+ pass #@@
+
+ def GET(self, ein, slug=None):
+ try:
+ p = schema.Exempt_Org.select(where='ein=$ein', vars=locals())[0]
+ except IndexError:
+ raise web.notfound()
+ if slug != '/' + helpers.urlify(p.primary_name):
+ raise web.redirect('/ein/%s/%s' % (ein, helpers.urlify(p.primary_name)))
+ return render.exempt_org(p)
+
class politician_introduced:
def index(self):
#/p/(.*?)/introduced
@@ -931,7 +945,7 @@ if production_mode:
if __name__ == "__main__":
import sys
- if sys.argv[1] == 'cache':
+ if len(sys.argv) > 1 and sys.argv[1] == 'cache':
cache_occupation(sys.argv[2])
else:
app.run()
hooks/post-receive
--
watchdog