import java.io.*;
import java.util.*;
import java.net.URL;
import java.net.URLEncoder;
import java.net.Socket;
/*
Transparent Society Program
by Andrew William Morrow
http://home.earthlink.net/~amorrow/
This program exists to iterate on http://www.zillow.com/ data records,
(which are indexed by an integer and
number in the tens of millions of American homes)
and do a reverse lookup via http://www.addresses.com/ on
the occupant and phone number of that home
and then create a tile on that location at WikiMapia,
http://www.wikimapia.org/
In a sense, outsiders shine a nice light onto our own American
neighborhoods
that we do not seem to have the guts to shine on ourselves.
By Christmas of 2007, we will all have accepted this as normal.
Large urban centers will be like small towns
where everybody knows a lot of other people in a true global village
style.
To use, you should visit zillow.com and find a home that interests you.
Look at its zillow number in the URL and then,
if you want to make just it, then pick a bunch size of just 1.
If you want the twenty houses around it, then subtract ten from
the zillow number and pick a bunch size of 20.
Please adjust the tile size as appropriate.
This program is intended to shift
the arbitrary boundaries of privacy in the USA.
The information about home and occupants will now be browseable.
The information is neutral (neither good nor evil),
but now it will be easier to access,
making the society of the USA more transparent.
Brief bibliography:
The Transparent Society by David Brin ISBN: 0-201-32802-X
Who Controls the Internet? by Jack Goldsmith ISBN 0-19-515266-2
HP's CoolTown (the apple of Carly Fiorina's eye)
This might provide a more open society for information to support ideas
such as
http://en.wikipedia.org/wiki/Augmented_reality
Welcome to a more transparent society.
TODO:
This program still needs work: when I click on my new tiles, I do not
zoom in as far as when I use a regular web interface.
*/
public class DoWikiMap {
// This controls the tile size drawn. Units: micro-degree
// Suburbs can be about 70, but East Coast row houses will have to be
smaller (maybe as small as 10?)
int tile_size = 20;
// WikiMapia governs how many records you can submit per minute.
// This is the delay (in seconds) to wait after submitting each new
record.
int record_delay = 15;
// This is my account id and encrypted password at WikiMapia
// You can use Ethereal (http://www.ethereal.com/) to figure out what
// to use for these fields for your account
String awm_uid = "9523";
String awm_guestname = "Andrew Morrow";
String awm_pw="ec9087ca14fb31ce71246ca6d149b46b";
// Utility to extract a string delimited by two other strings.
// s     - text to search; may be null
// begin - delimiter that precedes the wanted text
// end   - delimiter that follows the wanted text
// Returns the text between the first occurrence of begin and the first
// occurrence of end that follows it, or null if s is null or either
// delimiter is not found.
public static String scrapeStr(String s, String begin, String end){
    if (s == null) {
        return null;
    }
    int i = s.indexOf(begin);
    if (i == -1) {
        return null;
    }
    // Start looking for the end delimiter only after the begin delimiter.
    // Searching from the start of begin would match an end string that
    // happens to occur inside begin and yield a negative-length substring.
    int from = i + begin.length();
    int j = s.indexOf(end, from);
    if (j == -1) {
        return null;
    }
    return s.substring(from, j);
}
/*
// Utility Converts certain chars to respective strings (not used)
public static String convert(String o, char[] cFrom, String[] cTo) {
String r = "";
for (int i = 0; i < o.length(); i++ ) {
char x = o.charAt(i);
boolean added = false;
for (int j = 0; j < cFrom.length; j++ ) {
if ((x == cFrom[j]) && (added == false)) {
r = r + cTo[j];
added = true;
}
}
if (added == false) r = r + x;
}
return r;
}
// Makes some adjustments for HTML (not used)
static String htmlize(String o) {
char[] cFrom = { '&', '<', '>' };
String[] cTo = { "&amp;", "&lt;", "&gt;" };
return convert(o,cFrom,cTo);
}
*/
// URL-encode a string as UTF-8 (blanks become plus chars, etc.).
// Returns null, after printing the stack trace, if the encoding
// is reported as unsupported.
static String urlize(String o) {
    try {
        return URLEncoder.encode(o, "UTF-8");
    } catch (UnsupportedEncodingException unsupported) {
        unsupported.printStackTrace();
        return null;
    }
}
// Take in a lat/long number.
// Zillow delivers the number with a neg sign (if neg),
// a decimal and it truncates trailling zeros
// Remove the decimal point and restore the trailling zeros
// To keep WikiMapia happy
public static String sixdigit(String n){
// Ensure that there are six digits of precision after the decimal
point
int i =n.indexOf('.');
if(i==(-1)){
System.out.println("No decimal point! n=" + n);
return null;
}
// Truncate if too long (I have not seen then case happen yet)
if(n.length() > i + 7){
System.out.println("Long number n=" + n);
n=n.substring(0,i + 7);
}
// Restore trailling zero: WikiMapia has rigid format
while(n.length() < i + 6){
n=n + "0";
}
// drop the decimal point
n= n.substring(0,i) + n.substring(i + 1);
return n;
}
// Simple data record of what to transfer to WikiMapia.
// Filled in by doit_addr and handed to doit_wm.
class myrec {
// Zillow property number the record was scraped from
int zilnum;
// Longitude as an integer: the sixdigit() form of the scraped value
// (decimal point removed)
int longi;
// Latitude as an integer, same form as longi
int lat;
// Street part of the page-title address (text before the first ", ")
String street_addr;
// Contents of the page's "specs" span, newlines replaced by blanks and trimmed
String specs;
// First occupant name found by the reverse lookup, or null if none was found
String name;
// Phone number scraped from the reverse lookup; null when no name was found
String phone;
}
// Main routine: parameters are how many zillow entries to process
// and what index to start on.
// args[0] is the bunch size and args[1] the starting Zillow number;
// both must be integers. Anything else prints the usage line and returns.
public static void main(String [] args) {
    final String usage = "Usage: java DoWikiMap bunchSize startZillowNumber";
    DoWikiMap as = new DoWikiMap ();
    if (args.length != 2) {
        System.out.println(usage);
        return;
    }
    int bunch;
    int zilnum;
    try {
        bunch = Integer.parseInt(args[0]);
        zilnum = Integer.parseInt(args[1]);
    } catch (NumberFormatException e) {
        // Non-numeric arguments are a usage error, not a crash
        System.out.println(usage);
        return;
    }
    System.out.println("bunch=" + bunch);
    System.out.println("zilnum=" + zilnum);
    as.doit(bunch, zilnum);
}
// Workhorse routine to iterate over the Zillow entries
void doit(int bunch, int zilstart){
int diddle=0;
for (int i=zilstart; i < zilstart + bunch ; i++ ){
System.out.println("trying zil=" + i);
myrec m = doit_addr(i);
String upcome = null;
if(m==null){
System.out.println("no address rec!");
}else{
upcome = doit_wm(m);
System.out.println("upcoming=" + upcome);
}
// WikiMapia has a governing limit of how many entries any one IP is
allowed
// to create per minute. (three or five or something like that)
// Pause every fifth one anyway to avoid backlogging on a lot of failed
requests
diddle++;
if(upcome!=null || diddle%5 == 0){
System.out.println("sleep");
try {
Thread.sleep(record_delay*1000);
} catch (InterruptedException e){
System.out.println("Interrupt e=" + e);
}
}
}
}
// Given a Zillow number, the the Zillow info and do a reverse lookup
// on the street address.
// This merely scrapes the HTML for the reverse-looked up info
// and stores and returns a record
myrec doit_addr(int zilnum){
// This is a simple GET, no cookies
String resp = null;
String urlStr = "http://www.zillow.com/HomeDetails.htm?zprop=" +
zilnum;
// System.out.println("urlStr=" + urlStr);
try {
InputStream in = (InputStream) new java.net.URL(urlStr).getContent();
// System.out.println("in=" + in);
StringBuffer sb = new StringBuffer();
int ch = 0;
while ((ch = in.read()) != -1) {
sb.append((char)ch);
}
resp = sb.toString();
} catch (Exception ex) {
ex.printStackTrace();
}
// This is the home of the mayor of Concord, North Carolina
// <title>Zillow - 684 Wilshire Ave SW, Concord, NC 28027</title>
String full_addr = scrapeStr(resp, "<title>Zillow - ", "</title>");
if(full_addr==null){
return null;
}
String specs = scrapeStr(resp,"<span class=\"specs\">","</span>");
if(specs==null){
return null;
}
// Specs are multi-line. Just make them one line but leave the tabs
// in there because the Sq. Ft. figure is rendered with commas if
// over 1000 sq. ft.
specs = specs.replace("\n" , " ");
specs = specs.trim();
String lat = scrapeStr(resp,"\"latitude\" : \"" , "\",");
String longi = scrapeStr(resp,"\"longitude\" : \"" , "\",");
lat=sixdigit(lat);
longi=sixdigit(longi);
// Prepare for the next step
// Clean up the fields , break out fields of addr and space->plus
// If we cannot find the address (sometimes missing State or Zip
code?), then punt for now
if(full_addr.length() < 10){
return null;
}
// Discard 5 digit ZIP
String addr=full_addr.substring(0,full_addr.length()-6);
String state_code = addr.substring(addr.length()-2);
addr=addr.substring(0,addr.length()-4);
int i4 = addr.indexOf(", ");
String short_street_addr = addr.substring(0, i4);
String city=addr.substring(i4 + 2);
// Replace blanks with plus chars
Properties cooks = new Properties(); // Accumulate my cookies here
String post_url =
"http://reverse-address-lookup.addresses.com/redir.php";
String args3=
"qa=" + urlize(short_street_addr)
+ "&qc=" + urlize(city)
+ "&qs=" + urlize(state_code)
+ "&SearchP.x=38"
+ "&SerachP.y=7"
+ "&NewSearchFlag=1"
+ "&ReportType=34"
+ "&refer=1271"
+ "&searchform=name"
+ "&sid=1"
+ "&aid="
+ "&adword=" + urlize("ADDR|CRA.MOD");
String s3 = doReq("POST", post_url, cooks, args3);
// We do not need to accumulate these cookies since this is the
// final request of the stream.
// String ok_targ = "HTTP/1.1 200 OK\r\n";
// Properties headProps = new Properties();
// parseHeader(s3,ok_targ, headProps,cooks);
String name= scrapeStr( s3,
"<td class=\"F5\" nowrap><b><font color=\"#000000\">" , "</td>");
String phone = null;
if(name != null){
name=name.toLowerCase();
name=name.trim();
StringBuffer sb=new StringBuffer(name);
sb.setCharAt(0, Character.toUpperCase( sb.charAt(0)));
for (int i3=1; i3 < sb.length() ; i3++ ){
if(sb.charAt(i3) == ' '){
sb.setCharAt(i3 + 1, Character.toUpperCase( sb.charAt(i3 + 1)));
}
}
name=new String(sb);
// That is TWO SPACES in their HTML
phone= scrapeStr(s3, "<td><span" + " " + " " + "class=\"F4\" nowrap>",
"</td>");
}
// Note: this program does NOT handle multiple names
// It only gets the first name reported in the HTML
myrec m = new myrec();
m.zilnum = zilnum;
m.longi = Integer.parseInt(longi);
m.lat = Integer.parseInt(lat);
m.street_addr = short_street_addr;
m.specs= specs;
m.name=name;
m.phone=phone;
return m;
}
// Attempt to submit the info as a new record in WikiMapia
String doit_wm( myrec m){
int zilnum = m.zilnum;
...
read more »