Parsing Twitter Search Results in JSON and Atom

129 views
Skip to first unread message

ajstarks

unread,
Nov 18, 2009, 3:43:06 AM11/18/09
to golang-nuts
The link:

http://www.flickr.com/photos/ajstarks/4114605512/ shows:

the time a Go program takes to search twitter, parse the results, and
output the tweets, varying the number of search results from 10-100.
The blue shows the results for JSON, the green for ATOM (XML).

Also depicted are the number of bytes parsed and the resulting
parsing rate.

The conclusion is that JSON is more efficient; HTTP rates are
constant, but JSON requires less data, with less complex data
structures to unmarshal. Both methods deliver identical results:

ts -f json -n 10 '#golang'

RT @koizuka: RT @tokuhirom: #golang は C/C++ のかわりにつかうというよりは。python のかわりに
使うという領域の方がおおきいんだとおもう
RT @tokuhirom: #golang は C/C++ のかわりにつかうというよりは。python のかわりに使うという領域の方がおおき
いんだとおもう
Ambient Occlusion benchmark <a href="http://bit.ly/1idsyh"
rel="nofollow">bit.ly/1idsyh</a> by @aobench ported to #golang <a
href="http://bit.ly/2oYfKL" rel="nofollow">bit.ly/2oYfKL</a> , also
Haskell, C++, and um...everything
RT @alanQuatermain: Anyone following my #golang bundle for #textmate
on github should change to the new repository: <a href="http://bit.ly/
3zgbkL" rel="nofollow">bit.ly/3zgbkL</a>
Anyone following my #golang bundle for #textmate on github should
change to the new repository (different name/layout): <a href="http://
bit.ly/3zgbkL" rel="nofollow">bit.ly/3zgbkL</a>
Sometime in the last 20 hours, @github learned to highlight #golang
code. Apparently it does the highlighting on push.
Interesting. RT @jonfernquest: Lambda calculus interpreter in Google's
#golang <a href="http://bit.ly/21rQnB" rel="nofollow">bit.ly/21rQnB</
a> via @liris
「GoでHTTPサーバ その3 Cometもどき」http://bit.ly/1gbMPo 書いた #golang
Lambda calculus interpreter in Google's #golang <a href="http://bit.ly/
21rQnB" rel="nofollow">bit.ly/21rQnB</a> via @liris
golangにlambdaはいらないと思う。 <a href="http://bit.ly/21rQnB"
rel="nofollow">bit.ly/21rQnB</a> でも、あればあったで使いそう。(ちなみに、プロジェクトができただけみた
い) #golang

Here is the program; it demonstrates command line parsing, error
checking, http processing and unmarshaling both JSON and XML.

// ts -- twitter search
//
// Anthony Starks (ajst...@gmail.com)
//

package main

import (
"fmt";
"http";
"io";
"flag";
"os";
"xml";
"json";
)

// Command-line flags.
var (
	// format selects the representation requested from the search service
	// and therefore which parser handles the response.
	format = flag.String("f", "atom", "Output format (json or atom)")
	// nresults is sent as the rpp query parameter.
	nresults = flag.Int("n", 20, "Maximum results (up to 100)")
	// since is sent as the since query parameter; empty means no date given.
	since = flag.String("d", "", "Search since this date (YYYY-MM-DD)")
)

// queryURI is the search URL template. Its verbs are, in order: the format
// (used as the path extension), the escaped query, the since date and the
// number of results.
//
// The literal must stay on a single line: an interpreted string literal
// cannot contain a raw newline, and a line break would corrupt the URL.
const queryURI = "http://search.twitter.com/search.%s?q=%s&since=%s&rpp=%d"

// outputfmt is the format used to print each tweet; it takes the tweet text.
//const outputfmt = "%s \u27BE %s\n"
const outputfmt = "%s\n"


// Atom Feed Data Structure

// Feed is the top-level value that readatom unmarshals an Atom search
// response into. The XMLName tag names the Atom namespace and the feed
// element; Entry holds the individual search results.
type Feed struct {
	XMLName xml.Name "http://www.w3.org/2005/Atom feed"
	Title   string
	Id      string
	Link    []Link
	Updated Time
	Author  Person
	Entry   []Entry
}

// Entry is one item of a Feed. readatom prints only its Title; the other
// fields are populated by the unmarshaler but not used by this program.
type Entry struct {
	Title   string
	Id      string
	Link    []Link
	Updated Time
	Author  Person
	Summary Text
}

type Link struct {
Rel string "attr";
Href string "attr";
}

// Person describes the author of a Feed or an Entry.
type Person struct {
	Name  string
	URI   string
	Email string
}

type Text struct {
Type string "attr";
Body string "chardata";
}

// Time is the type of the Updated fields of Feed and Entry; the value is
// kept as the raw string and is not parsed by this program.
type Time string


// JSON Data Structure

// JTweets is the top-level value that readjson unmarshals a JSON search
// response into. Results holds the individual tweets; the remaining fields
// are populated by the unmarshaler but not used by this program.
type JTweets struct {
	Results      []Result
	Max_id       int
	Since_id     int
	Refresh_url  string
	Next_page    string
	Page         int
	Completed_in float
	Query        string
}

// Result is one tweet in a JTweets response. readjson prints only Text; the
// other fields are populated by the unmarshaler but not used by this program.
type Result struct {
	Profile_image_url string
	Created_at        string
	From_user         string
	To_user_id        string
	Text              string
	Id                string
	From_user_id      int
	Geo               string
	Iso_language_code string
	Source            string
}


// ts searches Twitter for s and prints the text of each matching tweet on
// standard output.
//
// how selects the response format and parser ("json" or "atom"), date is
// sent as the since query parameter and n as the rpp query parameter.
// Progress and failures are written to standard error. Any other value of
// how is reported as an error rather than silently ignored.
func ts(s string, how string, date string, n int) {
	if how != "json" && how != "atom" {
		fmt.Fprintf(os.Stderr, "Unsupported format '%s' (use json or atom)\n", how)
		return
	}

	// The second result of http.Get is not needed here.
	r, _, err := http.Get(fmt.Sprintf(queryURI, how, http.URLEscape(s), date, n))
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err)
		return
	}
	// Release the response body on every path below.
	defer r.Body.Close()

	fmt.Fprintf(os.Stderr, "\nSearching for '%s' (%d results in %s)\n", s, n, how)
	if r.StatusCode != http.StatusOK {
		fmt.Fprintf(os.Stderr,
			"Twitter is unable to search for %s as %s (%s)\n", s, how, r.Status)
		return
	}

	switch how {
	case "atom":
		readatom(r.Body)
	case "json":
		readjson(r.Body)
	}
}


// readatom unmarshals an Atom feed from r and prints the title of every
// entry on standard output using outputfmt. If the feed cannot be parsed,
// the error is reported on standard error and nothing is printed.
func readatom(r io.Reader) {
	var feed Feed
	if err := xml.Unmarshal(r, &feed); err != nil {
		fmt.Fprintf(os.Stderr, "Unable to parse the Atom feed (%v)\n", err)
		return
	}

	for _, entry := range feed.Entry {
		// entry.Author.Name would be the first argument of the two-field
		// variant of outputfmt.
		fmt.Printf(outputfmt, entry.Title)
	}
}

// readjson reads all of r, unmarshals it as a JSON search response and
// prints the text of every result on standard output using outputfmt.
// A read failure or a parse failure is reported on standard error and
// nothing is printed. r is not closed here.
func readjson(r io.ReadCloser) {
	b, err := io.ReadAll(r)
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err)
		return
	}

	var tweets JTweets
	// Unmarshal reports a success flag plus a second value that is included
	// in the failure message.
	ok, errtok := json.Unmarshal(string(b), &tweets)
	if !ok {
		fmt.Fprintf(os.Stderr, "Unable to parse the JSON feed (%v)\n", errtok)
		return
	}

	for _, result := range tweets.Results {
		// result.From_user would be the first argument of the two-field
		// variant of outputfmt.
		fmt.Printf(outputfmt, result.Text)
	}
}

// dump reads all of r and writes it unchanged to standard output; it is a
// debugging aid for inspecting a raw response. A read failure is reported
// on standard error. r is not closed here.
func dump(r io.ReadCloser) {
	b, err := io.ReadAll(r)
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err)
		return
	}
	// Print, not Printf: the body is data, not a format string. Passing it
	// to Printf would mangle every '%' it contains (e.g. "%!d(MISSING)").
	fmt.Print(string(b))
}

// main parses the command-line flags and runs one search per remaining
// argument, in order, using the format, since date and result count given
// by the flags.
func main() {
	flag.Parse()
	for _, query := range flag.Args() {
		ts(query, *format, *since, *nresults)
	}
}
Reply all
Reply to author
Forward
0 new messages