vertx 3 httpClient unshorten url - redirect handling example

1,026 views
Skip to first unread message

Jazz

unread,
Mar 25, 2015, 9:09:10 PM3/25/15
to ve...@googlegroups.com

I haven't seen any example of HttpClient being used to unshorten URLs, so I thought I would write one and improve it based on your comments.

So, using the method below, if I start with http://t.co/Oj3GYaGzER:

The output will be:
http://t.co/Oj3GYaGzER statusCode = 301
http://aja.me/msxc statusCode = 301
http://trib.al/gATHbk1 statusCode = 301
http://www.aljazeera.net/news/arabic/2015/3/25/%D8...... statusCode = 400

So this works fine but how can we use observables to do this and return the final url?




   
private void unshorten(String url) {
       
        URI uri
= null;
       
try {
            uri
= new URI(url);
       
} catch (Exception ex) {
            log
.error("invalid url", ex);
           
return;
       
}

       
int port = uri.getPort();
       
if (port <= 0) {
           
String scheme = uri.getScheme();
           
if ("http".equalsIgnoreCase(scheme)) {
                port
= 80;
           
} else if ("https".equalsIgnoreCase(scheme)) {
                port
= 443;
           
} else {
                log
.error("unknow port");
               
return;
           
}
       
}

       
String requestURI = uri.getPath();
       
String query = uri.getQuery();
       
if (query != null && !query.trim().isEmpty()) {
            requestURI
+= "?" + query;
       
}

       
HttpClient client = vertx.createHttpClient();
       
HttpClientRequest request = client.head(port, uri.getHost(), requestURI, response -> {
            log
.debug(url + " statusCode = " + response.statusCode());
           
String location = response.getHeader("location");
           
if (location != null && !location.trim().isEmpty()) {
                unshorten
(location);
           
}
       
});
       
// Note: some shorten url servers will fail to reply if there is no user-agent header set
        request
.putHeader("User-Agent", "Mozilla/5.0");
        request
.end();
       
   
}

 


Jazz

unread,
Mar 25, 2015, 11:55:58 PM3/25/15
to ve...@googlegroups.com

Here is the example using observables. It works, but I don't know if this is the right way to do it!
Any comments will be appreciated. Thanks!



       
   
private void test() {    
       
JsonObject jo = new JsonObject().put("url", "http://t.co/Oj3GYaGzER");
       
Observable<String> obs = unshorten(jo);
        obs
.subscribe(location -> {
            log
.debug("final location is = " + location);
       
}, t -> {
            log
.error("ex", t);
       
}, () -> {
            log
.debug("complete");
       
});
       
   
}


   
private Observable<String> unshorten(JsonObject input) {
       
       
String url = input.getString("url");


        URI uri
= null;
       
try {
            uri
= new URI(url);
       
} catch (Exception ex) {
            log
.error("invalid url", ex);

           
return null;

       
}

       
int port = uri.getPort();
       
if (port <= 0) {
           
String scheme = uri.getScheme();
           
if ("http".equalsIgnoreCase(scheme)) {
                port
= 80;
           
} else if ("https".equalsIgnoreCase(scheme)) {
                port
= 443;
           
} else {
                log
.error("unknow port");

               
return null;

           
}
       
}

       
String requestURI = uri.getPath();
       
String query = uri.getQuery();
       
if (query != null && !query.trim().isEmpty()) {
            requestURI
+= "?" + query;
       
}

       
HttpClient client = vertx.createHttpClient();

       
HttpClientRequest request = client.head(port, uri.getHost(), requestURI);
       
       

       
OnSubscribe<JsonObject> onSubscribe = new Observable.OnSubscribe<JsonObject>() {
           
@Override
           
public void call(Subscriber<? super JsonObject> observer) {

                request
.toObservable()
               
.flatMap(response -> {

                    log
.debug(url + " statusCode = " + response.statusCode());
                   
String location = response.getHeader("location");

                   
JsonObject jo = new JsonObject();
                   
if (location != null) {
                        jo
.put("location", location);
                   
}
                    jo
.put("url", url);
                    observer
.onNext(jo);
                    observer
.onCompleted();
                   
return Observable.empty();
               
})                
               
.subscribe();


               
// Note: some shorten url servers will fail to reply if there is no user-agent header set
                request
.putHeader("User-Agent", "Mozilla/5.0");
                request
.end();
               
           
}

       
};
       
       
       
Observable<String> obs = Observable.create(onSubscribe)
       
.flatMap(jo -> {
           
String location = jo.getString("location");
           
String url2 = jo.getString("url");
           
if (location == null || location.trim().isEmpty()) {
               
return Observable.just(url2);
           
} else {
                jo
.put("url", location);
               
return unshorten(jo);
           
}
       
});
   
       
return obs;

       
   
}


 

The output is:

23:52:00.315 [vert.x-eventloop-thread-2] DEBUG c.a.v.h.server.MyHttpServerVerticle - [MyHttpServerVerticle.java:136] - http://t.co/Oj3GYaGzER statusCode = 301
23:52:00.498 [vert.x-eventloop-thread-2] DEBUG c.a.v.h.server.MyHttpServerVerticle - [MyHttpServerVerticle.java:136] - http://aja.me/msxc statusCode = 301
23:52:00.822 [vert.x-eventloop-thread-2] DEBUG c.a.v.h.server.MyHttpServerVerticle - [MyHttpServerVerticle.java:136] - http://trib.al/gATHbk1 statusCode = 301
23:52:01.227 [vert.x-eventloop-thread-2] DEBUG c.a.v.h.server.MyHttpServerVerticle - [MyHttpServerVerticle.java:136] - http://www.aljazeera.net/news/arabic... statusCode = 400
23:52:01.228 [vert.x-eventloop-thread-2] DEBUG c.a.v.h.server.MyHttpServerVerticle - [MyHttpServerVerticle.java:84] - final location is = http://www.aljazeera.net/news/arabic/2015/3/25/%D8%A8%D8%A7%D9%86-%D9%84%D8%A7-%D8%AD%D9%84-%D8%B9%D8%B3%D9%83%D8%B1%D9%8A%D8%A7-%D9%84%D9%84%D8%A3%D8%B2%D9%85%D8%A9-%D9%81%D9%8A-%D8%A7%D9%84%D9%8A%D9%85%D9%86
23:52:01.228 [vert.x-eventloop-thread-2] DEBUG c.a.v.h.server.MyHttpServerVerticle - [MyHttpServerVerticle.java:88] - complete

 





Julien Viet

unread,
Mar 26, 2015, 10:23:33 AM3/26/15
to ve...@googlegroups.com, Jazz
Hi,

First, you should not return null, but rather an Observable that fails.

Then your implementation can be simplified a bit, here is my version:

/**
 * Resolves {@code url} by following {@code location} headers with HEAD
 * requests issued through {@code client}, and emits the last URL whose
 * response had no non-blank {@code location} header.
 *
 * <p>There is no hop limit and no loop detection.
 *
 * @param client HTTP client used for every hop
 * @param url    absolute http/https URL to resolve
 * @return an Observable emitting the final URL, or failing if {@code url}
 *         cannot be parsed, has no host, has neither an explicit port nor an
 *         http/https scheme, or the request reports an error
 */
private Observable<String> unshorten(HttpClient client, String url) {
    URI uri;
    try {
        uri = new URI(url);
    } catch (Exception ex) {
        return Observable.error(ex);
    }

    // Fall back to the scheme's default port when the URL does not carry one.
    int port = uri.getPort();
    if (port <= 0) {
        String scheme = uri.getScheme();
        if ("http".equalsIgnoreCase(scheme)) {
            port = 80;
        } else if ("https".equalsIgnoreCase(scheme)) {
            port = 443;
        } else {
            return Observable.error(new IllegalArgumentException("Unknown port"));
        }
    }

    // Opaque or authority-less URIs have no host to connect to.
    String host = uri.getHost();
    if (host == null) {
        return Observable.error(new IllegalArgumentException("No host in url: " + url));
    }

    // Use the raw (still percent-encoded) path and query: getPath()/getQuery()
    // decode escaped octets, which would put spaces or non-ASCII characters
    // into the request line.
    String requestURI = uri.getRawPath();
    if (requestURI == null || requestURI.isEmpty()) {
        // "http://host" has an empty path, but a request target must not be empty.
        requestURI = "/";
    }
    String query = uri.getRawQuery();
    if (query != null && !query.trim().isEmpty()) {
        requestURI += "?" + query;
    }

    HttpClientRequest request = client.head(port, host, requestURI);

    Observable<String> observable = request.toObservable().flatMap(resp -> {
        String location = resp.getHeader("location");
        if (location == null || location.trim().isEmpty()) {
            // No further redirect: this URL is the result.
            return Observable.just(url);
        }
        return unshorten(client, location);
    });

    // We use a subject because we need to subscribe before ending the request as the client
    // does not support the other way
    ReplaySubject<String> subject = ReplaySubject.create();
    return subject.doOnSubscribe(() -> {
        observable.subscribe(subject);

        request.putHeader("User-Agent", "Mozilla/5.0");
        request.end();
    });
}

-- 
Julien Viet
www.julienviet.com
--
You received this message because you are subscribed to the Google Groups "vert.x" group.
To unsubscribe from this group and stop receiving emails from it, send an email to vertx+un...@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

Jazz

unread,
Mar 26, 2015, 1:33:58 PM3/26/15
to ve...@googlegroups.com, jazz...@gmail.com

Thank you so much Julien for the ReplaySubject part, this is very helpful, thank you!

So I added that part, plus redirect loop detection, and returning all urls instead of just the final location.

The output will be like:
location 0 = http://t.co/Oj3GYaGzER statusCode = 301
location 1 = http://aja.me/msxc statusCode = 301
location 2 = http://trib.al/gATHbk1 statusCode = 301
location 3 = http://www.aljazeera.net/news/arabic/.... statusCode = 400

Known Issues:
- relative redirect location is not supported yet
- can't handle non-ascii characters in location header, but with Alexander's code mentioned here https://groups.google.com/forum/#!topic/vertx/TU7k-dKbbms it is possible! Thanks Alexander!
- sometimes a server will generate a JavaScript function or a meta redirect instead, so we would need to use the GET method and handle the body content, for example when statusCode = 400 (bad request).




       
   
private void test() {    
       
       
String url = "http://t.co/Oj3GYaGzER";
       
int maxRedirectsCount = 5;
       
HttpClient client = vertx.createHttpClient();
       
Observable<JsonArray> obs = unshorten(client, url, maxRedirectsCount);
       
        obs
.subscribe(list -> {
           
for (int i=0; i<list.size(); i++) {
               
JsonObject info = list.getJsonObject(i);
               
int statusCode = info.getInteger("statusCode");
               
String _url = info.getString("url");
                log
.debug("location " + i +" = " + _url + " statusCode = " + statusCode);                
           
}

       
}, t -> {
            log
.error("ex", t);
       
}, () -> {
            log
.debug("complete");
       
});
       
   
}


   
private Observable<JsonArray> unshorten(HttpClient client, String url, int maxRedirectsCount) {
       
JsonArray urlsList = new JsonArray();

       
JsonObject jo = new JsonObject();

        jo
.put("url", url);
        urlsList
.add(jo);
       
return unshorten(client, maxRedirectsCount, urlsList);
   
}

   
private Observable<JsonArray> unshorten(HttpClient client, int maxRedirectsCount, JsonArray list) {
       
       
JsonObject info = list.getJsonObject(list.size()-1);
       
String url = info.getString("url");

       
        URI uri
= null;
       
try {
            uri
= new URI(url);
       
} catch (Exception ex) {

           
return Observable.error(new Exception("invalid url", ex));        
       
}


       
int port = uri.getPort();
       
if (port <= 0) {
           
String scheme = uri.getScheme();
           
if ("http".equalsIgnoreCase(scheme)) {
                port
= 80;
           
} else if ("https".equalsIgnoreCase(scheme)) {
                port
= 443;
           
} else {

               
return Observable.error(new Exception("unknown port"));

           
}
       
}

       
String requestURI = uri.getPath();
       
String query = uri.getQuery();
       
if (query != null && !query.trim().isEmpty()) {
            requestURI
+= "?" + query;
       
}
       
       
HttpClientRequest request = client.head(port, uri.getHost(), requestURI);


       
Observable<JsonArray> obs = request.toObservable()

       
.flatMap(response -> {
            log
.debug(url + " statusCode = " + response.statusCode());

           
final String location = response.getHeader("location");
           
final int statusCode = response.statusCode();
           
boolean redirect = false;
           
           
// 301, 302, 303, 307
           
if (statusCode == HttpResponseStatus.MOVED_PERMANENTLY.code() ||                    
                    statusCode
== HttpResponseStatus.FOUND.code() ||
                    statusCode
== HttpResponseStatus.SEE_OTHER.code() ||
                    statusCode
== HttpResponseStatus.TEMPORARY_REDIRECT.code()) {
                redirect
= true;
           
}
           
           
if (redirect && (location == null || location.trim().isEmpty())) {
               
return Observable.error(new Exception("location is empty"));
           
}

           
           
// redirect loop check
           
if (redirect) {
               
for (int i=0; i<list.size(); i++) {
                   
JsonObject _info = list.getJsonObject(i);
                   
String _url = _info.getString("url");
                   
if (location.equals(_url)) {
                       
return Observable.error(new Exception("redirect loop detected"));
                   
}
               
}
           
}
           
           
// max redirects count
           
if (list.size() > maxRedirectsCount) {
               
return Observable.error(new Exception("max redirects count reached"));
           
}
                       
           
// update current url info
            info
.put("statusCode", statusCode);
           
           
// return or expand
           
if (redirect) {
               
// expand url
               
JsonObject newInfo = new JsonObject();
                newInfo
.put("url", location);
                list
.add(newInfo);
               
return unshorten(client, maxRedirectsCount, list);
           
} else {
               
// return result
               
return Observable.just(list);
           
}

           
       
});                
       
       
ReplaySubject<JsonArray> subject = ReplaySubject.create();
       
return subject.doOnSubscribe(() -> {
            obs
.subscribe(subject);

Jazz

unread,
Mar 26, 2015, 6:57:19 PM3/26/15
to ve...@googlegroups.com, jazz...@gmail.com
Reply all
Reply to author
Forward
0 new messages