Get the URL of a page after a Javascript redirection

3,292 views
Skip to first unread message

Thierry Sans

unread,
May 19, 2014, 4:17:53 PM5/19/14
to phan...@googlegroups.com
Hello, 

Assuming that a page "http://example.com/" contains a Javascript redirection such as 

<script>
window.location = "http://www.google.com/"
</script>

How can I get the url of the redirected page, "http://www.google.com/" here?

mingtan du

unread,
May 20, 2014, 8:03:54 AM5/20/14
to phan...@googlegroups.com
You can use this:

// PhantomJS script: prints the URL that a page navigates to after a redirect.
// Usage: phantomjs <this-script> <url>
// Kept in ES5 style (var, function expressions) to match the original script.
var sys = require('system');

// How long to keep the process alive after the initial load succeeds, so that
// a redirect started by the page's own JavaScript can still be reported.
// The value is a judgement call; tune it for slow pages.
var REDIRECT_WAIT_MS = 2000;

// URL currently treated as "the page". Replaced each time a redirect is seen.
// The original used `phantom.exit(0)` as the fallback value of a ternary, which
// stored that call's return value here and still went on to call renderPage().
var pageUrl = sys.args[1];

/** Ends the PhantomJS process with exit status 0. */
function forceExit() {
    phantom.exit(0);
}

/**
 * Returns `url` with a single '/' appended when it does not already end in one,
 * so that two URLs differing only by a trailing slash compare as equal.
 */
function withTrailingSlash(url) {
    return (url.charAt(url.length - 1) !== '/') ? url + '/' : url;
}

/**
 * Opens `url` in a new web page and writes every main-frame navigation target
 * that differs from the current `pageUrl` to stdout, one URL per line.
 *
 * @param {string} url - address to load.
 */
var renderPage = function (url) {
    var page = require('webpage').create();

    page.onNavigationRequested = function (requestedUrl, type, willNavigate, main) {
        var tmpUrl = withTrailingSlash(requestedUrl);
        var tmpPageUrl = withTrailingSlash(pageUrl);
        console.log(tmpUrl, tmpPageUrl); // trace of the two URLs being compared
        if (main && tmpUrl !== tmpPageUrl) {
            pageUrl = requestedUrl;
            sys.stdout.write(requestedUrl + '\n');
            // Exit shortly after reporting rather than waiting for the target to load.
            setTimeout(forceExit, 100);
        }
    };

    page.open(url, function (status) {
        if (status !== 'success') {
            phantom.exit(1);
        } else {
            // Do not exit right away: exiting directly from this callback is what
            // the thread reports as preventing JavaScript redirects from being seen.
            setTimeout(forceExit, REDIRECT_WAIT_MS);
        }
    });
};

if (pageUrl) {
    renderPage(pageUrl);
} else {
    // No URL given: report it and stop without opening a page.
    // Exit status stays 0, as in the original script.
    sys.stderr.write('Usage: phantomjs <script> <url>\n');
    phantom.exit(0);
}

For more details, see: https://github.com/ariya/phantomjs/issues/10389

Thierry Sans

unread,
May 20, 2014, 12:29:19 PM5/20/14
to phan...@googlegroups.com
This code might work for a server side redirection but it does not seem to work for a javascript redirection kicking in once the webpage has been loaded. Am I right? 

mingtan du

unread,
May 21, 2014, 1:20:07 AM5/21/14
to phan...@googlegroups.com
It does work for a JavaScript redirection; just try it,

and this is my result:






redirect.html:
<!Doctype html>
<html>
<head>
<script>
window.location = "http://www.google.com/"
</script>
</head>
<body>
</body>
</html>


Thierry Sans

unread,
May 21, 2014, 9:03:53 AM5/21/14
to phan...@googlegroups.com
It was still not working, but I have found a fix: 

    page.open(url, function(status) {
           
if ( status !== 'success' ) {
                phantom
.exit(1);
           
} else {

                setTimeout
(function() {
                    phantom
.exit(0);
               
}, 0);
           
}
       
});

This seems to be the reason why: 


Thanks a lot for your help. 


Thierry Sans

unread,
May 21, 2014, 9:28:05 AM5/21/14
to phan...@googlegroups.com
This is the code I have in the end: 

// PhantomJS script: logs the requested URL and every main-frame navigation
// to a different URL (i.e. each redirect target).
// Usage: phantomjs <this-script> <url>
var sys = require('system');

// URL given on the command line. The original used `phantom.exit(0)` as the
// fallback value of a ternary, which stored that call's return value here and
// still went on to log it and call renderPage().
var pageUrl = sys.args[1];

/**
 * Returns `url` with a single '/' appended when it does not already end in one,
 * so that two URLs differing only by a trailing slash compare as equal.
 */
function withTrailingSlash(url) {
    return (url.charAt(url.length - 1) !== '/') ? url + '/' : url;
}

/**
 * Opens `url` in a new web page and logs each main-frame navigation whose
 * target differs from the requested URL.
 *
 * @param {string} url - address to load.
 */
var renderPage = function (url) {
    var page = require('webpage').create();

    page.onNavigationRequested = function (requestedUrl, type, willNavigate, main) {
        // Compare modulo a trailing slash, as the other scripts in this thread do.
        // NOTE(review): presumably the initial request for "http://host" is
        // reported as "http://host/" — confirm; without this it would be logged
        // as a redirect.
        if (main && withTrailingSlash(requestedUrl) !== withTrailingSlash(pageUrl)) {
            console.log("Redirected URL: " + requestedUrl);
        }
    };

    page.open(url, function (status) {
        if (status !== 'success') {
            phantom.exit(1);
        } else {
            // Exit via a zero-delay timer rather than directly in this callback,
            // so navigation events that are already pending get reported first.
            setTimeout(function () {
                phantom.exit(0);
            }, 0);
        }
    });
};

if (pageUrl) {
    console.log("Requested URL: " + pageUrl);
    renderPage(pageUrl);
} else {
    // No URL given: say so and stop without opening a page.
    // Exit status stays 0, as in the original script.
    console.log('Usage: phantomjs <script> <url>');
    phantom.exit(0);
}

Let me know if you have comments. Thanks again. 

mingtan du

unread,
May 22, 2014, 7:22:17 AM5/22/14
to phan...@googlegroups.com
Sorry, there was a bug in the script; here is the newest version:
// PhantomJS script: prints the URL that a page navigates to after a redirect.
// Usage: phantomjs <this-script> <url>
// Kept in ES5 style (var, function expressions) to match the original script.
var sys = require('system');

// URL currently treated as "the page". Replaced each time a redirect is seen.
// The original used `phantom.exit(0)` as the fallback value of a ternary, which
// stored that call's return value here and still went on to call renderPage().
var pageUrl = sys.args[1];

/** Ends the PhantomJS process with exit status 0. */
function forceExit() {
    phantom.exit(0);
}

/**
 * Returns `url` with a single '/' appended when it does not already end in one,
 * so that two URLs differing only by a trailing slash compare as equal.
 */
function withTrailingSlash(url) {
    return (url.charAt(url.length - 1) !== '/') ? url + '/' : url;
}

/**
 * Opens `url` in a new web page and writes every main-frame navigation target
 * that differs from the current `pageUrl` to stdout, one URL per line.
 * The process ends 100 ms after a redirect is reported, or after 2 s at the
 * latest.
 *
 * @param {string} url - address to load.
 */
var renderPage = function (url) {
    var page = require('webpage').create();

    page.onNavigationRequested = function (requestedUrl, type, willNavigate, main) {
        var tmpUrl = withTrailingSlash(requestedUrl);
        var tmpPageUrl = withTrailingSlash(pageUrl);
        console.log(tmpUrl, tmpPageUrl); // trace of the two URLs being compared
        if (main && tmpUrl !== tmpPageUrl) {
            pageUrl = requestedUrl;
            sys.stdout.write(requestedUrl + '\n');
            setTimeout(forceExit, 100);
        }
    };

    // The original passed a stray third argument (100) after the callback.
    // NOTE(review): as far as I can tell, open() treats a three-argument call
    // whose last argument is not a function as (url, method, data), so the
    // callback was never registered — confirm against the WebPage.open docs.
    page.open(url, function (status) {
        if (status !== 'success') {
            phantom.exit(1);
        }
        // On success, keep running: the 100 ms timer set after a redirect, or
        // the 2 s timer below, ends the process. Exiting here would stop the
        // script before a JavaScript redirect could be reported.
    });

    // Upper bound on run time when no redirect is ever seen.
    setTimeout(forceExit, 2000);
};

if (pageUrl) {
    renderPage(pageUrl);
} else {
    // No URL given: report it and stop without opening a page.
    // Exit status stays 0, as in the original script.
    sys.stderr.write('Usage: phantomjs <script> <url>\n');
    phantom.exit(0);
}
Reply all
Reply to author
Forward
0 new messages