Script only works sometimes

146 views
Skip to first unread message

kutchbhi

unread,
May 31, 2013, 8:12:45 AM5/31/13
to phan...@googlegroups.com
// PhantomJS script: open a page, then save a screenshot and the page's HTML.
var page = require('webpage').create();
var fs = require('fs');

// The page load is requested first ...
page.open('https://www.somesite.com/someapge.aspx');

// ... and the load-finished handler is assigned only afterwards.
page.onLoadFinished = function () {
    // Serialise the document from inside the page context: prefer the <html>
    // element's outerHTML, fall back to the body's innerHTML.
    var content = page.evaluate(function () {
        var htmlElement = document.getElementsByTagName("html")[0];
        if (htmlElement) {
            return htmlElement.outerHTML;
        }
        return document.body.innerHTML;
    });

    // Screenshot first, then the HTML dump, then quit.
    page.render('export.png');
    fs.write('login.html', content, 'w');

    phantom.exit();
};


The above script works sometimes; other times it fails to render the output and only saves the HTML as it was BEFORE the JavaScript on the page executed.
Why?
PhantomJS version (--version): 1.6.0

James Greene

unread,
May 31, 2013, 3:03:17 PM5/31/13
to phan...@googlegroups.com
Two things:
  1. You are adding the load handler AFTER you actually request to open the page, meaning that it is possible that the handler isn't even attached when the page finishes loading.  Easy fix: attach the handler (`onLoadFinished`) before calling `open`.
  2. If the page's JavaScript executes asynchronously (as most does/should), then you need to add a wait duration or a condition check so that you only render once the page is actually ready.

Dumb way (arbitrarily wait a duration of 10 seconds):

// PhantomJS script: load a page, wait a fixed delay so asynchronous page
// scripts can run, then save a screenshot and the resulting HTML.
var page = require('webpage').create();
var fs = require('fs');
var url = 'https://www.somesite.com/someapge.aspx';
var renderDelay = 10000; // milliseconds to wait after the load finishes

// Called by PhantomJS when the page finishes loading.
// `status` is 'success' when the load succeeded and 'fail' otherwise.
page.onLoadFinished = function(status) {
  // Treat anything other than 'success' as a failure; comparing against
  // "failed" would never match the 'fail' value PhantomJS reports.
  if (status !== "success") {
    console.error("Failed to load: " + this.url);
    phantom.exit(1);
    return;
  }

  // Wait 10 seconds and then render
  setTimeout(function() {
    // Serialise the whole document from inside the page context.
    var content = page.evaluate(function() {
      var root = document.getElementsByTagName("html")[0];
      var html = root ? root.outerHTML : document.body.innerHTML;
      return html;
    });

    page.render('export.png');
    fs.write('login.html', content, 'w');

    phantom.exit(0);
  }, renderDelay);
};

// Start the load only after the handler is attached, so the load-finished
// event cannot be missed.
page.open(url);



Smarter way (wait till some condition is true, e.g. an element that is rendered near the end of the JS execution flow is present):

// PhantomJS script: load a page, poll until an element created by the page's
// JavaScript appears (or a timeout elapses), then save a screenshot and the
// resulting HTML.
var page = require('webpage').create();
var fs = require('fs');
var url = 'https://www.somesite.com/someapge.aspx';
var maxTimeout = 10000;   // total time to wait for the element, in ms
var pollInterval = 1000;  // time between checks, in ms

// Called by PhantomJS when the page finishes loading.
// `status` is 'success' when the load succeeded and 'fail' otherwise.
page.onLoadFinished = function(status) {
  // Treat anything other than 'success' as a failure; comparing against
  // "failed" would never match the 'fail' value PhantomJS reports.
  if (status !== "success") {
    console.error("Failed to load: " + this.url);
    phantom.exit(1);
    return;
  }

  var startTimeAfterLoad = (new Date()).getTime();

  // Check every 1 second until the element is found
  var intervalId = setInterval(function() {
    var foundElement = page.evaluate(function() {
      return !!document.getElementById("idOfSomeElementRenderedByJavaScript");
    });
    if (foundElement) {
      clearInterval(intervalId);

      // Serialise the whole document from inside the page context.
      var content = page.evaluate(function() {
        var root = document.getElementsByTagName("html")[0];
        var html = root ? root.outerHTML : document.body.innerHTML;
        return html;
      });

      page.render('export.png');
      fs.write('login.html', content, 'w');

      phantom.exit(0);
    }
    // Bail out after waiting a total of 10 seconds
    else if ((new Date()).getTime() > (startTimeAfterLoad + maxTimeout)) {
      clearInterval(intervalId);
      console.error('Timed out after ' + maxTimeout + ' ms');
      phantom.exit(1);
    }
  }, pollInterval);
};

// Start the load only after the handler is attached, so the load-finished
// event cannot be missed.
page.open(url);



You can find similar examples in the "examples" folder included with PhantomJS (or on GitHub), e.g. "waitFor.js".  Hope this helps!

Sincerely,
    James Greene



--
You received this message because you are subscribed to the Google Groups "phantomjs" group.
To unsubscribe from this group and stop receiving emails from it, send an email to phantomjs+...@googlegroups.com.
Visit this group at http://groups.google.com/group/phantomjs?hl=en.
For more options, visit https://groups.google.com/groups/opt_out.
 
 

kutchbhi

unread,
Jun 2, 2013, 9:37:53 AM6/2/13
to phan...@googlegroups.com
Thanks dude, that answer was perfect! Really appreciate it.
Reply all
Reply to author
Forward
0 new messages