var fs = require('fs');
function innerCall(nam, ref) {
var ipage = require("webpage").create();
ipage.settings.userAgent = 'RosettaCodeSlurper';
ipage.open(ref, function(status) {
console.log('Finished inner');
var description = ipage.evaluate(function() {
var start = document.getElementsByClassName("infobox")[0];
var cursor = start.nextElementSibling;
var desc = "";
while (cursor.tagName !== "TABLE" && cursor.id !== "toc") {
there = cursor;
desc = desc + there.innerText + "\n";
cursor = cursor.nextElementSibling;
}
return desc;
});
var fileName = nam + ".txt";
fileName = fileName.replace(/\//g, "_");
var h = fs.open(fileName, 'w');
h.write(description);
h.flush();
h.close();
ipage.close();
}
);
}
var page = require('webpage').create();
page.settings.userAgent = 'RosettaCodeSlurper';
page.open('http://rosettacode.org/wiki/Category:Programming_Tasks', function(status) {
console.log('Finished outer');
var anchors = page.evaluate(function() {
var result = [];
var anchs = document.getElementById("mw-pages").getElementsByTagName("a");
for (var i = 0; i < anchs.length; i++) {
result.push([anchs[i].innerHTML, anchs[i].href]);
}
return result;
});
for (var i = 0; i < anchors.length; i++) {
var txt = anchors[i][0];
var hrf = anchors[i][1];
innerCall(txt, hrf);
}
//page.close();
phantom.exit();
});
If you reuse the `page` object as Ivan suggested, you'll need to synchronize your loop iterations.
Your problem is that, right now, you are spawning `anchors.length` new asynchronously executing `WebPage#open` calls simultaneously. Not only does that eat up a huge chunk of memory (2000 links => 2000 new WebPage instances created) but it also opens potentially the same number of file descriptors for writing out your results. This could be running into system/session limitations like `ulimit` maximums.
Lastly but certainly not leastly, your `phantom.exit()` call is going to occur before all/most/any of your loop's async callouts are finished.
Sincerely,
James M. Greene
You should reuse same webpage instance in your innerCall function. So I suggest that you move ipage declaration outside the innerCall function, also remove ipage.close() call.
--
You received this message because you are subscribed to the Google Groups "phantomjs" group.
To unsubscribe from this group and stop receiving emails from it, send an email to phantomjs+...@googlegroups.com.
Visit this group at http://groups.google.com/group/phantomjs.
For more options, visit https://groups.google.com/d/optout.