Looping through a casper.page.close()

51 views
Skip to first unread message

pa...@sparklinesolutions.com

unread,
Apr 21, 2016, 1:11:02 AM4/21/16
to CasperJS
Hi Guys,

I want to crawl a lot of links, but I find that using only one instance of casperjs/phantomjs uses up a lot of CPU and Memory.

So I wanted to try out if I can just loop the whole casper process inside a while and use casper.page.close() each loop to minimize the resource usage

But there's something wrong in my loop though, my sample script is below:

var links = [];
var pendingUrls = [
   
'http://casperjs.org/',
   
'http://phantomjs.org/'
];

function getLinks() {
   
var links = [];
   
Array.prototype.forEach.call(__utils__.findAll('a'), function(e) {
        links
.push(e.getAttribute('href'));
   
});
   
return links;
}

function crawl(url) {
   
var casper = require('casper').create();
    casper
.start(url);
    casper
.then(function() {
       
this.echo(this.getTitle());
        links
= this.evaluate(getLinks);
       
this.echo(links.length + ' links found:');
       
this.echo(' - ' + links.join('\n - ')).exit();
   
});
    casper
.then(function() {
        casper
.page.close();
   
});
    casper
.run(function() {
       
this.exit();
   
});
}

while (pendingUrls.length > 0) {
    nextURL
= pendingUrls.shift();
    crawl
(nextURL);
}


In my output, only one of the links in the pendingURL list is crawled.  The other one returns no output.  Any ideas?
Reply all
Reply to author
Forward
0 new messages