CasperJS crashes (maybe due to memory)

326 views
Skip to first unread message

Jim Schultz

unread,
Jul 21, 2016, 12:03:32 AM7/21/16
to CasperJS
So I'm trying to get a program running that goes through about 2500 sites and scrapes them for certain data, but it crashes about 25% of the way through. Right now my temporary solution is to create 4 programs and run them one after the other, but that's not preferable. I am looking for something like PhantomJS's clearMemoryCache(), though casper.page.pageModuleApi().clearMemoryCache() doesn't work. Here's my code:
// Settings applied to the page object Casper drives: image and plugin loading
// are switched off, and a desktop Chrome user-agent string is sent.
var pageSettings = {
    loadImages: false,
    loadPlugins: false,
    userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36'
};

// The Casper instance used by the rest of the script; verbose output is
// enabled but restricted to messages of level 'error'.
var casper = require('casper').create({
    verbose: true,
    logLevel: 'error',
    pageSettings: pageSettings
});
// File-system module; the scraping loop uses it to append rows to the CSV.
var fs = require('fs');

// URLs to visit. The list itself is omitted from the post (about 2500 entries).
var links = [];

// Date parts captured once at start-up; together they name the CSV file.
var currentTime = new Date();
var year = currentTime.getFullYear();
var month = currentTime.getMonth() + 1; // getMonth() is zero-based
var day = currentTime.getDate();

/*
 * Queue one scraping step per URL in `links`, then run the queue.
 *
 * For each page: click twice at (1,1), wait five seconds, read the
 * add-to-cart button's class, the document title and the pickup zip code,
 * save a screenshot, and append one row to the CSV named after the start-up
 * date ("<month>, <day>, <year>.csv").
 */
casper.start().each(links, function(self, link) {
    self.thenOpen(link, function() {
        this.mouse.click(1, 1);
        this.mouse.click(1, 1);

        // Take a fresh timestamp for every page. Reading it from the Date
        // created at start-up would stamp every row with the same time.
        var now = new Date();
        var time = now.getHours() + ":" + now.getMinutes() + "." + now.getSeconds();

        this.wait(5000, function() {
            var buttonClass = this.evaluate(function() {
                return document.getElementById("add2Cart_1").className;
            });
            var title = this.evaluate(function() {
                return document.title;
            });
            var zipcode = this.evaluate(function() {
                return document.getElementById("pickupZip").value;
            });
            var stock = sort(buttonClass);

            this.capture(link + '.png');
            fs.write(
                month + ", " + day + ", " + year + ".csv",
                title + ", " + zipcode + ", " + stock + ", " + time + "\n",
                "a"
            );
            phantom.clearCookies();

            // Release cached page resources between pages so a long run does
            // not accumulate them. Only newer CasperJS releases expose this
            // method, hence the feature check; older ones simply skip it.
            if (typeof this.clearMemoryCache === 'function') {
                this.clearMemoryCache();
            }
        });
    });
});

casper.run();

/**
 * Translate the add-to-cart button's class attribute into a stock label.
 *
 * @param {?string} string - The button's className as read from the page;
 *     may be null/undefined when the value could not be read.
 * @returns {string} "Out of Stock" or "In Stock" for the two known class
 *     strings, otherwise "Unknown" (after logging "failed").
 */
function sort(string) {
    if (string === "button yellow add2Cart disabled") {
        return "Out of Stock";
    }
    if (string === "button yellow add2Cart") {
        return "In Stock";
    }
    console.log("failed");
    // Return an explicit label so callers that concatenate the result into
    // text do not end up writing the word "undefined".
    return "Unknown";
}

And here's the error:

$ casperjs xx.js
1   0x1b67967 phantomjs() [0x1b67967]
2   0x1b800a9 phantomjs() [0x1b800a9]
3   0x1b9b42f phantomjs() [0x1b9b42f]
4   0x1b9a217 phantomjs() [0x1b9a217]
5   0x171a61d phantomjs() [0x171a61d]
6   0x1424344 phantomjs() [0x1424344]
7   0x171de7e phantomjs() [0x171de7e]
8   0x171e7bb phantomjs() [0x171e7bb]
9   0x173973f phantomjs() [0x173973f]
10  0x175247e phantomjs() [0x175247e]
11  0x7f20fd phantomjs() [0x7f20fd]
12  0x177e84e phantomjs() [0x177e84e]
13  0xd95a1b phantomjs() [0xd95a1b]
14  0x7f4ff86f00e5 [0x7f4ff86f00e5]
PhantomJS has crashed. Please read the bug reporting guide at
<http://phantomjs.org/bug-reporting.html> and file a bug report.
Segmentation fault (core dumped)
I appreciate any help.



Ken

unread,
May 25, 2017, 7:51:37 PM5/25/17
to CasperJS
Does something like this help? It's a general-purpose web automation tool I made using CasperJS. The main idea is to use a shell script or batch file to handle the iteration, instead of having one super-long-running process. https://github.com/tebelorg/TagUI

Also, I think Scrapy (based on Python) may be a better solution for large-volume data-scraping.
Reply all
Reply to author
Forward
0 new messages