On Tue, Aug 14, 2012 at 5:50 AM, MiaoMiao <
liy...@gmail.com> wrote:
> I tried to use Node.js to process an apache log file of 500MB, converting
> its syntax from
>
> ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/
> HTTP/1.1" 302 26
>
> to
>
> ip.ip.ip.ip - - 02/Aug/2012:05:01:17 GET /path/of/access/ HTTP/1.1 302
> 26
>
> , then writing the result to another text file.
>
> For better memory control and performance, I used `fs.createReadStream` and
> `fs.createWriteStream`, but only managed to write the first line into
> `output.txt`, because the script ends with an error:
>
> `{ [Error: EBADF, write] errno: 9, code: 'EBADF' }`
>
> Am I doing anything wrong?
>
> Here is some info that may help with debugging.
>
> Head of `input.txt`:
>
> ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/
> HTTP/1.1" 302 26
> ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/
> HTTP/1.1" 302 26
> ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/
> HTTP/1.1" 302 26
> ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/
> HTTP/1.1" 302 26
> ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/
> HTTP/1.1" 302 26
> ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/
> HTTP/1.1" 302 26
> ip.ip.ip.ip - - [02/Aug/2012:05:01:18 -0600] "GET /path/of/access/
> HTTP/1.1" 302 26
>
> Content of `output.txt`:
>
> ip.ip.ip.ip - - [02/Aug/2012:05:01:17 -0600] "GET /path/of/access/
> HTTP/1.1" 302 26
>
>
> The whole script:
>
> var fs = require('fs');
> var data ='';
> var n=0; //For line control
> var r = fs.createReadStream('./input.txt',{
> encoding: 'ascii',
> start:0,
> // end: 100000,
> });
> var w = fs.createWriteStream('./output.txt',{
> encoding:'ascii'
> });
> function put(line){ //write into w;
> ++n;
> w.write(line+'\n');
> }
> function end(){
> r.destroy();
> w.destroy();
> }
> function onData(chunk){
> var hasNewline = chunk.indexOf('\n')!==-1;
> if(hasNewline){
> var arr = chunk.split('\n');
> var first = arr.shift();
> var last = arr.pop();
> data+=first;
> put(data); //write a complete line
> arr.forEach(function(line){
> put(line); //write a complete line
> });
> data=last;
> }else{
> data+=chunk;
> }
> if(n>100){
> end();
> }
> }
> function onErr(e){
> console.log(e);
> }
>
> r.addListener( "data", onData);
> r.addListener( "end", end);
> r.addListener('error',onErr);
> w.addListener('error',onErr);
Can you post the full stack trace that you get? Also, which version
of Node.js are you running?