How to process a stream of lines from a file one-by-one?

47 views
Skip to first unread message

Marco Ippolito

unread,
Apr 25, 2015, 8:09:36 PM4/25/15
to nod...@googlegroups.com
Hi all,
this is my code:

#!/usr/bin/env node

var path = require('path');
var fs = require('fs');

var util = require('util');
var stream = require('stream');
var es = require('event-stream');
var normalized_path = path.normalize(process.argv[2])
var unified_unique_urls_file_path = path.join(process.cwd(), normalized_path)
var unified_unique_urls_file_name = normalized_path + "_unified_unique.txt"
var unified_unique_urls_file = unified_unique_urls_file_path + "/" + unified_unique_urls_file_name
//console.log(unified_unique_urls_file)
// http://stackoverflow.com/questions/16010915/parsing-huge-logfiles-in-node-js-read-in-line-by-line

var lineNr = 1
var rs = fs.createReadStream(unified_unique_urls_file)

rs.pipe(es.split()) // split stream to break on newlines
rs.pipe(es.map(function(line) { // es.map turns this async function into a stream
    // pause the readstream
    rs.pause();
    lineNr += 1;
    (function() {
        // process line here and call s.resume() when ready
        callback(line)
        //logMemoryUsage(lineNr);
        // resume the readstream
        rs.resume();
    })();
})
.on('error', function() {
    console.log('Error while reading file.');
    })
.on('end', function() {
    console.log('Read entire file.');
    })
);

function callback(line) {
    var lineS = line.toString()
    var lengthy_lines = [];
    var shorty_lines = [];
    //console.log("Ecco i dati in input: ", lineS);
    var lengthy = lineS.length;
    console.log("lenght of the line: ", lengthy);
    if (lengthy > 10)
        lengthy_lines.push(lineS);
    else
        shorty_lines.push(lineS);

    console.log("lenghty lines are: ", lengthy_lines)
    console.log("short lines are: ", shorty_lines)
}

and the objective is to read the txt file (located in a directory) line-by-line and get, for each line, the length of the line.
So...the objective is to read in a stream mode the file and process it line-by-line.

but the output is:
time ./stream_callback.js example_1
lenght of the line:  415
lenghty lines are:  [ 'Prova prova prova\nOur seat and site license programs allow schools, organizations and corporations to purchase digital access for multiple users at a group discount\nLa recaudación por las películas en ‘streaming’ se multiplicó por 10 en Europa en cinco años\nLe fantastiche immagini del telescopio spaziale Hubble: in un quarto di secolo ha cambiato il nostro punto di vista sul galassie, stelle, pianeti e nebulose\n' ]
short lines are:  []

instead of getting the lenght of line, it gives the total length...
Any ideas?

Looking forward to your kind help.
Marco
stream_callback_dir.tar.gz

Bruno Jouhier

unread,
Apr 26, 2015, 11:56:15 AM4/26/15
to nod...@googlegroups.com
FWIW ez-streams lets you handle it with a single read.parse.reduce chain:

var ez = require('ez-streams');

function analyze(filename, callback) {
  ez.devices.file.text.reader(filename)
    .transform(ez.transforms.lines())
    .reduce(callback, function(cb, result, line) {
      result[line.length > 10 ? 'longLines' : 'shortLines'].push(line);
      cb(null, result);
    }, { shortLines: [], longLines: [] });
}
  
Bruno
Reply all
Reply to author
Forward
0 new messages