Read a file one line at a time in node.js?

2018-12-31 08:17发布

I am trying to read a large file one line at a time. I found a question on Quora that dealt with the subject but I'm missing some connections to make the whole thing fit together.

 var Lazy = require("lazy");

 // Wrap stdin in a Lazy instance and take its `lines` sequence.
 var stdinLines = new Lazy(process.stdin).lines;

 // Print every line; toString() is applied to each item before logging.
 stdinLines.forEach(function printLine(line) {
     console.log(line.toString());
 });

 // Start the flow of data from stdin.
 process.stdin.resume();

The bit that I'd like to figure out is how I might read one line at a time from a file instead of STDIN as in this sample.

I tried:

 /**
  * Callback handed to fs.open.
  * Rethrows the open error if there is one; otherwise `fd` is available
  * for the (not yet written) lazy read.
  */
 function Process(err, fd) {
    if (err) {
        throw err;
    }
    // DO lazy read 
 }

 // Open the file read-only and hand the result to Process.
 fs.open('./VeryBigFile.csv', 'r', '0666', Process);

but it's not working. I know that in a pinch I could fall back to using something like PHP, but I would like to figure this out.

I don't think the other answer would work as the file is much larger than the server I'm running it on has memory for.

25条回答
妖精总统
2楼-- · 2018-12-31 08:40

With the carrier module:

var carrier = require('carrier');

// Called by carrier with each line it delivers from the stream.
function reportLine(line) {
    console.log('got one line: ' + line);
}

process.stdin.resume();
carrier.carry(process.stdin, reportLine);
查看更多
深知你不懂我心
3楼-- · 2018-12-31 08:42

I use this:

/**
 * Makes `stream` emit a 'line' event for every line found in its 'data' events.
 *
 * Each emitted line includes its terminator. Any text left over when the
 * stream ends (a last line without a terminator) is emitted as a final 'line'.
 *
 * @param {EventEmitter} stream - emitter producing 'data' and 'end' events.
 * @param {RegExp} [re=/\n/] - pattern matching a line terminator.
 */
function emitLines(stream, re){
    // Fall back to a plain newline when the caller gives no separator.
    re = re || /\n/;
    var buffer = '';

    stream.on('data', stream_data);
    stream.on('end', stream_end);

    function stream_data(data){
        buffer += data;
        flush();
    }//stream_data

    function stream_end(){
        // Emit the trailing partial line, if any.
        if(buffer) stream.emit('line', buffer);
    }//stream_end

    function flush(){
        var match;
        while(true){
            // A global/sticky regex keeps state between exec() calls; always
            // search from the start of what is still buffered.
            re.lastIndex = 0;
            match = re.exec(buffer);
            if(!match) break;

            var index = match.index + match[0].length;
            // A zero-length match at position 0 would never consume input.
            if(index === 0) break;

            stream.emit('line', buffer.substring(0, index));
            buffer = buffer.substring(index);
        }
    }//flush

}//emitLines

Use this function on a stream and listen for the 'line' events that it will emit.

gr-

查看更多
爱死公子算了
4楼-- · 2018-12-31 08:42

there is a very nice module for reading a file line by line, it's called line-reader

with it you simply just write:

var lineReader = require('line-reader');

// Receives each line together with the `last` flag supplied by line-reader.
function handleLine(line, last) {
  console.log(line);
  // do whatever you want with line...
  if (last) {
    // or check if it's the last one
  }
}

lineReader.eachLine('file.txt', handleLine);

you can even iterate the file with a "java-style" interface, if you need more control:

lineReader.open('file.txt', function(reader) {
  // Nothing to print when the reader reports no further line.
  if (!reader.hasNextLine()) {
    return;
  }

  // hasNextLine() is checked once, so a single line is requested here.
  reader.nextLine(function(line) {
    console.log(line);
  });
});
查看更多
看淡一切
5楼-- · 2018-12-31 08:48

You don't have to open the file, but instead, you have to create a ReadStream.

fs.createReadStream

Then pass that stream to Lazy

查看更多
余生请多指教
6楼-- · 2018-12-31 08:50

I wanted to tackle this same problem, basically what in Perl would be:

# Read the input one line at a time into $_ and pass each line to process_line.
while (<>) {
    process_line($_);
}

My use case was just a standalone script, not a server, so synchronous was fine. These were my criteria:

  • Minimal synchronous code that I could reuse in many projects.
  • No limits on file size or number of lines.
  • No limits on length of lines.
  • Able to handle full Unicode in UTF-8, including characters beyond the BMP.
  • Able to handle *nix and Windows line endings (old-style Mac not needed for me).
  • Line endings character(s) to be included in lines.
  • Able to handle last line with or without end-of-line characters.
  • Not use any external libraries not included in the node.js distribution.

This is a project for me to get a feel for low-level scripting type code in node.js and decide how viable it is as a replacement for other scripting languages like Perl.

After a surprising amount of effort and a couple of false starts this is the code I came up with. It's pretty fast but less trivial than I would've expected: (fork it on GitHub)

var fs            = require('fs'),
    StringDecoder = require('string_decoder').StringDecoder,
    util          = require('util');

/**
 * Synchronously reads the file behind `fd` from its current position and
 * delivers it one line at a time.
 *
 * Lines are split on LF and handed over with their line ending included
 * (so "\r\n" endings are preserved as-is). A final line without a line ending
 * is delivered too. Input is decoded as UTF-8; multi-byte sequences that
 * straddle a read boundary are reassembled by the StringDecoder.
 *
 * @param {number} fd - open file descriptor to read from.
 * @param {function(string)} [onLine] - called with each line; when omitted,
 *     the function `processLine` from the enclosing scope is used.
 */
function lineByLine(fd, onLine) {
  var handleLine = typeof onLine === 'function' ? onLine : processLine;

  var CHUNK_SIZE = 16384;
  // Buffer.alloc replaces the deprecated `new Buffer(size)` constructor.
  var chunk = Buffer.alloc(CHUNK_SIZE);
  var decoder = new StringDecoder('utf8');

  var blob = '';        // decoded text not yet fully handed out
  var blobStart = 0;    // index in blob where the next line begins
  var searchFrom = 0;   // index from which blob has not been scanned for LF
  var atEof = false;

  while (true) {
    var eolPos = blob.indexOf('\n', searchFrom);

    if (eolPos === -1 && !atEof) {
      // No complete line buffered yet: remember how far we scanned and read on.
      searchFrom = blob.length;

      var bytesRead = fs.readSync(fd, chunk, 0, CHUNK_SIZE, null);

      if (bytesRead === 0) {
        // Only a zero-length read means EOF; a short read (pipe, tty) does not.
        atEof = true;
        // Flush any incomplete multi-byte sequence held by the decoder.
        blob += decoder.end();
      } else {
        blob += decoder.write(chunk.subarray(0, bytesRead));
      }
      continue;
    }

    // Either a LF was found, or we are at EOF and the rest is the last line.
    var lineEnd = eolPos === -1 ? blob.length : eolPos + 1;

    if (lineEnd === blobStart) {
      // EOF reached and nothing left to deliver.
      break;
    }

    handleLine(blob.substring(blobStart, lineEnd));
    blobStart = lineEnd;
    searchFrom = lineEnd;

    // Drop consumed text once enough has piled up, so blob stays bounded by
    // roughly one chunk plus the longest line.
    if (blobStart >= CHUNK_SIZE) {
      blob = blob.substring(blobStart);
      blobStart = 0;
      searchFrom = 0;
    }
  }
}

It could probably be cleaned up further, it was the result of trial and error.

查看更多
心情的温度
7楼-- · 2018-12-31 08:52

Old topic, but this works:

var fs = require('fs');
var readline = require('readline');

// Feed a file read stream into a readline interface; with `terminal: false`
// the input is treated as a plain stream rather than an interactive TTY.
var rl = readline.createInterface({
      input : fs.createReadStream('/path/file.txt'),
      output: process.stdout,
      terminal: false
});

// One 'line' event is emitted per line of input.
rl.on('line',function(line){
     console.log(line); //or parse line
});

Simple. No need for an external module.

查看更多
登录 后发表回答