I'm attempting to extract a single line of a file, given that I know the pathname and the line number. Ideally I'd like to do this without reading any more of the file than is necessary.
For the purpose I'm using here, it doesn't matter if this is async or sync.
My current (bad) implementation looks like this:
/**
 * Synchronously reads `filename` and hands one of its lines to `callback`.
 *
 * The whole file is read and split on "\n"; the selected line is trimmed
 * before it is passed on.
 *
 * @param {string} filename - Path of the file to read.
 * @param {number|string} line_no - 1-based line number (1 is the first line).
 *   Parsed as a base-10 integer.
 * @param {function(null, string)} callback - Called with `(null, line)`.
 * @throws {Error} If the file has no such line, or `line_no` does not parse
 *   to a positive integer. Errors raised by `fs.readFileSync` propagate too.
 */
function get_line(filename, line_no, callback) {
  // Explicit radix so strings such as "0x10" are not read as hexadecimal.
  var index = parseInt(line_no, 10) - 1;
  var lines = fs.readFileSync(filename, 'utf8').split("\n");

  // Written as a negated range test so a NaN index is rejected as well.
  if (!(index >= 0 && index < lines.length)) {
    throw new Error('File end reached without finding line');
  }
  callback(null, lines[index].trim());
}
I tried to do something with a createReadStream, but the data events never seemed to fire. Can anyone provide a direct solution to this problem, or point me towards some NodeJS filesystem interaction documentation that is a little more example driven than the standard library API docs?
With readable stream
var fs = require('fs');
/**
 * Streams `filename` and passes one of its lines to `callback`, stopping the
 * read as soon as that line is known to be complete.
 *
 * Lines are separated by "\n" and returned untrimmed.
 *
 * @param {string} filename - Path of the file to read.
 * @param {number|string} line_no - 1-based line number (1 is the first line);
 *   numeric strings are accepted.
 * @param {function(?string, ?string)} callback - Called exactly once with
 *   `(null, line)` on success, `('Error', null)` if the stream reports an
 *   error, or `('File end reached without finding line', null)` if the file
 *   has no such line or `line_no` is not a positive integer.
 */
function get_line(filename, line_no, callback) {
  var wanted = +line_no;
  var finished = false;

  // Delivers the result at most once, whatever events still arrive.
  function finish(err, line) {
    if (finished) {
      return;
    }
    finished = true;
    callback(err, line);
  }

  // NaN, fractions and values below 1 can never match a line: answer
  // asynchronously (like every other outcome) without opening the file.
  if (!(wanted >= 1) || wanted % 1 !== 0) {
    process.nextTick(function () {
      finish('File end reached without finding line', null);
    });
    return;
  }

  var stream = fs.createReadStream(filename, {
    encoding: 'utf-8',
    highWaterMark: 64 * 1024
  });
  var fileData = '';

  stream.on('data', function (data) {
    if (finished) {
      return;
    }
    fileData += data;
    // Re-splitting everything read so far on each chunk is simple but gets
    // slow on large files; it is kept here for clarity.
    var lines = fileData.split("\n");
    // The wanted line is complete only once a newline follows it, i.e. when
    // the split produced at least one more piece after it.
    if (lines.length > wanted) {
      stream.destroy();
      finish(null, lines[wanted - 1]);
    }
  });

  stream.on('error', function () {
    finish('Error', null);
  });

  stream.on('end', function () {
    // At end of file the final piece is a complete line even though no
    // newline follows it.
    var lines = fileData.split("\n");
    if (lines.length >= wanted) {
      finish(null, lines[wanted - 1]);
    } else {
      finish('File end reached without finding line', null);
    }
  });
}
// Example usage: request line 1 of ./file.txt and print it. Failures are
// reported instead of being printed as "The line: null".
get_line('./file.txt', 1, function (err, line) {
  if (err) {
    console.error(err);
    return;
  }
  console.log('The line: ' + line);
});
Direct solution:
You should index directly into the array of lines instead of using a loop.
var fs = require('fs');
/**
 * Synchronously reads `filename` and hands one of its lines to `callback`.
 *
 * The whole file is read and split on "\n"; the line is passed on untrimmed.
 *
 * @param {string} filename - Path of the file to read.
 * @param {number|string} line_no - 1-based line number (1 is the first line);
 *   numeric strings are accepted.
 * @param {function(null, string)} callback - Called with `(null, line)`.
 * @throws {Error} If the file has no such line, or `line_no` is not a
 *   positive integer. Errors raised by `fs.readFileSync` propagate too.
 */
function get_line(filename, line_no, callback) {
  var wanted = +line_no;
  var lines = fs.readFileSync(filename, 'utf8').split("\n");

  // The range test is negated so NaN fails it; `% 1` rejects fractions.
  if (!(wanted >= 1 && wanted <= lines.length) || wanted % 1 !== 0) {
    throw new Error('File end reached without finding line');
  }
  // Line numbers are 1-based, array indexes 0-based.
  callback(null, lines[wanted - 1]);
}
// Example usage: request line 9 of ./file.txt and print whatever the
// callback receives as the line.
get_line('./file.txt', 9, function (err, line) {
  console.log('The line: ' + line);
});
for (var l in lines) isn't the most efficient way to loop over an array; you should do this instead:
// Index-based loop over `lines`; the length is read once, before iterating.
for (var i = 0, iMax = lines.length; i < iMax; i += 1) {
  /* lines[i] */
}
The asynchronous way:
var fs = require('fs');
/**
 * Asynchronously reads `filename` and passes one of its lines to `callback`.
 *
 * The whole file is read and split on "\n"; the line is passed on untrimmed.
 *
 * @param {string} filename - Path of the file to read.
 * @param {number|string} line_no - 1-based line number (1 is the first line);
 *   numeric strings are accepted.
 * @param {function(*, ?string)} callback - Called with `(null, line)` on
 *   success, with `(err, null)` when `fs.readFile` fails (`err` is the
 *   error it reported), or with
 *   `('File end reached without finding line', null)` when the file has no
 *   such line or `line_no` is not a positive integer.
 */
function get_line(filename, line_no, callback) {
  var wanted = +line_no;

  fs.readFile(filename, 'utf8', function (err, data) {
    // Throwing here could not be caught by the caller; report the read
    // failure through the callback instead.
    if (err) {
      return callback(err, null);
    }

    var lines = data.split("\n");
    // The range test is negated so NaN fails it; `% 1` rejects fractions.
    if (!(wanted >= 1 && wanted <= lines.length) || wanted % 1 !== 0) {
      return callback('File end reached without finding line', null);
    }
    // Line numbers are 1-based, array indexes 0-based.
    callback(null, lines[wanted - 1]);
  });
}
// Example usage: request line 9 of ./file.txt and print it. Failures are
// reported instead of being printed as "The line: null".
get_line('./file.txt', 9, function (err, line) {
  if (err) {
    console.error(err);
    return;
  }
  console.log('The line: ' + line);
});
without reading any more of the file than is necessary
EDIT: the module is unmaintained; I recommend using other modules to read line by line, for example using transform streams: http://strongloop.com/strongblog/practical-examples-of-the-new-node-js-streams-api/
With a BufferedReader:
// Prints the n-th line delivered by a BufferedReader over "file".
// `n` counts down as lines arrive; `l` holds the captured line and is still
// null when "end" fires if fewer than n lines were delivered.
var n = 10;
var l = null;

// Per the original author's note, the reader buffers internally (16KB by
// default); a smaller buffer, for example 4KB, can be requested with:
// new BufferedReader("file", { encoding: "utf8", bufferSize: 4 * 1024 })
new BufferedReader("file", { encoding: "utf8" })
  .on("error", function (error) {
    // Read failures are only logged.
    console.log(error);
  })
  .on("line", function (line) {
    n -= 1;
    if (n !== 0) {
      return;
    }
    l = line;
    // interrupt() stops the reading once the wanted line has been captured.
    this.interrupt();
  })
  .on("end", function () {
    // The n-th line (or null if it was never reached).
    console.log(l);
  })
  .read();
You can greatly improve the performance of FGRibreau's answer by discarding the already-processed data held in the "fileData" variable.
/**
 * Streams `file` and passes one of its lines to `cb`, keeping only the
 * unfinished tail of the text in memory between chunks.
 *
 * Lines are separated by "\n" and returned untrimmed. The read is stopped as
 * soon as the wanted line is known to be complete.
 *
 * @param {string} file - Path of the file to read.
 * @param {number|string} line_no - 1-based line number (1 is the first line);
 *   numeric strings are accepted.
 * @param {function(?string, ?string)} cb - Called exactly once with
 *   `(null, line)` on success, `('Error', null)` if the stream reports an
 *   error, or `('File end reached without finding line', null)` if the file
 *   has no such line or `line_no` is not a positive integer.
 */
function get_line(file, line_no, cb) {
  var wanted = +line_no;
  var finished = false;

  // Delivers the result at most once, whatever events still arrive.
  function finish(err, line) {
    if (finished) {
      return;
    }
    finished = true;
    cb(err, line);
  }

  // NaN, fractions and values below 1 can never match a line: answer
  // asynchronously (like every other outcome) without opening the file.
  if (!(wanted >= 1) || wanted % 1 !== 0) {
    process.nextTick(function () {
      finish('File end reached without finding line', null);
    });
    return;
  }

  var stream = fs.createReadStream(file, {
    encoding: 'utf-8',
    highWaterMark: 64 * 1024
  });
  var fileData = '';   // text of the line that is still being assembled
  var linesSeen = 0;   // number of complete lines already discarded

  stream.on('data', function (data) {
    if (finished) {
      return;
    }
    var lines = (fileData + data).split('\n');
    // The last piece has no newline after it yet, so it is not a full line.
    var tail = lines.pop();
    if (linesSeen + lines.length >= wanted) {
      stream.destroy();
      finish(null, lines[wanted - linesSeen - 1]);
    }
    // Drop every completed line and keep only the unfinished tail, so that
    // a line split across two chunks is reassembled intact.
    else {
      linesSeen += lines.length;
      fileData = tail;
    }
  });

  stream.on('error', function () {
    finish('Error', null);
  });

  stream.on('end', function () {
    // At end of file the remaining tail is the final line.
    if (linesSeen + 1 === wanted) {
      finish(null, fileData);
    } else {
      finish('File end reached without finding line', null);
    }
  });
}
Using a 70,000-line file, displaying line n°50000 gave these results:
real 0m3.504s
user 0m0.000s
sys 0m0.015s
For the same example with the else I got the following:
real 0m0.540s
user 0m0.015s
sys 0m0.031s
This also implies a much lower memory consumption.
精彩评论