I've got a problem with this code in node.js. I want to 开发者_Python百科recursively walk through a directory tree and apply the callback action
to every file in the tree. This is my code at the moment:
var fs = require("fs");
// General function
var dive = function (dir, action) {
// Assert that it's a function
if (typeof action !== "function")
action = function (error, file) { };
// Read the directory
fs.readdir(dir, function (err, list) {
// Return the error if something went wrong
if (err)
return action(err);
// For every file in the list
list.forEach(function (file) {
// Full path of that file
path = dir + "/" + file;
// Get the file's stats
fs.stat(path, function (err, stat) {
console.log(stat);
// If the file is a directory
if (stat && stat.isDirectory())
// Dive into the directory
dive(path, action);
else
// Call the action
action(null, path);
});
});
});
};
The problem is that in the for each loop stat is called for every file via the variable path
. When the callback is called, path
already has another value and so it dive
s into the wrong directories or calls the action
for the wrong files.
Probably this problem could easily get solved by using fs.statSync
, but this is not the solution I would prefer, since it is blocking the process.
var path = dir + "/" + file;
You forgot to make path
a local variable. Now it won't be changed behind your back in the loop.
Use node-dir for this. Because you need a separate action for directories and files, I'll give you 2 simple iterators using node-dir.
Asynchronously iterate the files of a directory and its subdirectories and pass an array of file paths to a callback.
var dir = require('node-dir');
dir.files(__dirname, function(err, files) {
if (err) throw err;
console.log(files);
//we have an array of files now, so now we'll iterate that array
files.forEach(function(filepath) {
actionOnFile(null, filepath);
})
});
Asynchronously iterate the subdirectories of a directory and its subdirectories and pass an array of directory paths to a callback.
var dir = require('node-dir');
dir.subdirs(__dirname, function(err, subdirs) {
if (err) throw err;
console.log(subdirs);
//we have an array of subdirs now, so now we'll iterate that array
subdirs.forEach(function(filepath) {
actionOnDir(null, filepath);
})
});
Another suitable library is filehound. It supports file filtering (if required), callbacks and promises.
For example:
const Filehound = require('filehound');
function action(file) {
console.log(`process ${file}`)
}
Filehound.create()
.find((err, files) => {
if (err) {
return console.error(`error: ${err}`);
}
files.forEach(action);
});
The library is well documented and provides numerous examples of common use cases. https://github.com/nspragg/filehound
Disclaimer: I'm the author.
Not sure if I should really post this as an answer, but for your convenience and other users, here is a rewritten version of OP's which might prove useful. It provides:
- Better error management support
- A global completion callback which is called when the exploration is complete
The code:
/**
* dir: path to the directory to explore
* action(file, stat): called on each file or until an error occurs. file: path to the file. stat: stat of the file (retrived by fs.stat)
* done(err): called one time when the process is complete. err is undifined is everything was ok. the error that stopped the process otherwise
*/
var walk = function(dir, action, done) {
// this flag will indicate if an error occured (in this case we don't want to go on walking the tree)
var dead = false;
// this flag will store the number of pending async operations
var pending = 0;
var fail = function(err) {
if(!dead) {
dead = true;
done(err);
}
};
var checkSuccess = function() {
if(!dead && pending == 0) {
done();
}
};
var performAction = function(file, stat) {
if(!dead) {
try {
action(file, stat);
}
catch(error) {
fail(error);
}
}
};
// this function will recursively explore one directory in the context defined by the variables above
var dive = function(dir) {
pending++; // async operation starting after this line
fs.readdir(dir, function(err, list) {
if(!dead) { // if we are already dead, we don't do anything
if (err) {
fail(err); // if an error occured, let's fail
}
else { // iterate over the files
list.forEach(function(file) {
if(!dead) { // if we are already dead, we don't do anything
var path = dir + "/" + file;
pending++; // async operation starting after this line
fs.stat(path, function(err, stat) {
if(!dead) { // if we are already dead, we don't do anything
if (err) {
fail(err); // if an error occured, let's fail
}
else {
if (stat && stat.isDirectory()) {
dive(path); // it's a directory, let's explore recursively
}
else {
performAction(path, stat); // it's not a directory, just perform the action
}
pending--; checkSuccess(); // async operation complete
}
}
});
}
});
pending--; checkSuccess(); // async operation complete
}
}
});
};
// start exploration
dive(dir);
};
Don't reinvent the wheel - use and contribute to open source instead. Try one of the following:
- https://github.com/pvorb/node-dive
- https://github.com/coolaj86/node-walk
There is an NPM module for this:
npm dree
Example:
const dree = require('dree');
const options = {
depth: 5, // To stop after 5 directory levels
exclude: /dir_to_exclude/, // To exclude some pahts with a regexp
extensions: [ 'txt', 'jpg' ] // To include only some extensions
};
const fileCallback = function (file) {
action(file.path);
};
let tree;
// Doing it synchronously
tree = dree.scan('./dir', options, fileCallback);
// Doing it asynchronously (returns promise)
tree = await dree.scanAsync('./dir', options, fileCallback);
// Here tree contains an object representing the whole directory tree (filtered with options)
function loop( ) {
var item = list.shift( );
if ( item ) {
// content of the loop
functionWithCallback( loop );
} else {
// after the loop has ended
whatever( );
}
}
精彩评论