开发者

Walking a directory with Node.js [duplicate]

开发者 https://www.devze.com 2023-03-27 22:18 出处:网络
This question already has answers here: What is the purpose of the var keyword and when should I use it (or omit it)?
This question already has answers here: What is the purpose of the var keyword and when should I use it (or omit it)? (19 answers) JavaScript closure inside loops – simple practical example (44 answers) Closed last year.

I've got a problem with this code in node.js. I want to recursively walk through a directory tree and apply the callback action to every file in the tree. This is my code at the moment:

var fs = require("fs");

/**
 * Recursively reads the directory `dir` and reports its entries to `action`.
 *
 * @param {string} dir - Directory to read.
 * @param {function} action - Called as action(err) when reading a directory
 *   fails, or as action(null, path) for every entry that is not detected as a
 *   directory. Replaced by a no-op when it is not a function.
 */
var dive = function (dir, action) {
  // Assert that it's a function
  if (typeof action !== "function")
    action = function (error, file) { };

  // Read the directory
  fs.readdir(dir, function (err, list) {
    // Return the error if something went wrong
    if (err)
      return action(err);

    // For every file in the list
    list.forEach(function (file) {
      // Full path of that file
      // NOTE(review): `path` is assigned without var/let/const, so it is not
      // local to this callback; every iteration overwrites the same variable,
      // and the fs.stat callbacks below read whatever value it holds when they run.
      path = dir + "/" + file;
      // Get the file's stats
      fs.stat(path, function (err, stat) {
        // NOTE(review): debug output. The `err` argument is never inspected, so
        // an entry whose stat is falsy falls through to the `else` branch.
        console.log(stat);
        // If the file is a directory
        if (stat && stat.isDirectory())
          // Dive into the directory
          dive(path, action);
        else
          // Call the action
          action(null, path);
      });
    });
  });
};

The problem is that inside the forEach loop, fs.stat is called for every file using the variable path. By the time the stat callback runs, path already holds another value, so the code dives into the wrong directories or calls the action for the wrong files.

Probably this problem could easily get solved by using fs.statSync, but this is not the solution I would prefer, since it is blocking the process.


var path = dir + "/" + file;

You forgot to make path a local variable. Declared with var as shown above, it is local to each forEach callback, so it won't be changed behind your back in the loop.


Use node-dir for this. Because you need a separate action for directories and files, I'll give you 2 simple iterators using node-dir.

Asynchronously iterate the files of a directory and its subdirectories and pass an array of file paths to a callback.

var dir = require('node-dir');

// Ask node-dir for the file paths under __dirname, then process each of them.
dir.files(__dirname, (err, filePaths) => {
  if (err) {
    throw err;
  }
  console.log(filePaths);
  // The complete list is available here; hand the paths over one by one.
  filePaths.forEach((filePath) => {
    actionOnFile(null, filePath);
  });
});

Asynchronously iterate the subdirectories of a directory and its subdirectories and pass an array of directory paths to a callback.

var dir = require('node-dir');

// Ask node-dir for the subdirectory paths under __dirname, then process each of them.
dir.subdirs(__dirname, (err, subdirPaths) => {
  if (err) {
    throw err;
  }
  console.log(subdirPaths);
  // The complete list is available here; hand the directories over one by one.
  subdirPaths.forEach((subdirPath) => {
    actionOnDir(null, subdirPath);
  });
});


Another suitable library is filehound. It supports file filtering (if required), callbacks and promises.

For example:

const Filehound = require('filehound');

/**
 * Logs a "process <file>" line for the given file.
 *
 * @param {*} file - Value interpolated into the log message.
 */
function action(file) {
  const message = `process ${file}`;
  console.log(message);
}

// Reports a failed search on stderr; otherwise passes every match to `action`.
const onSearchFinished = (err, files) => {
    if (err) {
        return console.error(`error: ${err}`);
    }

    files.forEach(action);
};

Filehound.create().find(onSearchFinished);

The library is well documented and provides numerous examples of common use cases. https://github.com/nspragg/filehound

Disclaimer: I'm the author.


Not sure if I should really post this as an answer, but for your convenience and other users, here is a rewritten version of OP's which might prove useful. It provides:

  • Better error management support
  • A global completion callback which is called when the exploration is complete

The code:

/**
 * Recursively walks a directory tree and invokes a callback for every entry
 * that is not a directory.
 *
 * @param {string} dir - Path to the directory to explore.
 * @param {function(string, fs.Stats): void} action - Called as
 *   action(file, stat) for each non-directory entry, where `file` is the
 *   entry's path and `stat` is the result retrieved by fs.stat. If calling it
 *   throws, the walk stops and the thrown value is reported as the error.
 * @param {function(Error=): void} [done] - Called when the walk ends: with no
 *   argument once the whole tree has been explored, or with the error that
 *   stopped the walk. When omitted, the stopping error is thrown instead and
 *   successful completion is silent.
 */
var walk = function (dir, action, done) {
    // Without a completion callback, fall back to a handler that rethrows:
    // finishing the walk must not crash, and a failure must not be hidden.
    const finish = typeof done === "function"
        ? done
        : function (err) {
            if (err) {
                throw err;
            }
        };

    // Set once an error has been reported; later callbacks then do nothing.
    let dead = false;

    // Number of asynchronous operations (readdir/stat) still in flight.
    let pending = 0;

    // Reports the first error and stops the walk.
    const fail = function (err) {
        if (dead) {
            return;
        }
        dead = true;
        finish(err);
    };

    // Marks one asynchronous operation as finished and reports success when
    // it was the last one outstanding.
    const complete = function () {
        pending--;
        if (!dead && pending === 0) {
            finish();
        }
    };

    // Runs the user callback, turning anything it throws into a failure.
    const performAction = function (file, stat) {
        if (dead) {
            return;
        }
        try {
            action(file, stat);
        } catch (error) {
            fail(error);
        }
    };

    // Explores one directory, sharing the state defined above.
    const dive = function (current) {
        pending++; // the readdir below is now in flight
        fs.readdir(current, function (err, list) {
            if (dead) {
                return;
            }
            if (err) {
                fail(err);
                return;
            }

            list.forEach(function (file) {
                if (dead) {
                    return;
                }
                const fullPath = current + "/" + file;
                pending++; // the stat below is now in flight
                fs.stat(fullPath, function (err, stat) {
                    if (dead) {
                        return;
                    }
                    if (err) {
                        fail(err);
                        return;
                    }

                    if (stat && stat.isDirectory()) {
                        // Registers its own pending operation before this
                        // one is completed, so the counter cannot hit zero early.
                        dive(fullPath);
                    } else {
                        performAction(fullPath, stat);
                    }
                    complete(); // stat finished
                });
            });

            complete(); // readdir finished
        });
    };

    // start exploration
    dive(dir);
};


Don't reinvent the wheel - use and contribute to open source instead. Try one of the following:

  • https://github.com/pvorb/node-dive
  • https://github.com/coolaj86/node-walk


There is an NPM module for this:

npm dree

Example:

const dree = require('dree');
// Options handed to dree.scan / dree.scanAsync below.
const options = {
    depth: 5,                        // To stop after 5 directory levels
    exclude: /dir_to_exclude/,       // To exclude some paths with a regexp
    extensions: [ 'txt', 'jpg' ]     // To include only some extensions
};

// Passed to dree.scan / dree.scanAsync below; forwards the `path` property of
// its argument to `action`.
const fileCallback = function (file) {
    action(file.path);
};

let tree;

// Doing it synchronously
tree = dree.scan('./dir', options, fileCallback);

// Doing it asynchronously (returns promise)
// NOTE: `await` is only valid inside an async function or at the top level of
// an ES module.
tree = await dree.scanAsync('./dir', options, fileCallback);

// Here tree contains an object representing the whole directory tree (filtered with options)


/**
 * Processes the items of `list` one at a time: takes the next item, runs an
 * asynchronous step, and is called again by that step's callback. Once the
 * list is empty, `whatever` is called.
 */
function loop( ) {
    // Test the length rather than the shifted value, so that falsy items
    // (0, "", null, ...) are processed instead of ending the loop early.
    if ( list.length === 0 ) {
        // after the loop has ended
        whatever( );
        return;
    }

    const item = list.shift( );
    // content of the loop (works on `item`)
    functionWithCallback( loop );
}
0

精彩评论

暂无评论...
验证码 换一张
取 消