In Node.js, how to stop a FOR loop until the MongoDB call returns

2020-06-17 02:12发布

问题:

Please look at the code snippet below. I have an array of JSON objects called 'stuObjList'. I want to loop thru the array to find specific JSON objects with a certain flag set, and then make a db call to retrieve more data.

Of course, the FOR loop doesn't wait for the db call to return and reaches the end with j == length. And when the db call returns, the index 'j' is beyond the array index. I understand how node.js works and this is the expected behavior.

My question is: what is the workaround here? How can I achieve what I am trying to achieve?

...............
...............
...............
else
{
  // Log how many student objects are about to be scanned.
  console.log("stuObjList.length: " + stuObjList.length);
  // 'j' is declared with var, so there is a single binding shared by the
  // loop and by every findOne() callback created inside it.
  var j = 0;
  for(j = 0; j < stuObjList.length; j++)
  {
    // Only entries whose 'honor_student' flag is set (not null/undefined)
    // trigger a database lookup.
    if(stuObjList[j]['honor_student'] != null)
    {     
      // NOTE(review): the ';' after stuObjList[j]['_id'] inside the object
      // literal below is a syntax error as written.
      db.collection("students").findOne({'_id' : stuObjList[j]['_id'];}, function(err, origStuObj)
      {
        // As described in the question text, this callback runs after the
        // loop has finished, so 'j' already equals stuObjList.length and
        // stuObjList[j] is undefined here.
        var marker = stuObjList[j]['_id'];
        var major = stuObjList[j]['major'];
      });
    }

    // Inside the loop body 'j < stuObjList.length' always holds, so this
    // condition is never true here and 'callback' is never scheduled.
    if(j == stuObjList.length)
    {
      process.nextTick(function()
      {
        callback(stuObjList);
      });
    }
  }
}
});

回答1:

"async" is a very popular module for abstracting away asynchronous looping and making your code easier to read/maintain. For example:

var async = require('async');

/**
 * Collects the database record of every honor student in `stuObjList`.
 *
 * @param {Array<Object>} stuObjList - Student objects. Entries whose
 *     `honor_student` property is falsy are skipped; the others are looked
 *     up in the "students" collection by their `_id`.
 * @param {Function} callback - Called as `callback(err, honorStudents)` when
 *     async.forEach() reports that iteration is over. `err` is whatever a
 *     failed lookup passed along; `honorStudents` holds the records gathered
 *     so far.
 */
function getHonorStudentsFrom(stuObjList, callback) {

    var honorStudents = [];

    // Worker handed to async.forEach(): it receives one student object plus
    // a completion function. Calling that function with an argument signals
    // an error, which ends the iteration and is forwarded to reportResult().
    function lookUpHonorStudent(stuObj, done) {
        if (stuObj.honor_student) {
            db.collection("students").findOne({'_id' : stuObj._id}, function(err, honorStudent) {
                if (err) {
                    done(err);
                } else {
                    honorStudents.push(honorStudent);
                    done();
                }
            });
        } else {
            // Not an honor student: nothing to fetch, move straight on.
            done();
        }
    }

    // Final step: the caller inspects `err` (null/undefined means success)
    // to find out whether something went wrong.
    function reportResult(err) {
        callback(err, honorStudents);
    }

    // Runs lookUpHonorStudent once for each element of stuObjList.
    async.forEach(stuObjList, lookUpHonorStudent, reportResult);
}

So you could use it like this:

// Example caller: the callback receives the error (if any) as its first
// argument and the collected honor-student records as its second.
getHonorStudentsFrom(studentObjs, function(err, honorStudents) {
    if(err) {
      // Handle the error
      return;
    }

    // Do something with honorStudents
});

Note that .forEach() will call your iterator function for each element in stuObjList "in parallel" (i.e., it won't wait for one iterator function to finish being called for one array element before calling it on the next array element). This means that you can't really predict the order in which the iterator functions--or more importantly, the database calls--will run. End result: unpredictable order of honor students. If the order matters, use the .forEachSeries() function.



回答2:

Ah the beauty and frustration of thinking asynchronously. Try this:

...............
...............
...............
else
{
  console.log("stuObjList.length: " + stuObjList.length);
  // found:   set once at least one honor student triggered a db lookup.
  // pending: number of findOne() calls that have not called back yet.
  var found = false, pending = 0;
  // forEach gives each iteration its own 'stuObj' binding, so every db
  // callback still refers to the student it was issued for, even though the
  // loop itself has long finished by the time the callback runs.
  stuObjList.forEach(function(stuObj)
  {
    if(stuObj['honor_student'] != null)
    {
      found = true;
      pending++;
      db.collection("students").findOne({'_id' : stuObj['_id']}, function(err, origStuObj)
      {
        var marker = stuObj['_id'];
        var major = stuObj['major'];
        // Report back only when the last outstanding lookup has returned, so
        // 'callback' runs exactly once. This assumes findOne() invokes its
        // callback asynchronously, as the question describes.
        pending--;
        if (pending === 0)
        {
          process.nextTick(function()
          {
            callback(stuObjList);
          });
        }
      });
    }
  });
  // No honor students means no db callback will ever fire, so finish here.
  if (!found) {
    process.nextTick(function()
    {
      callback(stuObjList);
    });
  }
}
});

If you find your "when I'm done" steps are getting complicated, extract them to another function, and just call it from each spot. In this case since it was only 2 lines, it seemed fair to duplicate.



回答3:

given the requirement, you can also use underscore's "filter" method http://documentcloud.github.com/underscore/#filter

// Keep only the students whose 'honor_student' flag is set (Underscore's _.filter).
var honor_students = _.filter(stuObjList, function(stud) { return stud['honor_student'] != null; });
if (honor_students.length === 0) {
  // Nothing to look up: report back right away (still asynchronously).
  process.nextTick(function() { callback(stuObjList); });
} else {
  var honor_students_with_more_data = [];
  // Number of lookups that have not called back yet. Counting completions
  // (rather than collected results) guarantees the final callback still
  // fires, exactly once, when some lookups fail.
  var pending = honor_students.length;
  for (var i = 0; i < honor_students.length; i++) {
    db.collection("students").findOne({'_id' : honor_students[i]['_id']}, function(err, origStuObj) {
      if (err) {
        // A failed lookup contributes no data; make the failure visible.
        console.error(err);
      } else {
        // do something with retrieved data
        honor_students_with_more_data.push(origStuObj);
      }
      pending--;
      if (pending === 0) {
        process.nextTick(function() { callback(stuObjList); });
      }
    });
  }
}


回答4:

And when the db call returns, the index 'j' is beyond the array index.

It seems to me that you need to take a "copy" of j on each loop iteration. You can do this with closures.

// Only students whose 'honor_student' flag is set need the extra lookup.
if(stuObjList[j]['honor_student'] != null)
{

    // Immediately invoked function: the current value of j is passed in as
    // j_copy, so the db callback keeps using that value no matter how far
    // the surrounding loop has advanced j in the meantime.
    (function(j_copy){
        db.collection("students").findOne({'_id' : stuObjList[j_copy]['_id']}, function(err, origStuObj)
        {
            var marker = stuObjList[j_copy]['_id'];
            var major = stuObjList[j_copy]['major'];
        });
    })(j);

}

This way you are saving j's state on each iteration. This state is saved inside each IIFE. You will have as many saved states as there are iterations of the for loop that reach this code. When the DB returns:

var marker = stuObjList[j_copy]['_id'];

j_copy will keep the value of the original j , which it has in the moment of

if(stuObjList[j]['honor_student'] != null)

I know my explaining skills are very bad, but i hope you can understand what i mean.

Edit: This way we are using an immediately invoked function and its scope to keep a separate private copy of j. On each iteration a new IIFE is created with its own private scope. In this scope, on each for iteration, we do j_copy = j. And this j_copy can be used inside the IIFE without being overwritten by the for loop each time.