Find whether someone got a birthday in the next 30

2020-02-05 04:48发布

问题:

Let's say that we got a collection of users, each with a birthday in BSON type date format.

How can we run a query to find out all the users who got a birthday in the next 30 days ?

回答1:

Aggregation framework is definitely the right approach - anything that requires JS on the server is a performance problem, while aggregations all run in the server in native code.

While it's possible to transform the birthday into dates of upcoming birthdays and then do a range query, I prefer to do it a slightly different way myself.

The only "prerequisite is to compute today's day of the year". There are ways to do this in various languages, so this could be done in the application layer before calling the aggregation, passing this number to it. I was going to call mine todayDayOfYear but I realized you can let aggregation framework figure it out based on today, so the only variable will be today's date.

var today=new Date();

I'm assuming document that includes name and birthday, adjust appropriately for variations

var p1 = { "$project" : {
            "_id" : 0,
            "name" : 1,
            "birthday" : 1,
            "todayDayOfYear" : { "$dayOfYear" : today }, 
            "dayOfYear" : { "$dayOfYear" : "$birthday"}
} };

Now, project how many days from today till their next birthday:

var p2 = { "$project" : {
        "name" : 1,
        "birthday" : 1,
        "daysTillBirthday" : { "$subtract" : [
             { "$add" : [ 
                     "$dayOfYear",
             { "$cond" : [{"$lt":["$dayOfYear","$todayDayOfYear"]},365,0 ] }
             ] },
             "$todayDayOfYear"
        ] }
} };

Exclude all but the ones within desired range:

var m = { "$match" : { "daysTillBirthday" : { "$lt" : 31 } } };

Now run the aggregation with:

db.collection.aggregate( p1, p2, m );

to get back a list of names, birthdays and days till birthday for all lucky folks whose birthday is within 30 days.

EDIT

@Sean999 caught an interesting edge case - people who were born in a leap year after February 28th will have their calculation off by one. The following is aggregation that correctly adjusts for that:

var p1 = { "$project" : { 
            "_id" : 0,
            "name" : 1,
            "birthday" : 1, 
            "todayDayOfYear" : { "$dayOfYear" : ISODate("2014-03-09T12:30:51.515Z") },
            "leap" : { "$or" : [ 
                  { "$eq" : [ 0, { "$mod" : [ { "$year" : "$birthday" }, 400 ] } ] }, 
                  { "$and" : [ 
                        { "$eq" : [ 0, { "$mod" : [ { "$year" : "$birthday" }, 4 ] } ] }, 
                        { "$ne" : [ 0, { "$mod" : [ { "$year" : "$birthday" }, 100 ] } ] } ] } ] },
            "dayOfYear" : { "$dayOfYear" : "$birthday" } } };

var p1p = { "$project" : {
                "name" : 1,
                "birthday" : 1,
                "todayDayOfYear" : 1,
                "dayOfYear" : { "$subtract" : [ 
                      "$dayOfYear", 
                      { "$cond" : [ { "$and" : [ "$leap", { "$gt" : [ "$dayOfYear", 59 ] } ] }, 1, 0 ] } ] }
        }
}

p2 and m stay the same as above.

Test input:

db.birthdays.find({},{name:1,birthday:1,_id:0})
{ "name" : "Ally", "birthday" : ISODate("1975-06-12T00:00:00Z") }
{ "name" : "Ben", "birthday" : ISODate("1968-04-03T00:00:00Z") }
{ "name" : "Mark", "birthday" : ISODate("1949-12-23T00:00:00Z") }
{ "name" : "Paul", "birthday" : ISODate("2014-03-04T15:59:05.374Z") }
{ "name" : "Paul", "birthday" : ISODate("2011-02-07T00:00:00Z") }
{ "name" : "Sean", "birthday" : ISODate("2004-01-31T00:00:00Z") }
{ "name" : "Tim", "birthday" : ISODate("2008-02-28T00:00:00Z") }
{ "name" : "Sandy", "birthday" : ISODate("2005-01-31T00:00:00Z") }
{ "name" : "Toni", "birthday" : ISODate("2009-02-28T00:00:00Z") }
{ "name" : "Sam", "birthday" : ISODate("2005-03-31T00:00:00Z") }
{ "name" : "Max", "birthday" : ISODate("2004-03-31T00:00:00Z") }
{ "name" : "Jen", "birthday" : ISODate("1971-04-03T00:00:00Z") }
{ "name" : "Ellen", "birthday" : ISODate("1996-02-28T00:00:00Z") }
{ "name" : "Fanny", "birthday" : ISODate("1996-02-29T00:00:00Z") }
{ "name" : "Gene", "birthday" : ISODate("1996-03-01T00:00:00Z") }
{ "name" : "Edgar", "birthday" : ISODate("1997-02-28T00:00:00Z") }
{ "name" : "George", "birthday" : ISODate("1997-03-01T00:00:00Z") }

Output:

db.birthdays.aggregate( p1, p1p, p2, {$sort:{daysTillBirthday:1}});
{ "name" : "Sam", "birthday" : ISODate("2005-03-31T00:00:00Z"), "daysTillBirthday" : 22 }
{ "name" : "Max", "birthday" : ISODate("2004-03-31T00:00:00Z"), "daysTillBirthday" : 22 }
{ "name" : "Ben", "birthday" : ISODate("1968-04-03T00:00:00Z"), "daysTillBirthday" : 25 }
{ "name" : "Jen", "birthday" : ISODate("1971-04-03T00:00:00Z"), "daysTillBirthday" : 25 }
{ "name" : "Ally", "birthday" : ISODate("1975-06-12T00:00:00Z"), "daysTillBirthday" : 95 }
{ "name" : "Mark", "birthday" : ISODate("1949-12-23T00:00:00Z"), "daysTillBirthday" : 289 }
{ "name" : "Sean", "birthday" : ISODate("2004-01-31T00:00:00Z"), "daysTillBirthday" : 328 }
{ "name" : "Sandy", "birthday" : ISODate("2005-01-31T00:00:00Z"), "daysTillBirthday" : 328 }
{ "name" : "Paul", "birthday" : ISODate("2011-02-07T00:00:00Z"), "daysTillBirthday" : 335 }
{ "name" : "Tim", "birthday" : ISODate("2008-02-28T00:00:00Z"), "daysTillBirthday" : 356 }
{ "name" : "Toni", "birthday" : ISODate("2009-02-28T00:00:00Z"), "daysTillBirthday" : 356 }
{ "name" : "Ellen", "birthday" : ISODate("1996-02-28T00:00:00Z"), "daysTillBirthday" : 356 }
{ "name" : "Fanny", "birthday" : ISODate("1996-02-29T00:00:00Z"), "daysTillBirthday" : 356 }
{ "name" : "Edgar", "birthday" : ISODate("1997-02-28T00:00:00Z"), "daysTillBirthday" : 356 }
{ "name" : "Gene", "birthday" : ISODate("1996-03-01T00:00:00Z"), "daysTillBirthday" : 357 }
{ "name" : "George", "birthday" : ISODate("1997-03-01T00:00:00Z"), "daysTillBirthday" : 357 }
{ "name" : "Paul", "birthday" : ISODate("2014-03-04T15:59:05.374Z"), "daysTillBirthday" : 360 }

You can see that people with same birthday now have same number of days till birthday whether they were born on a leap year or not. Match step can now be performed for the cut-off designed.

EDIT

As of version 3.5.11 there are several date manipulation expressions in aggregation pipeline that make this significantly simpler to write. In particular, the $dateFromParts expression allows constructing a date from various parts, allowing this aggregation:

var today = new Date();
var a1 = {$addFields:{
    today:{$dateFromParts:{year:{$year:today},month:{$month:today},day:{$dayOfMonth:today}}},
    birthdayThisYear:{$dateFromParts:{year:{$year:today}, month:{$month:"$birthday"}, day:{$dayOfMonth:"$birthday"}}}, 
    birthdayNextYear:{$dateFromParts:{year:{$add:[1,{$year:today}]}, month:{$month:"$birthday"}, day:{$dayOfMonth:"$birthday"}}}
}};
var a2 = {$addFields:{
    nextBirthday:{$cond:[ {$gte:[ "$birthdayThisYear", "$today"]}, "$birthdayThisYear", "$birthdayNextYear"]}
}};
var p1 = {$project:{
    name:1, 
    birthday:1, 
    daysTillNextBirthday:{$divide:[ 
        {$subtract:["$nextBirthday", "$today"]}, 
        24*60*60*1000  /* milliseconds in a day */
     ]}, 
    _id:0
}};
var s1 = {$sort:{daysTillNextBirthday:1}};
db.birthdays.aggregate([ a1, a2, p1, s1 ]);

You can set "today" to any date (leap year or not) and see that the calculation is now always correct and much simpler.



回答2:

With the clear thing being that the birth dates are the date of birth and are in the past, but we want to search in the future right? Yeah nice trap.

But we can do, with some projection in aggregation, for one method of solving.

First a little setup for variables we need:

var start_time = new Date(),
    end_time = new Date();

end_time.setDate(end_time.getDate() + 30 );

var monthRange = [ start_time.getMonth() + 1, end_time.getMonth() + 1 ];

var start_string =  start_time.getFullYear().toString() +
    ("0" + (start_time.getMonth()+1)).slice(-2) +
    ("0" + (start_time.getDate()-1)).slice(-2);   

var end_string =  end_time.getFullYear().toString() +
    ("0" + (end_time.getMonth()+1)).slice(-2) +
    ("0" + (end_time.getDate()-1)).slice(-2); 

var start_year = start_time.getFullYear();
var end_year = end_time.getFullYear();

Then run that through aggregate:

db.users.aggregate([
    {"$project": { 
        "name": 1,
        "birthdate": 1,
        "matchYear": {"$concat":[
            // Substituting the year into the current year
            {"$substr":[{"$cond":[
                {"$eq": [{"$month": "$birthdate"}, monthRange[0]]},
                start_year,
                // Being careful to see if we moved into the next year
                {"$cond":[
                    {"$lt": monthRange},
                    start_year,
                    end_year
                ]}
            ]},0,4]},
            {"$cond":[
                {"$lt":[10, {"$month": "$birthdate"}]},
                {"$substr":[{"$month": "$birthdate"},0,2]},
                {"$concat":["0",{"$substr":[{"$month": "$birthdate"},0,2]}]}
            ]},
            {"$cond":[
                {"$lt":[10, {"$dayOfMonth": "$birthdate"}]},
                {"$substr":[{"$dayOfMonth": "$birthdate"},0,2]},
                {"$concat":["0",{"$substr":[{"$dayOfMonth": "$birthdate"},0,2]}]}
            ]}
        ]}
    }},

    // Small optimize for the match stage
    {"sort": { "matchYear": 1}},

    // And match on the range now that it's lexical
    {"$match": { "matchYear": {"$gte": start_string, "$lte": end_string } }}

])

I suppose the same applies for mapReduce if your mind works better that way. But the results would only yield true or false no matter which way you shook it. But you'd probably just need a mapper and the syntax is a bit clearer:

var mapFunction = function () {

    var mDate = new Date( this.birthdate.valueOf() );

     if ( mDate.getMonth() + 1 < monthRange[0] ) {
         mDate.setFullYear(start_year);
     } else if ( monthRange[0] < monthRange[1] ) {
         mDate.setFullYear(start_year);
     } else {
         mDate.setFullYear(end_year);
     }

     var matched = (mDate >= start_time && mDate <= end_time);

     var result = {
         name: this.name,
         birthdate: this.birthdate,
         matchDate: mDate,
         matched: matched
     };

     emit( this._id, result );
};

Then you would pass that in to mapReduce, picking up all the variables that were defined before:

db.users.mapReduce(
    mapFunction, 
    function(){},           // reducer is not called
   { 
       out: { inline: 1 },
       scope: { 
           start_year: start_year,
           end_year: end_year,
           start_time: start_time,
           end_time: end_time,
           monthRange: monthRange 
       } 
   }

)

But really, at least store the "Birth Month" in a real field as part of your user record. Because then you can narrow down the matches and not process your whole collection. Just add the additional $match at the start of the pipeline:

{"$match": "birthMonth": {"$in": monthRange }}

With the field present in the document that will save disk thrashing in the future.

Final Note

The other form that should work is just throwing raw JavaScript into find. That can be done as a shortcut where you don't provide any additional query conditions. But for the confused the documentation is under the $where operator, and essentially the same thing as passing in JavaScript to $where.

However, any attempt at this would just not produce a result. Hence the other methods. Not sure if there was a good reason or if it's a bug.

Anyhow all testing, aside from the earlier year rollover testing, was done on these documents. One result should not appear where the initial starting date was from "2014-03-03".

{ "name" : "bill",  "birthdate" : ISODate("1973-03-22T00:00:00Z") }
{ "name" : "fred",  "birthdate" : ISODate("1974-04-17T00:00:00Z") }
{ "name" : "mary",  "birthdate" : ISODate("1961-04-01T00:00:00Z") }
{ "name" : "wilma", "birthdate" : ISODate("1971-03-17T00:00:00Z") }


回答3:

A solution would be to pass a function to the find Mongo operation. See the inline comments:

// call find
db.users.find(function () {

   // convert BSON to Date object
   var bDate   = new Date(this.birthday * 1000)

       // get the present moment
     , minDate = new Date()

       // add 30 days from this moment (days, hours, minutes, seconds, ms)
     , maxDate = new Date(minDate.getTime() + 30 * 24 * 60 * 60 * 1000);

   // modify the year of the birthday Date object
   bDate.setFullYear(minDate.getFullYear());

   // return a boolean value
   return (bDate > minDate && bDate < maxDate);
});


回答4:

I think the most elegant and usually the most efficient solution is to use the aggregation framework. To obtain birthdays we need to discard all information about the dates except $month and $dayOfMonth. We create new compound fields using those values, pivot on them, and away we go!

This javascript can be executed from the mongo console, and operates on a collection called users with a field called birthday. It returns a list of user IDs, grouped by birthday.

var next30days = [];
var today = Date.now();
var oneday = (1000*60*60*24);
var in30days = Date.now() + (oneday*30);

//  make an array of all the month/day combos for the next 30 days    
for (var i=today;i<in30days;i=i+oneday) {
    var thisday = new Date(i);
    next30days.push({
        "m": thisday.getMonth()+1,
        "d": thisday.getDate()
    });
}

var agg = db.users.aggregate([
    {
        '$project': {
            "m": {"$month": "$birthday"},
            "d": {"$dayOfMonth": "$birthday"}
        }
    },
    {
        "$match": {
            "$or": next30days
        }
    },
    {
        "$group": {
            "_id": {
                "month": "$m",
                "day": "$d",
            },
            "userids": {"$push":"$_id"}
        }
    }
]);

printjson(agg);