MongoDB dateDiff between multiple documents

2019-07-17 02:02发布

问题:

I have collection in my mongoDB which stores service given to customer along with their email address something like below

{
    "_id" : ObjectId("56a84627f8fd4a136c0e944a"),    
    "Vehicle" : "Honda",
    "ServiceSelected" : "FULL SERVICE",
    "FullName" : "xyz",
    "Email" : "xyz@xyz.com",    
    "BookingTime" : ISODate("2015-12-27T06:00:00.000Z")
},

{
    "_id" : ObjectId("56a84627f8fd4a136c0e944b"),    
    "Vehicle" : "AUDI",
    "ServiceSelected" : "FLAT TYRE",
    "FullName" : "abc",
    "Email" : "abc@abc.com",    
    "BookingTime" : ISODate("2015-12-26T06:00:00.000Z")
},

{
    "_id" : ObjectId("56a84627f8fd4a136c0e944c"),    
    "Vehicle" : "BMW",
    "ServiceSelected" : "OTHERS",
    "FullName" : "def",
    "Email" : "def@def.com",    
    "BookingTime" : ISODate("2015-12-25T06:00:00.000Z")
},

{
    "_id" : ObjectId("56a84627f8fd4a136c0e944d"),    
    "Vehicle" : "BMW",
    "ServiceSelected" : "OTHERS",
    "FullName" : "def",
    "Email" : "def@def.com",    
    "BookingTime" : ISODate("2015-12-30T06:00:00.000Z")
},

{
    "_id" : ObjectId("56a84627f8fd4a136c0e944a"),    
    "Vehicle" : "Honda",
    "ServiceSelected" : "FULL SERVICE",
    "FullName" : "xyz",
    "Email" : "xyz@xyz.com",    
    "BookingTime" : ISODate("2016-01-27T06:00:00.000Z")
}

From the above collection I want to fetch all the documents that have taken our service with a gap of at-least 30 days i.e. from the above collection "Email" : "xyz@xyz.com" should be returned but not "Email" : "def@def.com" as the second service was taken with in 5 days.

I know there is flaw in the design and an additional flag can be set while inserting the record from the application but I need to fetch the data for the existing records.

回答1:

You need to use the $min and $max operators which respectively return the minimum and maximum value for "BookingTime" in your $group stage. The last stage in the pipeline is the $redact stage where you use a simple "date" math using the $divide and $subtract arithmetic operators.to return those documents where the number of days between first "service" and last "service" is greater than 30

db.collection.aggregate( [ 
    { "$group": { 
        "_id": "$Email",  
        "date1": { "$min": "$BookingTime" }, 
        "date2": { "$max": "$BookingTime" } 
    }}, 
    { "$redact": { 
        "$cond": [ 
             { "$gte": [ 
                 { "$divide": [ 
                     { "$subtract": [ "$date2", "$date1" ] }, 
                     1000 * 60 * 60 * 24 
                 ]}, 
                 30 
             ]}, 
             "$$KEEP", 
             "$$PRUNE" 
        ] 
    }}
])

Which returns:

{
        "_id" : "xyz@xyz.com",
        "date1" : ISODate("2015-12-27T06:00:00Z"),
        "date2" : ISODate("2016-01-27T06:00:00Z")
}

Another way to do this is by using the $cond operator in a $project stage to avoid a collection scan.

db.collection.aggregate( [ 
    { "$group": { 
        "_id": "$Email", 
        "date1": { "$min": "$BookingTime" },
        "date2": { "$max": "$BookingTime" }, 
        "count": { "$sum": 1 } 
    }},
    { "$match": { "count": { "$gte": 2 } } }, 
    { "$project": { 
        "emails": { 
            "$cond": [ 
                { "$gte": [ 
                    { "$divide": [ 
                        { "$subtract": [ "$date2", "$date1" ] }, 
                        1000 * 60 * 60 * 24 
                    ]}, 
                    30 
                ] }, 
                "$_id", 
                false 
            ] 
        } 
    }}, 
    { "$match": { "emails": { "$ne": false } } } 
])


回答2:

You can get first sales date and last sales date by $min and $max:

db.services.aggregate({
    $group: {
       "_id" :"$Email",
       lastSalesDate: { $max: "$BookingTime" },
       firstSalesDate: { $min: "$BookingTime" }
    }   
  }
)

After that you can add filter based on lastSalesDate. You can calculate ISO date which 30 days before. ex. ISODate("2015-12-28T00:00:00.000Z"). By $lt , you will get customers of 30 days before.

db.services.aggregate(
  {
    $group: {
       "_id" :"$Email",
       lastSalesDate: { $max: "$BookingTime" },
       firstSalesDate: { $min: "$BookingTime" }
    }   
  },
  { 
    $match : {
       "lastSalesDate" : { $lt: ISODate("2015-12-28T00:00:00.000Z") }
    }
  }
)

Results like:

{ 
    "_id" : "abc@abc.com", 
    "lastSalesDate" : ISODate("2015-12-26T06:00:00.000+0000"), 
    "firstSalesDate" : ISODate("2015-12-26T06:00:00.000+0000")
}


回答3:

This is what I used finally

db.services.aggregate( 
    {$group: {
       "_id" :"$Email",
       count:{$sum:1},
       lastSalesDate: { $max: "$BookingTime" },
       firstSalesDate: { $min: "$BookingTime" }
    },
    {$project:{
            _id:1,count:1,dateDifference: { $divide:[ {$subtract: [ "$lastSalesDate", "$firstSalesDate" ]},86400000] }
        }
    },
    {$match:{
          count:{$gt:1},dateDifference:{$gt:20}
        }
    }
  }
)

Count > 1 helped to filter the records which never repeated and datedifferentce > 20 is for days as I already converted milliseconds to days using division operation.