I am adding entries to a schema every hour in order to track growth over the course of days while maintaining a current score for the current day. Now I would like to be able to pull the most recent record for each day for the past week. The results would be 6 records at or around midnight for 6 days previous and the 7th being the latest for the current day.
Here is my schema:
var schema = new Schema({
aid: { type: Number }
, name: { type: String }
, score: { type: Number }
, createdAt: { type: Date, default: Date.now() }
})
Edit
I've tried using this static, but it pulls the exact same record 7 times
schema.statics.getLastWeek = function(name, fn) {
var oneday = 60 * 60 * 24
, now = Date.now()
, docs = []
for (var i = 1; i <= 7; i++) {
this.where('name', new RegExp(name, 'i'))
.where('createdAt')
.gte(now - (i * oneday))
.desc('createdAt')
.findOne(function(err,doc){
docs.push(doc)
})
}
}
If I were using SQL I would do a subquery selecting MAXDATE and join it to my main query in order to retrieve the results I want. Anyway to do this here?
Kristina Chodorow gives a detailed recipe for this exact task in her book MongoDB: The Definitive Guide:
Suppose we have a site that keeps track of stock prices. Every few
minutes from 10 a.m. to 4 p.m., it gets the latest price for a stock,
which it stores in MongoDB. Now, as part of a reporting application,
we want to find the closing price for the past 30 days. This can be
easily accomplished using group.
I'm not familiar with Mongoose, however I've tried to adapt her example to your case below. Note I changed the createdAt
default
property from a value to a function and added an extra field datestamp
to your schema:
var oneday = 24 * 60 * 60;
var schema = new Schema({
aid: { type: Number }
, name: { type: String }
, score: { type: Number }
// default: is a function and called every time; not a one-time value!
, createdAt: { type: Date, default: Date.now }
// For grouping by day; documents created on same day should have same value
, datestamp: { type: Number
, default: function () { return Math.floor(Date.now() / oneday); }
}
});
schema.statics.getLastWeek = function(name, fn) {
var oneweekago = Date.now() - (7 * oneday);
ret = this.collection.group({
// Group by this key. One document per unique datestamp is returned.
key: "datestamp"
// Seed document for each group in result array.
, initial: { "createdAt": 0 }
// Update seed document if more recent document found.
, reduce: function(doc, prev) {
if (doc.createdAt > prev.createdAt) {
prev.createdAt = doc.createdAt;
prev.score = doc.score;
// Add other fields, if desired:
prev.name = doc.name;
}
// Process only documents created within past seven days
, condition: { "createdAt" : {"$gt": oneweekago} }
}});
return ret.retval;
// Note ret, the result of group() has other useful fields like:
// total "count" of documents,
// number of unique "keys",
// and "ok" is false if a problem occurred during group()
);
A solution is to use group() to groups records by day. It's fancy, slow and can be blocking (meaning nothing else can run at the same time), but if your record set isn't too huge it's pretty powerful.
Group: http://www.mongodb.org/display/DOCS/Aggregation#Aggregation-Group
As for mongoose, I'm not sure if it supports group() directly, but you can use the node-mongodb-native implementation, by doing something like this (pseudo-code, mostly):
schema.statics.getLastWeek = function(name, cb) {
var keys = {} // can't remember what this is for
var condition = {} // maybe restrict to last 7 days
var initial = {day1:[],day2:[],day3:[],day4:[],day5:[],day6:[],day7:[]}
var reduce = function(obj, prev) {
// prev is basically the same as initial (except with whatever is added)
var day = obj.date.slice(0,-10) // figure out day, however it works
prev["day" + day].push(obj) // create grouped arrays
// you could also do something here to sort by _id
// which is probably also going to get you the latest for that day
// and use it to replace the last item in the prev["day" + 1] array if
// it's > that the previous _id, which could simplify things later
}
this.collection.group(keys, condition, initial, reduce, function(err, results) {
// console.log(results)
var days = results // it may be a property, can't remember
var lastDays = {}
days.forEach(function(day) {
// sort each day array and grab last element
lastDays[day] = days[day].sort(function(a, b) {
return a.date - b.date // check sort syntax, you may need a diff sort function if it's a string
}).slice(-1) // i think that will give you the last one
})
cb(lastDays) // your stuff
})
}
Some more comparisons between groups and map reduce from my blog:
http://j-query.blogspot.com/2011/06/mongodb-performance-group-vs-find-vs.html
There are no docs about the group command in the native driver, so you'll have to peer through the source code here:
https://github.com/christkv/node-mongodb-native/blob/master/lib/mongodb/collection.js
Also for sort, check check https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/Array/sort for exact syntax
Edit: Better Idea!!!
Just have a special collection called "lastRequestOfDay" and make the _id the day.
Overwrite the value with each new request. It will be super easy to query and fast to write and will always have the last value written each day!
Add another property to the schema named dateAdded
or something.
schema.statics.getLastWeek = function(name, fn) {
var oneday = 60 * 60 * 24
, now = Date.now()
, docs = []
for (var i = 0; i < 7; i++) {
this.where('name', new RegExp(name, 'i'))
.where('createdAt')
.lt(now - (i * oneday))
.gte(now - ((i + 1) * oneday))
.desc('createdAt')
.findOne(function(err,doc){
// might not always find one
docs.push(doc)
})
}
return fn(null, docs)
}
Try something like this:
schema.statics.getLastWeek = function(name, fn) {
var oneday = 60 * 60 * 24
, now = Date.now()
, docs = []
, that = this
function getDay(day){
that.where('name', new RegExp(name, 'i'))
.where('createdAt')
.gte(now - (day * oneday))
.desc('createdAt')
.findOne(function(err,doc){
docs.push(doc)
})
}
for (var i = 1; i <= 7; i++) {
getDay(i);
}
}
Nobody seems to be trying for "close to midnight". :) The issue I saw with the original code was that it checked for a time greater than or equal to x days ago... which will always return the most recent time. I'm confused as to why DeaDEnD's solution returns the same record 7 times, though. Also, you never called fn
, but that's not really the biggest of your concerns, is it?
Try adding on .lt(now - (now % oneday) - (i - 1) * oneday)
(assuming 0-indexed; if it's 1-indexed, change that to say i - 2
)