MongoDB - Aggregate muliple rows

2019-05-23 17:23发布

问题:

Suppose the following aggregation query:

Machine.aggregate(  [ { $match : {  $and: [  {"idc": req.query.idc }, {"customer":req.query.customer} ] } } ,{"$group":{_id: {"cluster":"$cluster","idc":"$idc","type":"$type"},"SumCores":{"$sum":"$cores"},"SumMemory": { "$sum":"$memory" }}}, { $sort : { idc : -1, cluster: 1 } } ]);

Which returns:

[
{
    "_id": {
        "cluster": 1,
        "idc": "LH5",
        "type": "Virtual"
    },
    "SumCores": 112,
    "SumMemory": 384
},
{
    "_id": {
        "cluster": 1,
        "idc": "LH5",
        "type": "Physical"
    },
    "SumCores": 192,
    "SumMemory": 768
},
{
    "_id": {
        "cluster": 1,
        "idc": "LH8",
        "type": "Virtual"
    },
    "SumCores": 232,
    "SumMemory": 469
},
{
    "_id": {
        "cluster": 1,
        "idc": "LH8",
        "type": "Physical"
    },
    "SumCores": 256,
    "SumMemory": 1024
}
]

Is there a way to change the aggregation to retrieve this desired output:

[
   {
    "_id": {
        "cluster": 1,
        "idc": "LH5"
    },
    "Virtual": {
        "SumCores": 112,
        "SumMemory": 384
    },
    "Physical": {
        "SumCores": 192,
        "SumMemory": 768
    }
},
{
    "_id": {
        "cluster": 1,
        "idc": "LH8"
    },
    "Virtual": {
        "SumCores": 232,
        "SumMemory": 469
    },
    "Physical": {
        "idc": "LH8",
        "type": "Physical"
    }
}
]

Assumptions:

  • There will always be a Physical and Virtual "pair" per IDC/Cluster

I am happy to receive solutions which:

a) Change the aggregation query b) Receive the existing data and change it into this format by way of a library and/or an algorithm

回答1:

You are doing all the right things in the query already as you need to $group at the level you have in order to get the correct sums. The only thing remaining is to bring it all together.

Personally I would stick with the "pair" in an array as the final output:

Machine.aggregate([ 
    { "$match": { 
        "idc": req.query.idc, "customer": req.query.customer}
    } ,
    { "$group": { 
        "_id": {
            "cluster": "$cluster",
            "idc":"$idc",
            "type": "$type"
        },
        "SumCores": { "$sum":"$cores" },
        "SumMemory": { "$sum":"$memory" }
    }},
    { "$group": {
        "_id": {
            "cluster": "$_id.cluster",
            "idc": "$_id.idc"
        },
        "data": {
            "$push": {
                "type": "$_id.type",
                "SumCores": "$SumCores",
                "SumMemory": "$SumMemory"
            }
        }
    }},
    { "$sort" : { "_id.idc": -1, "_id.cluster": 1 } }
]);

Which would give you:

{
        "_id" : {
                "cluster" : 1,
                "idc" : "LH8"
        },
        "data" : [
                {
                        "type" : "Virtual",
                        "SumCores" : 232,
                        "SumMemory" : 469
                },
                {
                        "type" : "Physical",
                        "SumCores" : 256,
                        "SumMemory" : 1024
                }
        ]
}
{
        "_id" : {
                "cluster" : 1,
                "idc" : "LH5"
        },
        "data" : [
                {
                        "type" : "Virtual",
                        "SumCores" : 112,
                        "SumMemory" : 384
                },
                {
                        "type" : "Physical",
                        "SumCores" : 192,
                        "SumMemory" : 768
                }
        ]
}

But if you really must, then you can filter out the matched elements from the array and put them in their own properties:

Machine.aggregate([ 
    { "$match": { 
        "idc": req.query.idc, "customer": req.query.customer}
    } ,
    { "$group": { 
        "_id": {
            "cluster": "$cluster",
            "idc":"$idc",
            "type": "$type"
        },
        "SumCores": { "$sum":"$cores" },
        "SumMemory": { "$sum":"$memory" }
    }},
    { "$group": {
        "_id": {
            "cluster": "$_id.cluster",
            "idc": "$_id.idc"
        },
        "data": {
            "$push": {
                "type": "$_id.type",
                "SumCores": "$SumCores",
                "SumMemory": "$SumMemory"
            }
        }
    }},
    { "$project": {
        "Physical": {
            "$setDifference": [
                { "$map": {
                    "input": "$data",
                    "as": "el",
                    "in": {
                        "$cond": [
                            { "$eq": [ "$$el.type", "Physical" ] },
                            {
                                "SumCores": "$$el.SumCores",
                                "SumMemory": "$$el.SumMemory"
                            },
                            false
                        ]
                    }
                }},
                [false]
            ]
        },
        "Virtual": {
            "$setDifference": [
                { "$map": {
                    "input": "$data",
                    "as": "el",
                    "in": {
                        "$cond": [
                            { "$eq": [ "$$el.type", "Virtual" ] },
                            {
                                "SumCores": "$$el.SumCores",
                                "SumMemory": "$$el.SumMemory"
                            },
                            false
                        ]
                    }
                }},
                [false]
            ]
        }
    }},
    { "$unwind": "$Physical" },
    { "$unwind": "$Virtual"},
    { "$sort" : { "_id.idc": -1, "_id.cluster": 1 } }
]);

Which gives you your result:

{
        "_id" : {
                "cluster" : 1,
                "idc" : "LH8"
        },
        "Physical" : {
                "SumCores" : 256,
                "SumMemory" : 1024
        },
        "Virtual" : {
                "SumCores" : 232,
                "SumMemory" : 469
        }
}
{
        "_id" : {
                "cluster" : 1,
                "idc" : "LH5"
        },
        "Physical" : {
                "SumCores" : 192,
                "SumMemory" : 768
        },
        "Virtual" : {
                "SumCores" : 112,
                "SumMemory" : 384
        }
}

But the first is just going to give you the same essential data without needing an extra pass through the results.

At any rate it's really just one more $group to bring it all together and then the optional stages if you really must have that data format. But I would personally handle any accessing of the "pair" in the code that needs to deal with it.