Select sub-documents where field's value is in

2020-04-10 01:14发布

问题:

I want to filter according the sub documents, but actually I am repeating the document for each sub document. I want one document and a list of sub documents if that is the case.

My data looks like:

{
    "_id" : ObjectId("582eeb5f75f58055246bd22d"),
    "filename" : "file1",
    "cod" : NumberLong(90),
    "subdocs" : [
        {
            "length" : NumberLong(10),
            "desc" : "000"
        },
        {
            "length" : NumberLong(15),
            "desc" : "011"
        },
        {
            "length" : NumberLong(30),
            "desc" : "038"
        }
    ]
}
{
    "_id" : ObjectId("582eeb5f75f58055246bd22e"),
    "filename" : "file2",
    "cod" : NumberLong(95),
    "subdocs" : [
        {
            "length" : NumberLong(11),
            "desc" : "000"
        },
        {
            "length" : NumberLong(21),
            "desc" : "018"
        },
        {
            "length" : NumberLong(41),
            "desc" : "008"
        }
    ]
}

I am using this query to filter for the desc (000, 011) on the subdocs

db.ftmp.aggregate( 
    { $match: 
        { "subdocs.desc": 
            { $in: ["000", "011"] } 
        }
    }, 
    { $unwind : "$subdocs" }, 
    { $match : 
        { "subdocs.desc" : 
            { $in:["000", "011"] } 
        }
    }
)

But the result shows 3 documents, 1 document for each sub-document that matches with that query.

{
    "_id" : ObjectId("582eeb5f75f58055246bd22d"),
    "filename" : "file1",
    "cod" : NumberLong(90),
    "subdocs" : {
        "length" : NumberLong(10),
        "desc" : "000"
    }
}
{
    "_id" : ObjectId("582eeb5f75f58055246bd22d"),
    "filename" : "file1",
    "cod" : NumberLong(90),
    "subdocs" : {
        "length" : NumberLong(15),
        "desc" : "011"
    }
}
{
    "_id" : ObjectId("582eeb5f75f58055246bd22e"),
    "filename" : "file2",
    "cod" : NumberLong(95),
    "subdocs" : {
        "length" : NumberLong(11),
        "desc" : "000"
    }
}

However I want to get: file1 with the subdocuments with desc 000 and 011, and file2 with the subdocumnt 000

{
    "_id" : ObjectId("582eeb5f75f58055246bd22d"),
    "filename" : "file1",
    "cod" : NumberLong(90),
    "subdocs" : [
        {
            "length" : NumberLong(10),
            "desc" : "000"
        },
        {
            "length" : NumberLong(15),
            "desc" : "011"
        }
    ]
}
{
    "_id" : ObjectId("582eeb5f75f58055246bd22e"),
    "filename" : "file2",
    "cod" : NumberLong(95),
    "subdocs" : {
        "length" : NumberLong(11),
        "desc" : "000"
    }
}

What is the correct way to do that? Any idea?

回答1:

You just need to add $group & $push. First you $unwind the subdocs to apply the $match followed by $group on id and $push the grouped subdocs.

db.ftmp.aggregate({
    $unwind: "$subdocs"
}, {
    $match: {
        "subdocs.desc": {
            $in: ["000", "011"]
        }
    }
}, {
    $group: {
        _id: "$_id",
        subdocs: {
            $push: "$subdocs"
        },
        filename: {
            $first: "$filename"
        },
        cod: {
            $first: "$cod"
        }
    }
})


回答2:

First of all using the $unwind operator as mentioned in this answer will cause a drop of performance in your application because unwinding your array result in more documents to process down in the pipeline. There is a better way to achieve this since MongoDB 2.6.

That being said, this is a perfect job for the $filter operator new in MongoDB 3.2.

The most efficient way to do this is in MongoDB 3.4. MongoDB 3.4 introduced the $in array operator for the aggregation framework which can be used in the $filter conditional expression which, when evaluates to true include the sub-document in the resulting array.

let values = [ '000', '011' ];

db.collection.aggregate([ 
    { "$project": { 
        "filename": 1, 
        "cod": 1, 
        "subdocs": { 
            "$filter": { 
                "input": "$subdocs", 
                "as": "s", 
                "cond": { "$in": [ "$$s.desc", values ] }
            } 
        } 
    }} 
])

In MongoDB 3.2 we need a slightly different approach because we can use the $in operator there. But luckily we have the $setIsSubset operator and as you might have guess performs a set operation on two array and return true if the first array is a subset of the second array. Because $setIsSubset first expression must be an array, need to make the desc field an array in our pipeline. To do this, we simply use the [] bracket the create that array field which is new MongoDB 3.2

db.collection.aggregate([ 
    { "$project": { 
        "filename": 1, 
        "cod": 1, 
        "subdocs": { 
            "$filter": { 
                "input": "$subdocs", 
                "as": "s", 
                "cond": { "$setIsSubset": [ [ "$$s.desc" ], values ] }
            } 
        } 
    }} 
])

MongoDB 3.0 is dead to me but if for some reasons you are running that version, you can use the $literal operator to return the one element array you need for the set operation and the $setDifference operator. This is left as exercise to the reader.