In an array of objects, how can I aggregate based on an object property?

2019-05-28 14:10发布

问题:

Say I have the following array of objects:

dataArray = [ 
  { id: "a", score: 1 }, 
  { id: "b", score: 2 }, 
  { id: "c", score: 5 }, 
  ...
  { id: "a", score: 3 },
  ...
  { id: "c", score: 2},
  ...
 ]

How can I obtain a resultArray like the following:

resultArray = [
  { id: "a", score: sum of all the scores when id is a },
  { id: "b", score: sum of all the scores when id is b },
  ...
  ...
]

回答1:

If you use the Underscore library (shown here in CoffeeScript):

# Bucket the records by id, then collapse each bucket into one {id, score} record.
_.map _.groupBy(dataArray, 'id'), (bucket, id) ->
  total = _.reduce bucket, ((sum, entry) -> sum + entry.score), 0
  {id, score: total}


回答2:

The Underscore version is probably the most succinct. This is a plain CoffeeScript version that only creates one auxiliary object to have fast access by id and make the whole thing O(n):

# Sums the `score` of every record in `dataArr`, grouped by `id`.
#
# dataArr - array of objects, each carrying an `id` and a `score`.
#
# Returns an array with one {id, score} object per distinct id, where `score`
# is the accumulated total for that id. Ids come back as strings because they
# are used as object keys while accumulating.
aggregateScores = (dataArr) ->
  # A prototype-less object keeps ids such as "constructor" or "toString"
  # from colliding with members inherited from Object.prototype.
  totals = Object.create null
  for {id, score} in dataArr
    totals[id] = (totals[id] or 0) + score
  {id, score} for id, score of totals

# Sample input: ids "a" and "c" each appear twice.
sample = [
  { id: "a", score: 1 }
  { id: "b", score: 2 }
  { id: "c", score: 5 }
  { id: "a", score: 3 }
  { id: "c", score: 2 }
]
console.log aggregateScores sample
# Output:
# [{id:"a", score:4}, {id:"b", score:2}, {id:"c", score:7}]


回答3:

This is just plain JavaScript, but here is the long answer to your question:

/**
     * Groups `values` by key and folds the values of each group into one result.
     *
     * @param {Array} values - Items to aggregate.
     * @param {*} init - Accumulator used the first time a key is encountered.
     * @param {Function} keyGetter - Maps an item to its grouping key.
     * @param {Function} valueGetter - Maps an item to the value that is folded in.
     * @param {Function} aggregator - Called as aggregator(soFar, value); its
     *     return value becomes the new accumulator for that key.
     * @returns {Object} Plain object mapping each key to its aggregated result.
     */
    function aggregate(values, init, keyGetter, valueGetter, aggregator) {
        var hasOwn = Object.prototype.hasOwnProperty
        var results = {}
        for (var index = 0; index < values.length; ++index) {
            var item = values[index]
            var key = keyGetter(item)
            // Look at own properties only: a bare `in` test also matches names
            // inherited from Object.prototype (e.g. "toString"), which would
            // seed the accumulator with a built-in function instead of `init`.
            var soFar = hasOwn.call(results, key) ? results[key] : init
            results[key] = aggregator(soFar, valueGetter(item))
        }
        return results
    }

    // Sample records: ids "a" and "c" each occur twice.
    var array = [
        { id: "a", score: 1 },
        { id: "b", score: 2 },
        { id: "c", score: 5 },
        { id: "a", score: 3 },
        { id: "c", score: 2 }
    ]

    // Grouping key of a record: its `id` property.
    function keyGetter(value) {
        var id = value.id
        return id
    }

    // Value contributed by a record: its `score` property.
    function valueGetter(value) {
        var score = value.score
        return score
    }

    // Combines the running result with the next value using `+`.
    function aggregator(sum, value) {
        var combined = sum + value
        return combined
    }

    // Runs `aggregate` over `array` with an initial value of 0 and logs the result.
    function ready() {
        console.info(aggregate(array, 0, keyGetter, valueGetter, aggregator))
    }


回答4:

Here's a straightforward CoffeeScript version:

# Sample records; ids "a" and "b" each occur twice.
data = [
  { id: "a", score: 1 }
  { id: "b", score: 2 }
  { id: "a", score: 5 }
  { id: "c", score: 2 }
  { id: "b", score: 3 }
]

# Sum the scores per id in a lookup object.
resultSet = {}
for {id, score} in data
  resultSet[id] = (resultSet[id] ? 0) + score
console.log resultSet

# Turn the lookup object into an array of {id, score} records.
resultArr = ({id, score} for id, score of resultSet)
console.log resultArr

The output is:

{ a: 6, b: 5, c: 2 }
[ { id: 'a', score: 6 },
  { id: 'b', score: 5 },
  { id: 'c', score: 2 } ]

I'm sure it's possible to create a fancier solution using the functions in Underscore, but the CoffeeScript solution isn't bad, so I went for something simple to understand.



回答5:

It's a bit overkill if this is the only aggregation you want to do, but there is a nicely documented aggregation library called Lumenize that does simple group-by operations like this, in addition to more advanced pivot tables, n-dimensional cubes, hierarchical roll-ups, and timezone-precise time-series aggregations.

Here is the jsFiddle for a Lumenize solution.

If you want to try it in node.js:

npm install Lumenize --save

then put this into a file named lumenizeGroupBy.coffee:

lumenize = require('Lumenize')

# Sample records to aggregate.
dataArray = [
  { id: "a", score: 1 }
  { id: "b", score: 2 }
  { id: "c", score: 5 }
  { id: "a", score: 3 }
  { id: "c", score: 2 }
]

# Cube configuration: a single dimension on `id` and a single 'sum' metric
# over `score`, stored under the name 'sum'.
# NOTE(review): field semantics are taken from the key names; confirm against
# the Lumenize OLAPCube documentation.
config =
  dimensions: [{field: 'id'}]
  metrics: [{field: 'score', f: 'sum', as: 'sum'}]

cube = new lumenize.OLAPCube(config, dataArray)
console.log(cube.toString(null, null, 'sum'))

and run

coffee lumenizeGroupBy.coffee