calculating 3 similar sized groups in Javascript o

We have:

var range = [9,18,3,14,2,6,12,7,11,2,1,4]
var total = 89;
var group_size = total / 3;

As result I need 3 groups similar sized but group1 and group2 can never be bigger than group_size.

The result for this example would be

var group1 = 27; // 9,18
var group2 = 25; // 3,14,2,6
var group3 = 37; // 12,7,11,2,1,4

How can I achieve this in Javascript/jQuery?

回答1:

I think you need to pop values with Javascript if you want this. Something like:

var range = [9,18,3,14,2,6,12,7,11,2,1,4]
var total = 89;
var group_size = total / 3;
var values = [0];
var groupnr = 0;
// Reverse the array, because pop will get the last element
range = range.reverse();

// While elements
while( range.length ) {
    // get the last element and remove from array
    var curvalue = range.pop();

    // To large, create a new element
    if( values[groupnr] + curvalue > group_size && groupnr < 2 ) {
         groupnr++;
         values[groupnr] = 0;
    }
    // Increase
    values[groupnr] += curvalue;
}
console.log(values);

回答2:

Update: This really answers a similar question (since removed by the author) which didn't include the restriction that the first two groups could not exceed the mean. With that restriction, it's a much simpler problem, and one that probably wouldn't have caught my attention. I'm leaving my answer here as the question it answers seems to be algorithmically interesting.

I have an answer using the Ramda functional programming library. You can see it in action on JSFiddle. (See below for an updated version of this that doesn't depend upon Ramda.) Ramda offers a number of convenient functions that make the code simpler. None of them should be surprising if you're at all used to functional programming, although if you're used to tools like Underscore or LoDash the parameter orders might seem backwards. (Believe me, there is a good reason.)

var equalSplit = (function() {
    var square = function(x) {return x * x;};
    var variance = function(groups) {
        var sizes = map(sum, groups),
            mean = sum(sizes) / sizes.length;
        return sum(map(pipe(subtract(mean), square), sizes));
    };
    var firstGroupChoices = function(group, count) {
        if (group.length < 2 || count < 2) {return group;}
        var mean = sum(group) / count;
        var current = 0, next = group[0], idx = 0;
        do {
            current = next;
            next = next + group[++idx];
        } while (next < mean);
        if (next === mean) {
            return [group.slice(0, idx + 1)]
        } else {
            return [
                group.slice(0, idx),
                group.slice(0, idx + 1)
            ];
        }
    };
    var val = function(group, count, soFar) {
        if (count <= 0 || group.length == 0) {
            return {groups: soFar, variance: variance(soFar)};
        }
        if (count == 1) {
            return val([], 0, soFar.concat([group]));
        }
        var choices = firstGroupChoices(group, count);
        var values = map(function(choice){
            return val(group.slice(choice.length), count - 1, 
                       soFar.concat([choice]));
        }, choices);
        return minWith(function(a, b) {
            return a.variance - b.variance;
        }, values);
    };
    return function(group, count) {
        return val(group, count, []).groups;
    }
}());

Here is some sample output from the Fiddle:

==================================================
Input: [9,18,3,14,2,6,12,7,11,2,1,4]
Split into 3 groups
--------------------------------------------------
Groups: [[9,18,3],[14,2,6,12],[7,11,2,1,4]]
Totals: [30,34,25]
Variance: 40.66666666666667
==================================================

==================================================
Input: [9,18,3,2,6,12,11,2,4]
Split into 3 groups
--------------------------------------------------
Groups: [[9,18],[3,2,6,12],[11,2,4]]
Totals: [27,23,17]
Variance: 50.66666666666667
==================================================

==================================================
Input: [23,10,6,22,22,21,22,14,16,21,13,14,22,16,22,6,16,14,8,20,10,19,12,14,12]
Split into 5 groups
--------------------------------------------------
Groups: [[23,10,6,22,22],[21,22,14,16],[21,13,14,22],[16,22,6,16,14,8],
         [20,10,19,12,14,12]]
Totals: [83,73,70,82,87]
Variance: 206
==================================================

I am not at all convinced that this algorithm will give you an actual optimal solution. I think there is some chance that a search might fall into a local minimum and not notice the global minimum in a nearby valley of the search space. But I haven't tried very hard to either prove it or come up with a counterexample. There is also a reasonable chance that it actually is correct. I have no idea if there is some more efficient algorithm than this. The problem feels vaguely like partitioning problems and knapsack problems, but I know I've never run across it before. Many of those problems are NP Hard/NP Complete, so I wouldn't expect this to have a really efficient algorithm available.

This one works in a fairly simple recursive manner. The internal val function accepts an array of numbers, the number of groups to create, and and accumulator (soFar) containing all the groups that have been created so far. If count is zero, it returns a simple result based upon the accumulator. If count is one, it recurs with an empty group, a count of zero, a an accumulator that now includes the original group as its last element.

For any other value of count it calculates the mean size of the remaining group, and then chooses the last initial partial sum of the group smaller than the mean and the first one larger than it (with a special case if there is one exactly equal to it), recurs to finds the value if those partial sequences are used as groups in the anser, and then returns the one with the smaller value.

Values are determined by calculated the variance in the total values of each subgroup formed. The variance is the sum of the squares of the distance from the mean.

For instance, if you started with these values:

[8, 6, 7, 5, 3, 1, 9]

And wanted to break them into three groups, you would have a mean of

(8 + 6 + 7 + 5 + 3 + 1 + 9 = 39)  / 3  => 13

If you broke them up like this:

[[8, 6], [7, 5, 3], [1, 9]]

you would have totals

[14, 15, 10]

and a variance of

(14 - 13)^2 + (15 - 13)^2 + (10 - 13)^2 => 14

But if you broke them like this:

[[8, 6], [7, 5], [3, 1, 9]]

your totals would be

[14, 12, 13]

and your variance would be

[14 - 13)^2 + (12 - 13)^2 + (13 - 13)^2 => 2

And since the latter split has the lower variance it is considered better.

Example

equalSplit([9, 18, 3, 2, 6, 12, 11, 2, 4], 3 []) = minVal(
    equalSplit([18, 3, 2, 6, 12, 11, 2, 4], 2, [[9]]),
    equalSplit([3, 2, 6, 12, 11, 2, 4], 2, [[9, 18]])
);

equalSplit([18, 3, 2, 6, 12, 11, 2, 4], 2, [[9]]) =
    equalSplit([12, 11, 2, 4], 1, [[9], [18, 3, 2, 6]]);

equalSplit([3, 2, 6, 12, 11, 2, 4], 2, [9, 18]) = minVal(
    equalSplit([12, 11, 2, 4], 1, [[9, 18], [3, 2, 6]])
    equalSplit([11, 2, 4], 1,  [[9, 18], [3, 2, 6, 12]]
);

equalSplit([12, 11, 2, 4], 1, [[9], [18, 3, 2, 6]]) =
    equalSplit([], 0, [[9], [18, 3, 2, 6], [12, 11, 2, 4]])

equalSplit([12, 11, 2, 4], 1,  [[9, 18], [3, 2, 6]]) =
    equalSplit([], 0,  [[9, 18], [3, 2, 6], [12, 11, 2, 4]]) =

equalSplit([11, 2, 4], 1,  [[9, 18], [3, 2, 6, 12]]
    equalSplit([], 0,  [[9, 18], [3, 2, 6, 12], [11, 2, 4]]

equalSplit([], 0, [[9], [18, 3, 2, 6], [12, 11, 2, 4]]) =
    variance((9), (18 + 3 + 2 + 6), (12 + 11 + 2 + 4)) =
    variance(9, 29, 29) = 266.67

equalSplit([], 0,  [[9, 18], [3, 2, 6], [12, 11, 2, 4]]) =
    variance((9 + 18), (3 + 2 + 6), (12 + 11 + 2 + 4)) =
    variance(27, 11, 29)  = 194.67

equalSplit([], 0,  [[9, 18], [3, 2, 6, 12], [11, 2, 4]] =
    variance((9 + 18), (3 + 2 + 6 + 12), (11 + 2 + 4)) =
    variance(27, 23, 17)  = 50.67

There is almost certainly plenty that could be done to clean up this code. But perhaps it's at least a start on your problem. It's been a very interesting challenge.

Update

I did create a version which does not depend upon Ramda. The code is very similar. (I guess I really didn't need Ramda, at least not for the final version.):

var equalSplit = (function() {
    var sum = function(list) {return list.reduce(function(a, b) {
        return a + b;
    }, 0);};
    var square = function(x) {return x * x;};
    var variance = function(groups) {
        var sizes = groups.map(sum),
            mean = sum(sizes) / sizes.length;
        return sum(sizes.map(function(size) {
            return square(size - mean);
        }, sizes));
    };
    var firstGroupChoices = function(group, count) {
        if (group.length < 2 || count < 2) {return group;}
        var mean = sum(group) / count;
        var current = 0, next = group[0], idx = 0;
        do {
            current = next;
            next = next + group[++idx];
        } while (next < mean);
        if (next === mean) {
            return [group.slice(0, idx + 1)]
        } else {
            return [
                group.slice(0, idx),
                group.slice(0, idx + 1)
            ];
        }
    };
    var val = function(group, count, soFar) {
        if (count <= 0 || group.length == 0) {
            return {groups: soFar, variance: variance(soFar)};
        }
        if (count == 1) {
            return val([], 0, soFar.concat([group]));
        }
        var choices = firstGroupChoices(group, count);
        var values = choices.map(function(choice){
            return val(group.slice(choice.length), count - 1, 
                       soFar.concat([choice]));
        });
        return values.sort(function(a, b) {
            return a.variance - b.variance;
        })[0];
    };
    return function(group, count) {
        return val(group, count, []).groups;
    }
}());

Of course, as now noted at the top, this answers a somewhat different question than is now being asked, but I think it's a more interesting question! :-)