How to get random number list, with fixed sum and

2019-06-06 06:08发布

问题:

How to get random number list by give size, and expectd sum, fully support

i hava a code sum-int.ts sum-float.ts internal/sum-num.ts

whai i want to do

  1. rN = radom number ( float or int ) between min ~ max
  2. size = [r1, r2, r3, ...rN].length
  3. sum = r1 + r2 + r3 + ...rN
  4. all rN should >= min, and <= max
  5. support (unique / no-unique) values

but now there has problem

  • can't make it work when max <= 0 with float (so i disable input it)
  • can't make it work when max <= 1 with int (so i disable input it)
  • the code has logic bug, so i come ask here how to make it by js


update

thx @SeverinPappadeux for int version, and idea for float

  • coreFnRandSumInt int version
  • coreFnRandSumFloat float version

{ size: 2, sum: 5, min: 0, max: 5, n: 5, maxv: 5 }
true 0 5 [ 2, 3 ] 0 [ 2, 3 ]
{ bool: true,
  ret_a: [ 2, 3 ],
  a_sum: 5,
  ret_b: [ 2, 3 ],
  b_sum: 5 }
[ 2, 3 ] 5
----------
{ size: 6, sum: 13, min: -8, max: 15, n: 61, maxv: 23 }
false 0 61 [ 9, 8, 7, 3, 6, 28 ] -8 [ 9, 8, 7, 3, 6, 28 ]
false 1 61 [ 11, 9, 7, 4, 5, 25 ] -8 [ 11, 9, 7, 4, 5, 25 ]
true 2 13 [ 1, -1, 0, -2, 2, 13 ] -8 [ 9, 7, 8, 6, 10, 21 ]
{ bool: true,
  ret_a: [ 9, 7, 8, 6, 10, 21 ],
  a_sum: 61,
  ret_b: [ 1, -1, 0, -2, 2, 13 ],
  b_sum: 13 }
[ 1, -1, 0, -2, 2, 13 ] 13
----------
{ size: 6, sum: -13, min: -8, max: 15, n: 35, maxv: 23 }
true 0 -13 [ 0, -6, -1, -4, -7, 5 ] -8 [ 8, 2, 7, 4, 1, 13 ]
{ bool: true,
  ret_a: [ 8, 2, 7, 4, 1, 13 ],
  a_sum: 35,
  ret_b: [ 0, -6, -1, -4, -7, 5 ],
  b_sum: -13 }
[ 0, -6, -1, -4, -7, 5 ] -13
{ size: 6, sum: 0, min: -8, max: 15, n: 48, maxv: 23 }
true 0 0 [ -1, 0, -3, -2, -4, 10 ] -8 [ 7, 8, 5, 6, 4, 18 ]
{ bool: true,
  ret_a: [ 7, 8, 5, 6, 4, 18 ],
  a_sum: 48,
  ret_b: [ -1, 0, -3, -2, -4, 10 ],
  b_sum: 0 }
[ -1, 0, -3, -2, -4, 10 ] 0

/**
 * not support unique, but will try make unique if can
 * thx @SeverinPappadeux for int version
 *
 * @see https://stackoverflow.com/questions/53279807/how-to-get-random-number-list-with-fixed-sum-and-size
 */
export function coreFnRandSumInt(argv: ISumNumParameterWuthCache)
{
    let {
        random,
        size,
        sum,
        min,
        max,
    } = argv;

    let sum_1_to_size = sum_1_to_n(size);

    sum = isUnset(sum) ? sum_1_to_size : sum;

    expect(sum).integer();

    min = isUnset(min) ? (sum > 0 ? 0 : sum) : min;
    max = isUnset(max) ? Math.abs(sum) : max;

    expect(min).integer();
    expect(max).integer();

    let n_sum = Math.abs(sum - size * min);
    let maxv = max - min;

    /*
    console.log({
        sum_1_to_size,
        size,
        sum,
        min,
        max,
        n_sum,
        maxv,
    });
    */

    if (sum > 0)
    {
        expect(sum).gt(min)
    }

    /**
     * pre-check
     */
    //expect(maxv, `(max - min) should > sum_1_to_size`).gte(sum_1_to_size);

    /**
     * probabilities
     */
    let prob = get_prob(size, maxv);

    expect(prob).is.array.lengthOf(size);

    /**
     * make rmultinom use with random.next
     */
    let rmultinomFn = libRmath.Multinomial(fakeLibRmathRng(random.next)).rmultinom;

    /**
     * low value for speed up, but more chance fail
     */
    let n_len = argv.limit || 5 || n_sum;
    /**
     * rebase number
     */
    let n_diff: number = min;

    const rmultinomCreateFn = (n_len: number) => {
        return (rmultinomFn(n_len, n_sum, prob) as number[][])
            .reduce((a, value) =>
            {
                let i = value.length;
                let b_sum = 0;
                let bool = false;
                let unique_len = 0;

                while(i--)
                {
                    let v = value[i];
                    let n = v + n_diff;

                    if (value.indexOf(v) === i)
                    {
                        unique_len++;
                    }

                    if (n >= min && n <= max)
                    {
                        bool = true;
                        value[i] = n;

                        b_sum += n
                    }
                    else
                    {
                        bool = false;
                        break;
                    }
                }

                if (bool && b_sum === sum)
                {
                    let item = {
                        value,
                        unique_len,
                        b_sum,
                        bool,
                    };

                    a.push(item)
                }

                return a
            }, [] as {
                value: number[],
                unique_len: number,
                b_sum: number,
                bool: boolean,
            }[])
            .sort((a, b) => b.unique_len - a.unique_len)
            ;
    };

    /**
     * pre-make fail-back value
     */
    const cache_max = 10;
    let cache: number[][] = [];

    {
        let len = 200;

        let arr = array_unique(rmultinomCreateFn(len));

        if (arr.length)
        {
            let i = Math.min(cache_max, arr.length);

            while(i--)
            {
                cache.push(arr[i].value)
            }

            cache = array_unique(cache.map(v => v.sort()))
        }

        arr = undefined;

//      console.log(cache);
    }

    /**
     * try reset memory
     */
    argv = undefined;

    return () =>
    {
        let arr = rmultinomCreateFn(n_len);

        let ret_b: number[];
        let bool_toplevel: boolean;

        let c_len = cache.length;

        if (arr.length)
        {
            ret_b = arr[0].value;
            bool_toplevel = arr[0].bool;

            if (bool_toplevel && c_len < cache_max)
            {
                cache.push(ret_b);
            }
        }
        else if (c_len)
        {
            let i = UtilDistributions.randIndex(random, c_len);

            ret_b = cache[i];
            bool_toplevel = true;
        }

        if (!bool_toplevel || !ret_b)
        {
            throw new Error(`can't generator value by current input argv, or try set limit for high number`)
        }

        return ret_b;
    }
}

回答1:

Ok, here it is. Let's start with integer problem. Simplest way to proceed is to use statistical distribution which is naturally produced set of numbers summed to a fixed value. And there is such distribution - Multinomial distribution. It has fixed sum equal to n, it provides sampled values from 0 to n. Because requirement is for sampling interval to be arbitrary, first we will shift interval to have minimum at 0, sample and then shift it back. Note, that sampled values might be higher than desired max, thus we have to use rejection technique, where any sample above max will be reject and next attempt will be sampled.

We use multinomial sampling from Python/Numpy. Along with rejection you could add test for uniqueness as well. Code, python 3.7

import numpy as np

def sample_sum_interval(n: int, p, maxv: int):
    while True:
        q = np.random.multinomial(n, p, size=1)[0]
        v = np.where(q > maxv)
        if len(v[0]) == 0: # if len(v) > 0, some values are outside the range, reject
            # test on unique if len(np.unique(q)) == len(q)
            return q
    return None

k = 6
min = -8
max = 13
sum = 13

n    = sum - k*min # redefined sum
maxv = max - min   # redefined max, min would be 0
p = np.full((k), np.float64(1.0)/np.float64(k), dtype=np.float64) # probabilities

q = sample_sum_interval(n, p, maxv) + min # back to original interval
print(q)
print(np.sum(q))
print(np.mean(q))

q = sample_sum_interval(n, p, maxv) + min
print(q)
print(np.sum(q))
print(np.mean(q))

q = sample_sum_interval(n, p, maxv) + min
print(q)
print(np.sum(q))
print(np.mean(q))

Output

[ 5  0 -2  3  3  4]
13
2.1666666666666665
[ 3  3 -1  2  1  5]   
13                    
2.1666666666666665    
[-4  0  0  3 10  4]   
13                    
2.1666666666666665    

In order to translate it to Javascript, you would need either multinomial sampling, or binomial one, from binomial it is easy to get to multinomial.

UPDATE

ok, here is output when I don't add min to the result, sum would be 61 (13+6*8), range [0...21]

[11  7  6  8  9 20]
61
10.166666666666666
[ 5 10 14 13 14  5]
61
10.166666666666666
[ 9 12  7 15  7 11]
61
10.166666666666666

Apparently, there is a Javascript library with multinomial sampling, which is modeled after R, thank you @bluelovers

It should be called in the loop like this:

v = rmultinom(1, n, p);

and then v should be checked to be within range [0...maxv] and accepted or rejected if outside.

UPDATE II

Let me quickly (sorry, really have no time, will get to it tomorrow) describethe idea how I would do it for floats. There is similar distribution with generated bunch of numbers in the [0...1] range called Dirichlet distribution, and it always sums to fixed value of 1. In Python/Numpy it is https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/numpy.random.dirichlet.html.

Suppose I sampled n numbers di from Dirichlet and then map them to [min...max] interval:

xi = min + di*(max-min)

Then I sum them all, using property that all di sums to 1:

Sum = n*min + (max - min) = (n-1)*min + max

If Sum is fixed, then it mean we have to redefine maximum value - lets call it sampling maxs.

So sampling procedure would be as following - sample n [0...1] numbers from Dirichlet, map them to [min...maxs] interval, and check if any of those numbers are below desired max (original, not redefined). If it is, you accept, otherwise reject, like in integer case.

Code below

import numpy as np

def my_dirichlet(n: int):
    """
    This is equivalent to numpy.random.dirichlet when all alphas are equal to 1
    """
    q = np.random.exponential(scale = np.float64(1.0), size=n)
    norm = 1.0/np.sum(q)
    return norm * q

def sample_sum_interval(n: int, summa: np.float64, minv: np.float64, maxv: np.float64):
    maxs  = summa - np.float64(n-1)*minv # redefine maximum value of the interval is sum is fixed
    alpha = np.full(n, np.float64(1.0), dtype=np.float64)

    while True:
        q = my_dirichlet(n) # q = np.random.dirichlet(alpha)
        q = minv + q*(maxs - minv) # we map it to [minv...maxs]
        v = np.where(q > maxv)     # but we need it in the [minv...maxv], so accept or reject test
        if len(v[0]) == 0: # if len(v) > 0, some values are outside the range, reject, next sample
            return q
    return None

n = 5
min = np.float64(-2.0)
max = np.float64(3.0)
sum = np.float64(1.0)

q = sample_sum_interval(n, sum, min, max)
print(q)
print(np.sum(q))

q = sample_sum_interval(n, sum, min, max)
print(q)
print(np.sum(q))

q = sample_sum_interval(n, sum, min, max)
print(q)
print(np.sum(q))

I've put standard NumPy Dirichlet sampling as well as custom Dirichlet sampling. Apparently, libRmath.js has exponential distribution sampling, but no Dirichlet, but it could be replaced with user-define code and exponentials. Remember, NumPy operates on vectors with single operator, loops are implicit.

Output:

[-0.57390094 -1.80924001  0.47630282  0.80008638  2.10675174]
1.0000000000000013
[-1.12192892  1.18503129  0.97525135  0.69175429 -0.73010801]
0.9999999999999987
[-0.34803521  0.36499743 -1.165332    0.9433809   1.20498888]
0.9999999999999991