Sampling a random subset from an array

2019-01-04 02:28发布

What is a clean way of taking a random sample, without replacement from an array in javascript? So suppose there is an array

x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]

and I want to randomly sample 5 unique values; i.e. generate a random subset of length 5. To generate one random sample one could do something like:

x[Math.floor(Math.random()*x.length)];

But if this is done multiple times, there is a risk of a grabbing the same entry multiple times.

9条回答
ゆ 、 Hurt°
2楼-- · 2019-01-04 02:49

Or... if you use underscore.js...

_und = require('underscore');

...

function sample(a, n) {
    return _und.take(_und.shuffle(a), n);
}

Simple enough.

查看更多
一纸荒年 Trace。
3楼-- · 2019-01-04 02:49

Here is another implementation based on Fisher-Yater Shuffle. But this one is optimized for the case where the sample size is significantly smaller than the array length. This implementation doesn't scan the entire array nor allocates arrays as large as the original array. It uses sparse arrays to reduce memory allocation.

function getRandomSample(array, count) {
    var indices = [];
    var result = new Array(count);
    for (let i = 0; i < count; i++ ) {
        let j = Math.floor(Math.random() * (array.length - i) + i);
        result[i] = array[indices[j] === undefined ? j : indices[j]];
        indices[j] = indices[i] === undefined ? i : indices[i];
    }
    return result;
}
查看更多
forever°为你锁心
4楼-- · 2019-01-04 02:51

A little late to the party but this could be solved with underscore's new sample method (underscore 1.5.2 - Sept 2013):

var x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15];

var randomFiveNumbers = _.sample(x, 5);
查看更多
疯言疯语
5楼-- · 2019-01-04 03:00

In my opinion, I do not think shuffling the entire deck necessary. You just need to make sure your sample is random not your deck. What you can do, is select the size amount from the front then swap each one in the sampling array with another position in it. So, if you allow replacement you get more and more shuffled.

function getRandom(length) { return Math.floor(Math.random()*(length)); }

function getRandomSample(array, size) {
    var length = array.length;

    for(var i = size; i--;) {
        var index = getRandom(length);
        var temp = array[index];
        array[index] = array[i];
        array[i] = temp;
    }

    return array.slice(0, size);
}

This algorithm is only 2*size steps, if you include the slice method, to select the random sample.


More Random

To make the sample more random, we can randomly select the starting point of the sample. But it is a little more expensive to get the sample.

function getRandomSample(array, size) {
    var length = array.length, start = getRandom(length);

    for(var i = size; i--;) {
        var index = (start + i)%length, rindex = getRandom(length);
        var temp = array[rindex];
        array[rindex] = array[index];
        array[index] = temp;
    }
    var end = start + size, sample = array.slice(start, end);
    if(end > length)
        sample = sample.concat(array.slice(0, end - length));
    return sample;
}

What makes this more random is the fact that when you always just shuffling the front items you tend to not get them very often in the sample if the sampling array is large and the sample is small. This would not be a problem if the array was not supposed to always be the same. So, what this method does is change up this position where the shuffled region starts.


No Replacement

To not have to copy the sampling array and not worry about replacement, you can do the following but it does give you 3*size vs the 2*size.

function getRandomSample(array, size) {
    var length = array.length, swaps = [], i = size, temp;

    while(i--) {
        var rindex = getRandom(length);
        temp = array[rindex];
        array[rindex] = array[i];
        array[i] = temp;
        swaps.push({ from: i, to: rindex });
    }

    var sample = array.slice(0, size);

    // Put everything back.
    i = size;
    while(i--) {
         var pop = swaps.pop();
         temp = array[pop.from];
         array[pop.from] = array[pop.to];
         array[pop.to] = temp;
    }

    return sample;
}

No Replacement and More Random

To apply the algorithm that gave a little bit more random samples to the no replacement function:

function getRandomSample(array, size) {
    var length = array.length, start = getRandom(length),
        swaps = [], i = size, temp;

    while(i--) {
        var index = (start + i)%length, rindex = getRandom(length);
        temp = array[rindex];
        array[rindex] = array[index];
        array[index] = temp;
        swaps.push({ from: index, to: rindex });
    }

    var end = start + size, sample = array.slice(start, end);
    if(end > length)
        sample = sample.concat(array.slice(0, end - length));

    // Put everything back.
    i = size;
    while(i--) {
         var pop = swaps.pop();
         temp = array[pop.from];
         array[pop.from] = array[pop.to];
         array[pop.to] = temp;
    }

    return sample;
}

Faster...

Like all of these post, this uses the Fisher-Yates Shuffle. But, I removed the over head of copying the array.

function getRandomSample(array, size) {
    var r, i = array.length, end = i - size, temp, swaps = getRandomSample.swaps;

    while (i-- > end) {
        r = getRandom(i + 1);
        temp = array[r];
        array[r] = array[i];
        array[i] = temp;
        swaps.push(i);
        swaps.push(r);
    }

    var sample = array.slice(end);

    while(size--) {
        i = swaps.pop();
        r = swaps.pop();
        temp = array[i];
        array[i] = array[r];
        array[r] = temp;
    }

    return sample;
}
getRandomSample.swaps = [];
查看更多
一纸荒年 Trace。
6楼-- · 2019-01-04 03:00

If you're using lodash the API changed in 4.x:

const oneItem = _.sample(arr);
const nItems = _.sampleSize(arr, n);

https://lodash.com/docs#sampleSize

查看更多
SAY GOODBYE
7楼-- · 2019-01-04 03:05

You can get a 5 elements sample by this way:

var sample = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
.map(a => [a,Math.random()])
.sort((a,b) => {return a[1] < b[1] ? -1 : 1;})
.slice(0,5)
.map(a => a[0]);

You can define it as a function to use in your code:

var randomSample = function(arr,num){ return arr.map(a => [a,Math.random()]).sort((a,b) => {return a[1] < b[1] ? -1 : 1;}).slice(0,num).map(a => a[0]); }

Or add it to the Array object itself:

Array.prototype.sample = function(num){ return this.map(a => [a,Math.random()]).sort((a,b) => {return a[1] < b[1] ? -1 : 1;}).slice(0,num).map(a => a[0]); };

if you want, you can separate the code for to have 2 functionalities (Shuffle and Sample):

Array.prototype.shuffle = function(){ return this.map(a => [a,Math.random()]).sort((a,b) => {return a[1] < b[1] ? -1 : 1;}).map(a => a[0]); };
Array.prototype.sample = function(num){ return this.shuffle().slice(0,num); };
查看更多
登录 后发表回答