Fast stable sorting algorithm implementation in ja

2019-01-01 05:38发布

问题:

I\'m looking to sort an array of about 200-300 objects, sorting on a specific key and a given order (asc/desc). The order of results must be consistent and stable.

What would be the best algorithm to use, and could you provide an example of it\'s implementation in javascript?

Thanks!

回答1:

It is possible to get a stable sorting from a non-stable sort function.

Before sorting you get the position of all the elements. In your sort condition, if both elements are equal, then you sort by the position.

Tada! You\'ve got a stable sort.

I\'ve written an article about it on my blog if you want to know more about this technique and how to implement it: http://blog.vjeux.com/2010/javascript/javascript-sorting-table.html



回答2:

Since you are looking for something stable, the merge sort should do.

http://www.stoimen.com/blog/2010/07/02/friday-algorithms-javascript-merge-sort/

The code can be found at the above website:

function mergeSort(arr)
{
    if (arr.length < 2)
        return arr;

    var middle = parseInt(arr.length / 2);
    var left   = arr.slice(0, middle);
    var right  = arr.slice(middle, arr.length);

    return merge(mergeSort(left), mergeSort(right));
}

function merge(left, right)
{
    var result = [];

    while (left.length && right.length) {
        if (left[0] <= right[0]) {
            result.push(left.shift());
        } else {
            result.push(right.shift());
        }
    }

    while (left.length)
        result.push(left.shift());

    while (right.length)
        result.push(right.shift());

    return result;
}

EDIT:

According to this post, it looks like Array.Sort in some implementations uses a merge sort.



回答3:

I know that this question has been answered for some time, but I happen to have a good stable merge sort implementation for Array and jQuery in my clipboard, so I\'ll share it in the hopes that some future searchers might find it useful.

It allows you to specify your own comparison function just like the normal Array.sort implementation.

Implementation

// Add stable merge sort to Array and jQuery prototypes
// Note: We wrap it in a closure so it doesn\'t pollute the global
//       namespace, but we don\'t put it in $(document).ready, since it\'s
//       not dependent on the DOM
(function() {

  // expose to Array and jQuery
  Array.prototype.mergeSort = jQuery.fn.mergeSort = mergeSort;

  function mergeSort(compare) {

    var length = this.length,
        middle = Math.floor(length / 2);

    if (!compare) {
      compare = function(left, right) {
        if (left < right)
          return -1;
        if (left == right)
          return 0;
        else
          return 1;
      };
    }

    if (length < 2)
      return this;

    return merge(
      this.slice(0, middle).mergeSort(compare),
      this.slice(middle, length).mergeSort(compare),
      compare
    );
  }

  function merge(left, right, compare) {

    var result = [];

    while (left.length > 0 || right.length > 0) {
      if (left.length > 0 && right.length > 0) {
        if (compare(left[0], right[0]) <= 0) {
          result.push(left[0]);
          left = left.slice(1);
        }
        else {
          result.push(right[0]);
          right = right.slice(1);
        }
      }
      else if (left.length > 0) {
        result.push(left[0]);
        left = left.slice(1);
      }
      else if (right.length > 0) {
        result.push(right[0]);
        right = right.slice(1);
      }
    }
    return result;
  }
})();

Example Usage

var sorted = [
  \'Finger\',
  \'Sandwich\',
  \'sandwich\',
  \'5 pork rinds\',
  \'a guy named Steve\',
  \'some noodles\',
  \'mops and brooms\',
  \'Potato Chip Brand® chips\'
].mergeSort(function(left, right) {
  lval = left.toLowerCase();
  rval = right.toLowerCase();

  console.log(lval, rval);
  if (lval < rval)
    return -1;
  else if (lval == rval)
    return 0;
  else
    return 1;
});

sorted == [\"5 pork rinds\", \"a guy named Steve\", \"Finger\", \"mops and brooms\", \"Potato Chip Brand® chips\", \"Sandwich\", \"sandwich\", \"some noodles\"];


回答4:

You can use the following polyfill to implement a stable sort regardless of the native implementation, based on the assertion made in this answer:

// ECMAScript 5 polyfill
Object.defineProperty(Array.prototype, \'stableSort\', {
  configurable: true,
  writable: true,
  value: function stableSort (compareFunction) {
    \'use strict\'

    var length = this.length
    var entries = Array(length)
    var index

    // wrap values with initial indices
    for (index = 0; index < length; index++) {
      entries[index] = [index, this[index]]
    }

    // sort with fallback based on initial indices
    entries.sort(function (a, b) {
      var comparison = Number(this(a[1], b[1]))
      return comparison || a[0] - b[0]
    }.bind(compareFunction))

    // re-map original array to stable sorted values
    for (index = 0; index < length; index++) {
      this[index] = entries[index][1]
    }
    
    return this
  }
})

// usage
const array = Array(500000).fill().map(() => Number(Math.random().toFixed(4)))

const alwaysEqual = () => 0
const isUnmoved = (value, index) => value === array[index]

// not guaranteed to be stable
console.log(\'sort() stable?\', array
  .slice()
  .sort(alwaysEqual)
  .every(isUnmoved)
)
// guaranteed to be stable
console.log(\'stableSort() stable?\', array
  .slice()
  .stableSort(alwaysEqual)
  .every(isUnmoved)
)

// performance using realistic scenario with unsorted big data
function time(arrayCopy, algorithm, compare) {
  var start
  var stop
  
  start = performance.now()
  algorithm.call(arrayCopy, compare)
  stop = performance.now()
  
  return stop - start
}

const ascending = (a, b) => a - b

const msSort = time(array.slice(), Array.prototype.sort, ascending)
const msStableSort = time(array.slice(), Array.prototype.stableSort, ascending)

console.log(\'sort()\', msSort.toFixed(3), \'ms\')
console.log(\'stableSort()\', msStableSort.toFixed(3), \'ms\')
console.log(\'sort() / stableSort()\', (100 * msSort / msStableSort).toFixed(3) + \'%\')

Running the performance tests implemented above, stableSort() appears to run at about 57% of the speed of sort() on Chrome version 59-61.

Using .bind(compareFunction) on the wrapped anonymous function within stableSort() boosted that relative performance from about 38% by avoiding an unneeded scoped reference to compareFunction on each call by assigning it to the context instead.

Update

Changed ternary operator to logical short-circuiting, which tends to perform better on average (appears to make a 2-3% difference in efficiency).



回答5:

Somewhat shorter version of the same thing using ES2017 features like arrow functions and destructuring:

Function

var stableSort = (arr, compare) => arr
  .map((item, index) => ({item, index}))
  .sort((a, b) => compare(a.item, b.item) || a.index - b.index)
  .map(({item}) => item)

It accepts input array and compare function:

stableSort([5,6,3,2,1], (a, b) => a - b)

It also returns new array instead of making in-place sort like the built-in Array.sort() function.

Test

If we take the following input array, initially sorted by weight:

// sorted by weight
var input = [
  { height: 100, weight: 80 },
  { height: 90, weight: 90 },
  { height: 70, weight: 95 },
  { height: 100, weight: 100 },
  { height: 80, weight: 110 },
  { height: 110, weight: 115 },
  { height: 100, weight: 120 },
  { height: 70, weight: 125 },
  { height: 70, weight: 130 },
  { height: 100, weight: 135 },
  { height: 75, weight: 140 },
  { height: 70, weight: 140 }
]

Then sort it by height using stableSort:

stableSort(input, (a, b) => a.height - b.height)

Results in:

// Items with the same height are still sorted by weight 
// which means they preserved their relative order.
var stable = [
  { height: 70, weight: 95 },
  { height: 70, weight: 125 },
  { height: 70, weight: 130 },
  { height: 70, weight: 140 },
  { height: 75, weight: 140 },
  { height: 80, weight: 110 },
  { height: 90, weight: 90 },
  { height: 100, weight: 80 },
  { height: 100, weight: 100 },
  { height: 100, weight: 120 },
  { height: 100, weight: 135 },
  { height: 110, weight: 115 }
]

However sorting the same input array using the built-in Array.sort() (in Chrome/NodeJS):

input.sort((a, b) => a.height - b.height)

Returns:

var unstable = [
  { height: 70, weight: 140 },
  { height: 70, weight: 95 },
  { height: 70, weight: 125 },
  { height: 70, weight: 130 },
  { height: 75, weight: 140 },
  { height: 80, weight: 110 },
  { height: 90, weight: 90 },
  { height: 100, weight: 100 },
  { height: 100, weight: 80 },
  { height: 100, weight: 135 },
  { height: 100, weight: 120 },
  { height: 110, weight: 115 }
]

Resources

  • Wikipedia
  • MDN
  • JSFiddle

Update

Array.prototype.sort is now stable in V8 v7.0 / Chrome 70!

Previously, V8 used an unstable QuickSort for arrays with more than 10 elements. Now, we use the stable TimSort algorithm.

source



回答6:

The following sorts the supplied array, by applying the supplied compare function, returning the original index comparison when the compare function returns 0:

function stableSort(arr, compare) {
    var original = arr.slice(0);

    arr.sort(function(a, b){
        var result = compare(a, b);
        return result === 0 ? original.indexOf(a) - original.indexOf(b) : result;
    });

    return arr;
}

The example below sorts an array of names by surname, retaining the order of equal surnames:

var names = [
	{ surname: \"Williams\", firstname: \"Mary\" },
	{ surname: \"Doe\", firstname: \"Mary\" }, 
	{ surname: \"Johnson\", firstname: \"Alan\" }, 
	{ surname: \"Doe\", firstname: \"John\" }, 
	{ surname: \"White\", firstname: \"John\" }, 
	{ surname: \"Doe\", firstname: \"Sam\" }
]

function stableSort(arr, compare) {
    var original = arr.slice(0);

    arr.sort(function(a, b){
        var result = compare(a, b);
        return result === 0 ? original.indexOf(a) - original.indexOf(b) : result;
    });
	
    return arr;
}

stableSort(names, function(a, b) { 
	return a.surname > b.surname ? 1 : a.surname < b.surname ? -1 : 0;
})

names.forEach(function(name) {
	console.log(name.surname + \', \' + name.firstname);
});



回答7:

Here\'s a stable implementation. It works by using the native sort, but in cases where elements compare as equal, you break ties using the original index position.

function stableSort(arr, cmpFunc) {
    //wrap the arr elements in wrapper objects, so we can associate them with their origional starting index position
    var arrOfWrapper = arr.map(function(elem, idx){
        return {elem: elem, idx: idx};
    });

    //sort the wrappers, breaking sorting ties by using their elements orig index position
    arrOfWrapper.sort(function(wrapperA, wrapperB){
        var cmpDiff = cmpFunc(wrapperA.elem, wrapperB.elem);
        return cmpDiff === 0 
             ? wrapperA.idx - wrapperB.idx
             : cmpDiff;
    });

    //unwrap and return the elements
    return arrOfWrapper.map(function(wrapper){
        return wrapper.elem;
    });
}

a non-thorough test

var res = stableSort([{a:1, b:4}, {a:1, b:5}], function(a, b){
    return a.a - b.a;
});
console.log(res);

another answer alluded to this, but didn\'t post teh codez.

but, its not fast according to my benchmark. I modified a merge sort impl to accept a custom comparator function, and it was much faster.



回答8:

You can also use Timsort. This is a really complicated algorithm (400+ lines, hence no source code here), so see Wikipedia\'s description or use one of the existing JavaScript implementations:

GPL 3 implementation. Packaged as Array.prototype.timsort. Appears to be an exact rewrite of Java code.

Public domain implementation Meant as a tutorial, the sample code only shows its use with integers.

Timsort is a highly optimized hybrid of mergesort and shuffle sort and is the default sorting algorithm in Python and in Java (1.7+). It is a complicated algorithm, since it uses different algorithms for many special cases. But as a result it\'s extremely fast under a wide variety of circumstances.



回答9:

A simple one mergeSort from http://www.stoimen.com/blog/2010/07/02/friday-algorithms-javascript-merge-sort/

var a = [34, 203, 3, 746, 200, 984, 198, 764, 9];

function mergeSort(arr)
{
    if (arr.length < 2)
         return arr;

    var middle = parseInt(arr.length / 2);
    var left   = arr.slice(0, middle);
    var right  = arr.slice(middle, arr.length);

    return merge(mergeSort(left), mergeSort(right));
}

function merge(left, right)
{
     var result = [];

    while (left.length && right.length) {
         if (left[0] <= right[0]) {
             result.push(left.shift());
         } else {
            result.push(right.shift());
         }
    }

    while (left.length)
        result.push(left.shift());

    while (right.length)
        result.push(right.shift());

    return result;
}

console.log(mergeSort(a));


回答10:

Counting Sort is faster than merge sort (it performs in O(n) time) and it is intended for use on integers.

Math.counting_sort = function (m) {
    var i
    var j
    var k
    var step
    var start
    var Output
    var hash
    k = m.length
    Output = new Array ()
    hash = new Array ()
    // start at lowest possible value of m
    start = 0
    step = 1
    // hash all values
    i = 0
    while ( i < k ) {
        var _m = m[i]
        hash [_m] = _m
        i = i + 1
    }
    i = 0
    j = start
    // find all elements within x
    while ( i < k ) {
        while ( j != hash[j] ) {
            j = j + step
        }
        Output [i] = j
        i = i + 1
        j = j + step
    }
    return Output
}

Example:

var uArray = new Array ()<br/>
var sArray = new Array ()<br/><br/>
uArray = [ 10,1,9,2,8,3,7,4,6,5 ]<br/>
sArray = Math.counting_sort ( uArray ) // returns a sorted array


回答11:

I have to sort multidimensional arrays by an arbitrary column, and then by another. I use this function to sort:

function sortMDArrayByColumn(ary, sortColumn){

    //Adds a sequential number to each row of the array
    //This is the part that adds stability to the sort
    for(var x=0; x<ary.length; x++){ary[x].index = x;}

    ary.sort(function(a,b){
        if(a[sortColumn]>b[sortColumn]){return 1;}
        if(a[sortColumn]<b[sortColumn]){return -1;}
        if(a.index>b.index){
            return 1;
        }
        return -1;
    });
}

Notice that ary.sort never returns zero, which is where some implementations of the \"sort\" function make decisions that might not be right.

This is pretty darn fast, too.



回答12:

Here\'s how you could extend JS default Array object with a prototype method utilizing MERGE SORT. This method allows for sorting on a specific key (first parameter) and a given order (\'asc\'/\'desc\' as second parameter)

Array.prototype.mergeSort = function(sortKey, direction){
  var unsortedArray = this;
  if(unsortedArray.length < 2) return unsortedArray;

  var middle = Math.floor(unsortedArray.length/2);
  var leftSubArray = unsortedArray.slice(0,middle).mergeSort(sortKey, direction);
  var rightSubArray = unsortedArray.slice(middle).mergeSort(sortKey, direction);

  var sortedArray = merge(leftSubArray, rightSubArray);
  return sortedArray;

  function merge(left, right) {
    var combined = [];
    while(left.length>0 && right.length>0){
      var leftValue = (sortKey ? left[0][sortKey] : left[0]);
      var rightValue = (sortKey ? right[0][sortKey] : right[0]);
      combined.push((direction === \'desc\' ? leftValue > rightValue : leftValue < rightValue) ? left.shift() : right.shift())
    }
    return combined.concat(left.length ? left : right)
  }
}

You can test this out yourself by dropping the above snippet into your browser console, then trying:

var x = [2,76,23,545,67,-9,12];
x.mergeSort(); //[-9, 2, 12, 23, 67, 76, 545]
x.mergeSort(undefined, \'desc\'); //[545, 76, 67, 23, 12, 2, -9]

Or order based on a specific field in an array of objects:

var y = [
  {startTime: 100, value: \'cat\'},
  {startTime: 5, value: \'dog\'},
  {startTime: 23, value: \'fish\'},
  {startTime: 288, value: \'pikachu\'}
]
y.mergeSort(\'startTime\');
y.mergeSort(\'startTime\', \'desc\');


回答13:

So I needed a stable sort for my React+Redux app, and Vjeux\'s answer here helped me. However, my (generic) solution seems different than the others I see here so far, so I\'m sharing it in case anyone else has a matching use-case:

  • I really just want to have something similar to the sort() API, where I can pass a comparator function.
  • Sometimes I can sort in-place, and sometimes my data is immutable (because Redux) and I need a sorted copy instead. So I need a stable sorting function for each use-case.
  • ES2015.

My solution is to create a typed array of indices, then use a comparison function to sort these indices based on the to-be-sorted array. Then we can use the sorted indices to either sort the original array or create a sorted copy in a single pass. If that\'s confusing, think of it this way: where you would normally pass a comparison function like:

(a, b) => { 
  /* some way to compare a and b, returning -1, 0, or 1 */ 
};

You now instead use:

(i, j) => { 
  let a = arrayToBeSorted[i], b = arrayToBeSorted[j]; 
  /* some way to compare a and b, returning -1 or 1 */
  return i - j; // fallback when a == b
}

Speed is good; it is basically the built-in sorting algorithm is, plus two linear passes in the end, and one extra layer of pointer indirection overhead.

Happy to receive feedback on this approach. Here is my full implementation of it it:

/**
 * - `array`: array to be sorted
 * - `comparator`: closure that expects indices `i` and `j`, and then
 *   compares `array[i]` to `array[j]` in some way. To force stability,
 *   end with `i - j` as the last \"comparison\".
 * 
 * Example:
 * ```
 *  let array = [{n: 1, s: \"b\"}, {n: 1, s: \"a\"}, {n:0, s: \"a\"}];
 *  const comparator = (i, j) => {
 *    const ni = array[i].n, nj = array[j].n;
 *    return ni < nj ? -1 :
 *      ni > nj ? 1 :
 *        i - j;
 *  };
 *  stableSortInPlace(array, comparator);
 *  // ==> [{n:0, s: \"a\"}, {n:1, s: \"b\"}, {n:1, s: \"a\"}]
 * ```
 */
function stableSortInPlace(array, comparator) {
  return sortFromIndices(array, findIndices(array, comparator));
}

function stableSortedCopy(array, comparator){
  let indices = findIndices(array, comparator);
  let sortedArray = [];
  for (let i = 0; i < array.length; i++){
    sortedArray.push(array[indices[i]]);
  }
  return sortedArray;
}

function findIndices(array, comparator){
  // Assumes we don\'t have to worry about sorting more than 
  // 4 billion elements; if you know the upper bounds of your
  // input you could replace it with a smaller typed array
  let indices = new Uint32Array(array.length);
  for (let i = 0; i < indices.length; i++) {
    indices[i] = i;
  }
  // after sorting, `indices[i]` gives the index from where
  // `array[i]` should take the value from, so to sort
  // move the value at at `array[indices[i]]` to `array[i]`
  return indices.sort(comparator);
}

// If I\'m not mistaken this is O(2n) - each value is moved
// only once (not counting the vacancy temporaries), and 
// we also walk through the whole array once more to check
// for each cycle.
function sortFromIndices(array, indices) {
  // there might be multiple cycles, so we must
  // walk through the whole array to check.
  for (let k = 0; k < array.length; k++) {
    // advance until we find a value in
    // the \"wrong\" position
    if (k !== indices[k]) {
      // create vacancy to use \"half-swaps\" trick,
      // props to Andrei Alexandrescu
      let v0 = array[k];
      let i = k;
      let j = indices[k];
      while (j !== k) {
        // half-swap next value
        array[i] = array[j];
        // array[i] now contains the value it should have,
        // so we update indices[i] to reflect this
        indices[i] = i;
        // go to next index
        i = j;
        j = indices[j];
      }
      // put original array[k] back in
      // and update indices
      array[i] = v0;
      indices[i] = i;
    }
  }
  return array;
}