Compare arrays as (multi-) sets

2020-03-01 19:45发布

I'm looking for an efficient way to find out whether two arrays contain same amounts of equal elements (in the == sense), in any order:

foo = {/*some object*/}
bar = {/*some other object*/}

a = [1,2,foo,2,bar,2]
b = [bar,2,2,2,foo,1]

sameElements(a, b) --> true

PS. Note that pretty much every solution in the thread uses === and not == for comparison. This is fine for my needs though.

8条回答
狗以群分
2楼-- · 2020-03-01 20:09

Update 5 I posted a new answer with a different approach.

Update

I extended the code to have the possibility of either checking by reference or equality

just pass true as second parameter to do a reference check.

Also I added the example to Brunos JSPerf

  • It runs at about 11 ops/s doing a reference check

I will comment the code as soon(!) as I get some spare time to explain it a bit more, but at the moment don't have the time for that, sry. Done

Update 2.

Like Bruno pointed out in the comments sameElements([NaN],[NaN]) yields false

In my opinion this is the correct behaviour, as NaN is ambiguous and should always lead to a false result, at least when comparing NaN.equals(NaN). But he had quite a good point.

Whether

[1,2,foo,bar,NaN,3] should be equal to [1,3,foo,NaN,bar,2] or not.

OK, honestly I'm a bit torn on whether it should or not, so I added two flags.

  • Number.prototype.equal.NaN
    • If true
      • NaN.equals(NaN) //true
  • Array.prototype.equal.NaN
    • If true
      • [NaN].equals([NaN],true) //true
      • note this is only for reference checks. As a deep check would invoke Number.prototype.equals anyway

Update 3:

Dang, I totally missed 2 lines in the sort function.

Added

 r[0] = a._srt; //DANG i totally missed this line
 r[1] = b._srt; //And this.

Line 105 in the Fiddle

Which is kind of important as it determines the consistent order of the Elements.

Update 4
I tried to optimize the sort function a bit, and managed to get it up to about 20 ops/s.

Below is the updated code, as well as the updated fiddle =)

Also, I chose to mark the objects outside the sort function; it doesn't seem to make a performance difference anymore, and it's more readable.


Here is an approach using Object.defineProperty to add equals functions to

Array,Object,Number,String,Boolean's prototype to avoid typechecking in one function for performance reasons. As we can recursively call .equals on any element.

But of course checking Objects for equality may cause performance issues in big Objects.

So if anyone feels unpleasant manipulating native prototypes, just do a type check and put it into one function

// Booleans: nothing special to handle, loose equality is the whole contract.
Object.defineProperty(Boolean.prototype, "equals", {
    configurable: true,
    enumerable: false,
    value: function (other) {
        return this == other;
    }
});

// Numbers: loose equality, except that two NaNs may be declared equal
// by switching on the Number.prototype.equals.NaN flag below.
Object.defineProperty(Number.prototype, "equals", {
    configurable: true,
    enumerable: false,
    value: function (other) {
        var bothNaN = isNaN(this) && other != other; // other != other holds only for NaN
        if (bothNaN && Number.prototype.equals.NaN == true) {
            return true;
        }
        return this == other;
    }
});

// Flag read by Number.prototype.equals: set to true to make NaN.equals(NaN) return true.
Number.prototype.equals.NaN = false;

// Strings reuse the very same function object as booleans.
Object.defineProperty(String.prototype, "equals", {
    configurable: true,
    enumerable: false,
    value: Boolean.prototype.equals
});

Object.defineProperty(Object.prototype, "equals", {
    enumerable: false,
    configurable: true,
    /**
     * Compares this object with `c`.
     *
     * With `reference === true` only identity (===) is checked. Otherwise the
     * own enumerable keys of both objects are compared (independently of the
     * order in which they were defined) and every value is compared through
     * its own `.equals` method.
     *
     * @param {*} c - value to compare against
     * @param {boolean} [reference] - pass exactly `true` for an identity check
     * @returns {boolean} whether the two values are considered equal
     */
    value: function (c, reference) {
        if (true === reference) { // check by reference only
            return this === c;
        }
        // typeof null is "object", so reject it explicitly: Object.keys(null) would throw
        if (c === null || typeof this != typeof c) {
            return false;
        }
        // Sort the key lists so that property definition order does not matter
        var ownKeys = Object.keys(this).sort(),
            otherKeys = Object.keys(c).sort(),
            count = ownKeys.length;
        if (count !== otherKeys.length) {
            return false;
        }
        for (var i = 0; i < count; i++) {
            var key = ownKeys[i];
            if (key != otherKeys[i]) {
                return false; // different key names
            }
            var mine = this[key],
                theirs = c[key];
            if (mine == null || theirs == null) {
                // null/undefined have no .equals method; fall back to a loose comparison
                if (mine != theirs) {
                    return false;
                }
            } else if (!mine.equals(theirs)) {
                return false; // delegate to the value's own .equals
            }
        }
        return true; // every key and value matched
    }
});
Object.defineProperty(Array.prototype, "equals", {
    enumerable: false,
    configurable: true,
    /**
     * Compares this array with `c` regardless of element order.
     *
     * Both arrays are copied, brought into a common order with
     * Array.prototype.equals.sort and then compared position by position.
     *
     * @param {Array} c - array to compare against; any non-array yields false
     * @param {boolean} [reference] - truthy: compare elements with `!=`
     *        (objects by identity); falsy: compare via each element's `.equals`
     * @returns {boolean}
     *
     * NOTE(review): in deep mode (falsy `reference`) plain objects of the
     * second array keep their relative order, so the result can still depend
     * on the order of distinct-but-equal objects.
     */
    value: function (c, reference) {
        if (!Array.isArray(c)) {
            return false; // nothing sensible to compare against
        }
        var d = this.length;
        if (d != c.length) {
            return false;
        }
        var sort = Array.prototype.equals.sort;
        var f = sort(this.concat());
        if (reference) {
            // Second pass against itself: orders objects by first occurrence so
            // repeated references to one object end up next to each other.
            f = sort(f, f);
        }
        var g = sort(c.concat(), f);
        var e;

        if (reference) {
            for (e = 0; e < d; e++) {
                // unequal, unless both are NaN and the NaN flag allows that
                if (f[e] != g[e] && !(Array.prototype.equals.NaN && f[e] != f[e] && g[e] != g[e])) {
                    return false;
                }
            }
        } else {
            for (e = 0; e < d; e++) {
                // null/undefined carry no .equals method, compare them loosely
                if (f[e] == null ? f[e] != g[e] : !f[e].equals(g[e])) {
                    return false;
                }
            }
        }
        return true;
    }
});

Array.prototype.equals.NaN = false; //Set to true to allow [NaN].equals([NaN]) //true

Object.defineProperty(Array.prototype.equals, "sort", {
    enumerable: false,
    /**
     * Sorts `curr` in place: grouped by type (boolean, number, string, null,
     * plain object, undefined), primitives of one type in ascending order.
     *
     * @param {Array} curr - array to sort (mutated and returned)
     * @param {Array} [prev] - an array already sorted by this function; when
     *        given, plain objects of `curr` are ordered by their position in
     *        `prev`. Positions are stored temporarily in a `_pos` property on
     *        the objects of `prev` and removed again before returning.
     * @returns {Array} `curr`
     */
    value: function sort(curr, prev) {
        var weight = {
            "[object Undefined]": 6,
            "[object Object]": 5,
            "[object Null]": 4,
            "[object String]": 3,
            "[object Number]": 2,
            "[object Boolean]": 1
        };
        var i, item, kind;

        if (prev) { // mark the objects with their position in prev
            // Walk from the LAST valid index down to and including index 0.
            // Objects sit at the tail of a sorted array (only undefined comes
            // after them), so the walk may stop at the first other primitive.
            for (i = prev.length - 1; i >= 0; i--) {
                item = prev[i];
                kind = typeof item;
                if (item != null && kind === "object") {
                    item._pos = i; // lowest index wins for repeated objects
                } else if (kind !== "object" && kind != "undefined") {
                    break;
                }
            }
        }

        curr.sort(sorter);

        if (prev) { // remove the temporary markers again
            for (i = prev.length - 1; i >= 0; i--) {
                item = prev[i];
                kind = typeof item;
                if (kind === "object" && item != null) {
                    delete item._pos;
                } else if (kind !== "object" && kind != "undefined") {
                    break;
                }
            }
        }
        return curr;

        function sorter(a, b) {
            var tStr = Object.prototype.toString;
            var types = [tStr.call(a), tStr.call(b)];
            if (types[0] === types[1] && types[0] === "[object Object]") {
                if (prev) {
                    return a._pos - b._pos; // order dictated by prev
                }
                return a === b ? 0 : 1; // no reference order available
            } else if (types[0] !== types[1]) {
                return weight[types[0]] - weight[types[1]]; // group by type
            }
            return a > b ? 1 : a < b ? -1 : 0;
        }
    }
});

With this we can reduce the sameElements function to

/**
 * Thin wrapper: hands the comparison over to the `equals` method of `c`.
 *
 * @param {*} c - value whose `equals` method is called
 * @param {*} d - value passed as first argument to `c.equals`
 * @param {*} referenceCheck - passed through unchanged as second argument
 * @returns {*} whatever `c.equals` returns
 */
function sameElements(c, d, referenceCheck) {
    var outcome = c.equals(d, referenceCheck);
    return outcome;
}

Note. of course you could put all equal functions into the sameElements function, for the cost of the typechecking.

Now here are 3 examples: 1 with deep checking, 2 with reference checking.

// foo and bar have identical contents but are two separate objects.
var foo = { a: 1, obj: { number: 2, bool: true, string: "asd" }, arr: [1, 2, 3] };
var bar = { a: 1, obj: { number: 2, bool: true, string: "asd" }, arr: [1, 2, 3] };

// foobar differs from the two above only by the extra element in arr.
var foobar = { a: 1, obj: { number: 2, bool: true, string: "asd" }, arr: [1, 2, 3, 4] };

// a and b hold the same elements in a different order; c holds bar twice and no foo.
var a = [1, 2, foo, 2, bar, 2];
var b = [foo, 2, 2, 2, bar, 1];
var c = [bar, 2, 2, 2, bar, 1];

So these are the Arrays we compare. And the output is

  1. Check a and b with references only.

    console.log (sameElements ( a,b,true)) //true As they contain the same elements

  2. Check b and c with references only

    console.log (sameElements (b,c,true)) //false as c contains bar twice.

  3. Check b and c deeply

    console.log (sameElements (b,c,false)) //true as bar and foo are equal but not the same

  4. Check for 2 Arrays containing NaN

    Array.prototype.equals.NaN = true;
    console.log(sameElements([NaN],[NaN],true)); //true.
    Array.prototype.equals.NaN = false;

Demo on JSFiddle

查看更多
相关推荐>>
3楼-- · 2020-03-01 20:11

Using efficient lookup tables for the counts of the elements:

/**
 * Tells whether all arrays passed as arguments contain the same elements the
 * same number of times, in any order. Any number of arrays can be compared.
 *
 * Counting rules, per array:
 *  - NaN values are counted together,
 *  - null and undefined are counted together,
 *  - non-primitives are counted by identity,
 *  - booleans, numbers and strings share one lookup table keyed by their
 *    string form (booleans are converted to 0/1 first), so 1, "1" and true
 *    land on the same key.
 *
 * @param {Array} a - first array; further arrays are read from `arguments`
 * @returns {boolean} true when fewer than two arrays are given or all match
 */
function sameElements(a) { // can compare any number of arrays
    var map, maps = [], // counting booleans, numbers and strings
        nulls = [], // counting undefined and null
        nans = [], // counting nans
        objs, counts, objects = [],
        al = arguments.length,
        i, j, el;

    // quick escapes:
    if (al < 2)
        return true;
    var l0 = a.length;
    if ([].slice.call(arguments).some(function(s) { return s.length != l0; }))
        return false;

    for (i = 0; i < al; i++) {
        var multiset = arguments[i];
        // No prototype: keys such as "toString" or "constructor" must not be
        // found through inheritance by the `in` test below.
        maps.push(map = Object.create(null));
        objects.push({vals: objs = [], count: counts = []});
        nulls[i] = 0;
        nans[i] = 0;
        for (j = 0; j < l0; j++) {
            var val = multiset[j];
            if (val !== val)
                nans[i]++;
            else if (val == null) // matches both null and undefined
                nulls[i]++;
            else if (Object(val) === val) { // non-primitive
                var ind = objs.indexOf(val);
                if (ind > -1)
                    counts[ind]++;
                else
                    objs.push(val), counts.push(1);
            } else { // booleans, strings and numbers do compare together
                if (typeof val == "boolean")
                    val = +val;
                if (val in map)
                    map[val]++;
                else
                    map[val] = 1;
            }
        }
    }

    // testing if nulls and nans are the same everywhere
    for (i = 1; i < al; i++)
        if (nulls[i] != nulls[0] || nans[i] != nans[0])
            return false;

    // testing if primitives were the same everywhere
    var map0 = maps[0];
    for (el in map0)
        for (i = 1; i < al; i++) {
            if (map0[el] !== maps[i][el])
                return false;
            delete maps[i][el]; // whatever is left afterwards is unmatched
        }
    for (i = 1; i < al; i++)
        for (el in maps[i])
            return false;

    // testing if objects were the same everywhere
    var objs0 = objects[0].vals,
        ol = objs0.length,
        counts0 = objects[0].count;
    for (i = 1; i < al; i++)
        if (objects[i].count.length != ol)
            return false;
    for (i = 0; i < ol; i++)
        for (j = 1; j < al; j++)
            // an unknown object gives index -1, i.e. an undefined count
            if (objects[j].count[ objects[j].vals.indexOf(objs0[i]) ] != counts0[i])
                return false;

    // else, the multisets are equal:
    return true;
}

It still uses an indexOf search amongst all objects, so if you have multisets with many different objects you might want to optimize that part as well. Have a look at Unique ID or object signature (and its duplicate questions) for how to get lookup table keys for them. And if you don't have many primitive values in the multisets, you might just store them in arrays and sort those before comparing each item-by-item (like @Bruno did).

Disclaimer: This solution doesn't try to get the [[PrimitiveValue]] of objects, they will never be counted as equal to primitives (while == would do).

Here is the update on @Bruno's jsperf test of the answers, yet I guess only two objects (each of them present 500 times in the 10k array) and no duplicate primitive values are not representative.

查看更多
Lonely孤独者°
4楼-- · 2020-03-01 20:14

Edit 2
1) Thanks to user2357112 for pointing out the Object.prototype.toString.call issue. This also showed the reason it was that fast: it didn't consider Arrays ...

I fixed the code,it should be working now :), unfortunately its now at about 59ops/s on chrome and 45ops/s on ff.

Fiddle and JSPerf is updated.

Edit
1) I fixed the code; it now supports multiple variables referencing the same Object. It is a little bit slower than before, but still over 100 ops/s on Chrome.

2) I tried using a bitmask instead of an array to keep multiple positions of the objects, but it's nearly 15 ops/s slower

3) As pointed out in the comments, I forgot to reset tmp after [[get]] is called. I fixed the code, the fiddle, and the perf.


So thanks to user2357112 with his Answer heres another approach using counting

/**
 * sameElements(a, b): tells whether two arrays hold the same elements the same
 * number of times, in any order. Primitives are matched per type (1 and "1"
 * differ, NaN matches NaN), objects and functions by identity.
 *
 * How it works: `types` has one accessor property per supported typeof name
 * (plus "null"). Assigning a value to `types[name]` tallies it (+1 while the
 * first array is read, -1 while the second is read); reading `types[name]`
 * reports whether that tally is balanced and resets it for the next call.
 *
 * While a comparison runs, objects and functions temporarily carry a "__pos"
 * property; it is removed again before the function returns.
 *
 * Values of any other type (no accessor defined for them) are not tallied.
 */
var sameElements = (function () {
    var FLAG = "__pos"; // name of the temporary bookkeeping property
    var adding;         // true while reading the first array, false for the second
    var types = {};

    // typeof, except that null gets a name of its own
    function typeKey(value) {
        return value === null ? "null" : typeof value;
    }

    // Tally per distinct value, keyed by the value's string form.
    function defineKeyedCounter(name) {
        var counts = Object.create(null); // no inherited keys to trip over
        Object.defineProperty(types, name, {
            set: function (v) {
                counts[v] = (counts[v] || 0) + (adding ? 1 : -1);
            },
            get: function () {
                var balanced = 1;
                for (var k in counts) {
                    balanced &= !counts[k];
                }
                counts = Object.create(null); // reset for the next comparison
                return balanced;
            }
        });
    }

    // Single tally for types that have just one value (undefined, null).
    function defineTally(name) {
        var balance = 0;
        Object.defineProperty(types, name, {
            set: function () {
                balance += adding ? 1 : -1;
            },
            get: function () {
                var balanced = !balance;
                balance = 0;
                return balanced;
            }
        });
    }

    // Identity-based tally: every unmatched value occupies a slot in `pending`
    // and remembers its slot indices in its FLAG property.
    function defineIdentityTally(name) {
        var pending = [];
        function add(v) {
            pending.push(v);
            if (v[FLAG] === undefined) {
                v[FLAG] = [pending.length - 1];
            } else {
                v[FLAG].push(pending.length - 1);
            }
        }
        Object.defineProperty(types, name, {
            set: function (v) {
                var slots;
                if (!adding && (slots = v[FLAG]) !== undefined) {
                    // seen before: cancel one pending occurrence
                    pending[slots.pop()] = undefined;
                    if (slots.length === 0) {
                        delete v[FLAG];
                    }
                } else {
                    add(v);
                }
            },
            get: function () {
                var balanced = true;
                for (var i = pending.length - 1; i >= 0; i--) {
                    var leftover = pending[i];
                    if (typeof leftover !== "undefined") {
                        balanced = false;
                        delete leftover[FLAG]; // leave no marker behind
                    }
                }
                pending = [];
                return balanced;
            }
        });
    }

    defineKeyedCounter("number");
    defineKeyedCounter("string");
    defineKeyedCounter("boolean");
    defineIdentityTally("object");
    defineIdentityTally("function");
    defineTally("undefined");
    defineTally("null");

    var typeNames = ["number", "string", "boolean", "object", "function", "undefined", "null"];

    return function eq(a, b) {
        // Different sizes can never match. Checked before any tally is touched,
        // otherwise an unknown object appearing twice in b would cancel itself.
        if (a.length !== b.length) {
            return false;
        }
        var i;
        adding = true;
        for (i = a.length - 1; i >= 0; i--) {
            types[typeKey(a[i])] = a[i];
        }
        adding = false;
        for (i = b.length - 1; i >= 0; i--) {
            types[typeKey(b[i])] = b[i];
        }
        // Read EVERY accessor (no short-circuit): reading is what resets the tallies.
        var balanced = 1;
        for (i = 0; i < typeNames.length; i++) {
            balanced &= types[typeNames[i]];
        }
        return !!balanced;
    };
})();

Here is a JSFiddle and a JSPerf (it uses the same Arrays a and b as in the previous answers perf) with this code vs the Closure compiled

Heres the output. note: it doesn't support a deep comparison anymore, as is

// Demo: two distinct objects used as array elements.
var foo = {a:2}    // first sample object
var bar = {a:1};
// a and b contain the same elements in a different order;
// c contains bar twice and foo not at all.
var a = [1, 2, foo, 2, bar, 2];
var b = [foo, 2, 2, 2, bar, 1];
var c = [bar, 2, 2, 2, bar, 1];
// Expected results are noted next to each call.
console.log(sameElements([NaN],[NaN])); //true
console.log (sameElements ( a,b))  //true
console.log (sameElements (b,c))   //false
查看更多
Summer. ? 凉城
5楼-- · 2020-03-01 20:21

I wasn't sure if "===" is OK, as the question is a bit vague... If so, this is quite a bit faster and simpler than some other possible ways of doing it:

/**
 * Tells whether a and b contain the same elements (compared with ===) the same
 * number of times, in any order.
 *
 * Every element of a has to be paired with its own, not yet used element of b;
 * a plain "is it somewhere in b" test would ignore how often a value occurs.
 * Because indexOf uses strict equality, NaN never matches anything.
 *
 * @param {Array} a
 * @param {Array} b
 * @returns {boolean}
 */
function isSame(a, b) {
  if (a.length != b.length) {
    return false;
  }
  var unmatched = b.slice(); // work on a copy, b itself stays untouched
  return a.every(function (item) {
    var at = unmatched.indexOf(item);
    if (at === -1) {
      return false;
    }
    unmatched.splice(at, 1); // each element of b can be used only once
    return true;
  });
}
查看更多
ゆ 、 Hurt°
6楼-- · 2020-03-01 20:25

Thanks everyone for sharing ideas! I've come up with the following

/**
 * Tells whether a and b contain the same elements the same number of times,
 * in any order.
 *
 * Each element is turned into a string key: its typeof name followed by either
 * the value itself (primitives) or its first index in `a` (objects, null and
 * functions, i.e. matched by identity; -1 when not present in a). The sorted
 * key lists are then compared entry by entry.
 *
 * @param {Array} a
 * @param {Array} b
 * @returns {boolean}
 */
function sameElements(a, b) {
    if (a.length !== b.length) {
        return false;
    }
    var hash = function (x) {
        var kind = typeof x;
        var byIdentity = kind == "object" || kind == "function";
        return kind + (byIdentity ? a.indexOf(x) : x);
    };
    var left = a.map(hash).sort();
    var right = b.map(hash).sort();
    // Compare key by key: joining the keys into one string would be ambiguous
    // as soon as a string element itself contains the separator.
    for (var i = 0; i < left.length; i++) {
        if (left[i] !== right[i]) {
            return false;
        }
    }
    return true;
}

This isn't the fastest solution, but IMO, most readable one so far.

查看更多
放荡不羁爱自由
7楼-- · 2020-03-01 20:27

UPDATE

As @Bergi and @thg435 point out my previous implementation was flawed so here is another implementation:

/**
 * Tells whether a and b contain the same elements the same number of times,
 * in any order. Primitives are compared with ==, NaN matches NaN, and
 * non-primitives are matched by identity.
 *
 * Sorted copies of both arrays are walked in parallel. Primitives must match
 * position by position; for non-primitives the number of occurrences in a and
 * in b is counted on the objects themselves (temporary __aCount / __bCount
 * properties, set through incrementCountA / incrementCountB) and compared at
 * the end. The temporary properties are removed before returning.
 *
 * @param {Array} a - left untouched (a copy is sorted)
 * @param {Array} b - left untouched (a copy is sorted)
 * @returns {boolean}
 */
function sameElements(a, b) {
    // if length is not the same then must not be equal
    if (a.length != b.length) return false;

    // Same order as the default sort (by string form), with the typeof name as
    // tie-breaker so that e.g. null and "null" line up the same way in both arrays.
    function byStringThenType(x, y) {
        var sx = String(x), sy = String(y);
        if (sx < sy) return -1;
        if (sx > sy) return 1;
        var tx = typeof x, ty = typeof y;
        return tx < ty ? -1 : tx > ty ? 1 : 0;
    }

    // sort copies so the caller's arrays keep their order
    var sortedA = a.slice().sort(byStringThenType);
    var sortedB = b.slice().sort(byStringThenType);

    var objs = [];     // indexes at which both arrays hold a non-primitive
    var result = true;

    for (var i = 0; i < sortedA.length; i++) {
        var x = sortedA[i];
        var y = sortedB[i];
        var xIsPrimitive = isPrimitive(x);
        var yIsPrimitive = isPrimitive(y);

        if (x !== x) {
            // NaN will not equal itself, so it needs a NaN on the other side
            if (y === y) {
                result = false;
                break;
            }
        } else if (xIsPrimitive && yIsPrimitive) {
            if (x != y) {
                result = false;
                break;
            }
        } else if (!xIsPrimitive && !yIsPrimitive) {
            // count the occurrence on the object itself
            incrementCountA(x);
            incrementCountB(y);
            objs.push(i);
        } else {
            // one side primitive, the other not: different number of primitives
            result = false;
            break;
        }
    }

    // Always runs, also after a mismatch above, so no counter is left behind
    // on any object that has been visited.
    for (var k = 0; k < objs.length; k++) {
        var ind = objs[k];
        // matching counters mean the object occurs equally often in both arrays
        if (sortedA[ind].__aCount !== sortedA[ind].__bCount) result = false;
        if (sortedB[ind].__aCount !== sortedB[ind].__bCount) result = false;

        // revert the objects to what they were before entering this function
        delete sortedA[ind].__aCount;
        delete sortedA[ind].__bCount;
        delete sortedB[ind].__aCount;
        delete sortedB[ind].__bCount;
    }

    return result;
}

// inspired by @Bergi's code
/**
 * Tells whether arg is a primitive value (including null and undefined).
 * Object() hands an object back unchanged and wraps everything else, so the
 * result is identical to arg only for non-primitives.
 */
function isPrimitive(arg) {
    var wrapped = Object(arg);
    return !(wrapped === arg);
}

/**
 * Adds one to the "__aCount" counter stored on arg. The first call creates the
 * counter as a non-enumerable, writable, configurable own property with value 1.
 */
function incrementCountA(arg) {
    var alreadyCounted = arg.hasOwnProperty("__aCount");
    if (!alreadyCounted) {
        Object.defineProperty(arg, "__aCount", {
            configurable: true,
            writable: true,
            enumerable: false,
            value: 1
        });
        return;
    }
    arg.__aCount += 1;
}
/**
 * Adds one to the "__bCount" counter stored on arg. The first call creates the
 * counter as a non-enumerable, writable, configurable own property with value 1.
 */
function incrementCountB(arg) {
    var alreadyCounted = arg.hasOwnProperty("__bCount");
    if (!alreadyCounted) {
        Object.defineProperty(arg, "__bCount", {
            configurable: true,
            writable: true,
            enumerable: false,
            value: 1
        });
        return;
    }
    arg.__bCount += 1;
}

Then just call the function

// Sample calls; the result stated by the author follows each call.
// The string "NaN" is an ordinary string, not the number NaN.
sameElements( ["NaN"], [NaN] ); // false

// As "1" == 1 returns true
sameElements( [1],["1"] ); // true

// Arrays of different length
sameElements( [1,2], [1,2,3] ); //false

The above implementation actually defines two new properties called "__aCount" and "__bCount" that are used to keep track of non-primitive elements in both arrays. These are deleted before the function returns so as to leave the array elements as before.

Fiddle here

jsperf here.

The reason I changed the jsperf test case was that as @Bergi states the test arrays, especially the fact there were only 2 unique objects in the whole array is not representative of what we are testing for.

One other advantage of this implementation is that if you need to make it compatible with pre IE9 browsers instead of using the defineProperty to create a non-enumerable property you could just use a normal property.

查看更多
登录 后发表回答