Difference and intersection of two arrays containi

2019-01-07 23:18发布

问题:

I have two arrays list1 and list2 which have objects with some properties; userId is the Id or unique property:

list1 = [
    { userId: 1234, userName: 'XYZ'  }, 
    { userId: 1235, userName: 'ABC'  }, 
    { userId: 1236, userName: 'IJKL' },
    { userId: 1237, userName: 'WXYZ' }, 
    { userId: 1238, userName: 'LMNO' }
]

list2 = [
    { userId: 1235, userName: 'ABC'  },  
    { userId: 1236, userName: 'IJKL' },
    { userId: 1252, userName: 'AAAA' }
]

I'm looking for an easy way to execute the following three operations:

  1. list1 operation list2 should return the intersection of elements:

    [
        { userId: 1235, userName: 'ABC'  },
        { userId: 1236, userName: 'IJKL' }
    ]
    
  2. list1 operation list2 should return the list of all elements from list1 which don't occur in list2:

    [
        { userId: 1234, userName: 'XYZ'  },
        { userId: 1237, userName: 'WXYZ' }, 
        { userId: 1238, userName: 'LMNO' }
    ]
    
  3. list2 operation list1 should return the list of elements from list2 which don't occur in list1:

    [
        { userId: 1252, userName: 'AAAA' }
    ]
    

回答1:

You could define three functions inBoth, inFirstOnly, and inSecondOnly which all take two lists as arguments, and return a list as can be understood from the function name. The main logic could be put in a common function operation that all three rely on.

Here are a few implementations for that operation to choose from, for which you can find a snippet further down:

  • Plain old JavaScript for loops
  • Arrow functions using filter and some array methods
  • Optimised lookup with a Set

Plain old for loops

// Generic helper function that can be used for the three operations:        
function operation(list1, list2, isUnion) {
    var result = [];
    
    for (var i = 0; i < list1.length; i++) {
        var item1 = list1[i],
            found = false;
        for (var j = 0; j < list2.length && !found; j++) {
            found = item1.userId === list2[j].userId;
        }
        if (found === !!isUnion) { // isUnion is coerced to boolean
            result.push(item1);
        }
    }
    return result;
}

// Following functions are to be used:
function inBoth(list1, list2) {
    return operation(list1, list2, true);
}

function inFirstOnly(list1, list2) {
    return operation(list1, list2);
}

function inSecondOnly(list1, list2) {
    return inFirstOnly(list2, list1);
}

// Sample data
var list1 = [
    { userId: 1234, userName: 'XYZ'  }, 
    { userId: 1235, userName: 'ABC'  }, 
    { userId: 1236, userName: 'IJKL' },
    { userId: 1237, userName: 'WXYZ' }, 
    { userId: 1238, userName: 'LMNO' }
];
var list2 = [
    { userId: 1235, userName: 'ABC'  },  
    { userId: 1236, userName: 'IJKL' },
    { userId: 1252, userName: 'AAAA' }
];
  
console.log('inBoth:', inBoth(list1, list2)); 
console.log('inFirstOnly:', inFirstOnly(list1, list2)); 
console.log('inSecondOnly:', inSecondOnly(list1, list2)); 

Arrow functions using filter and some array methods

This uses some ES5 and ES6 features:

// Generic helper function that can be used for the three operations:        
const operation = (list1, list2, isUnion = false) =>
    list1.filter( a => isUnion === list2.some( b => a.userId === b.userId ) );

// Following functions are to be used:
const inBoth = (list1, list2) => operation(list1, list2, true),
      inFirstOnly = operation,
      inSecondOnly = (list1, list2) => inFirstOnly(list2, list1);

// Sample data
const list1 = [
    { userId: 1234, userName: 'XYZ'  }, 
    { userId: 1235, userName: 'ABC'  }, 
    { userId: 1236, userName: 'IJKL' },
    { userId: 1237, userName: 'WXYZ' }, 
    { userId: 1238, userName: 'LMNO' }
];
const list2 = [
    { userId: 1235, userName: 'ABC'  },  
    { userId: 1236, userName: 'IJKL' },
    { userId: 1252, userName: 'AAAA' }
];
  
console.log('inBoth:', inBoth(list1, list2)); 
console.log('inFirstOnly:', inFirstOnly(list1, list2)); 
console.log('inSecondOnly:', inSecondOnly(list1, list2));

Optimising lookup

The above solutions have a O(n²) time complexity because of the nested loop -- some represents a loop as well. So for large arrays you'd better create a (temporary) hash on user-id. This can be done on-the-fly by providing a Set (ES6) as argument to a function that will generate the filter callback function. That function can then perform the look-up in constant time with has:

// Generic helper function that can be used for the three operations:        
const operation = (list1, list2, isUnion = false) =>
    list1.filter(
        (set => a => isUnion === set.has(a.userId))(new Set(list2.map(b => b.userId)))
    );

// Following functions are to be used:
const inBoth = (list1, list2) => operation(list1, list2, true),
      inFirstOnly = operation,
      inSecondOnly = (list1, list2) => inFirstOnly(list2, list1);

// Sample data
const list1 = [
    { userId: 1234, userName: 'XYZ'  }, 
    { userId: 1235, userName: 'ABC'  }, 
    { userId: 1236, userName: 'IJKL' },
    { userId: 1237, userName: 'WXYZ' }, 
    { userId: 1238, userName: 'LMNO' }
];
const list2 = [
    { userId: 1235, userName: 'ABC'  },  
    { userId: 1236, userName: 'IJKL' },
    { userId: 1252, userName: 'AAAA' }
];
  
console.log('inBoth:', inBoth(list1, list2)); 
console.log('inFirstOnly:', inFirstOnly(list1, list2)); 
console.log('inSecondOnly:', inSecondOnly(list1, list2));



回答2:

Use lodash's _.isEqual method. Specifically:

list1.reduce(function(prev, curr){
  !list2.some(function(obj){
    return _.isEqual(obj, curr)
  }) ? prev.push(curr): false;
  return prev
}, []);

Above gives you the equivalent of A given !B (in SQL terms, A LEFT OUTER JOIN B). You can move the code around the code to get what you want!



回答3:

function intersect(first, second) {
    return intersectInternal(first, second, function(e){ return e });
}

function unintersect(first, second){
    return intersectInternal(first, second, function(e){ return !e });  
}

function intersectInternal(first, second, filter) {
    var map = {};

    first.forEach(function(user) { map[user.userId] = user; });

    return second.filter(function(user){ return filter(map[user.userId]); })
}


回答4:

This is the solution that worked for me.

 var intersect = function (arr1, arr2) {
            var intersect = [];
            _.each(arr1, function (a) {
                _.each(arr2, function (b) {
                    if (compare(a, b))
                        intersect.push(a);
                });
            });

            return intersect;
        };

 var unintersect = function (arr1, arr2) {
            var unintersect = [];
            _.each(arr1, function (a) {
                var found = false;
                _.each(arr2, function (b) {
                    if (compare(a, b)) {
                        found = true;    
                    }
                });

                if (!found) {
                    unintersect.push(a);
                }
            });

            return unintersect;
        };

        function compare(a, b) {
            if (a.userId === b.userId)
                return true;
            else return false;
        }


回答5:

Here is a functionnal programming solution with underscore/lodash to answer your first question (intersection).

list1 = [ {userId:1234,userName:'XYZ'}, 
          {userId:1235,userName:'ABC'}, 
          {userId:1236,userName:'IJKL'},
          {userId:1237,userName:'WXYZ'}, 
          {userId:1238,userName:'LMNO'}
        ];

list2 = [ {userId:1235,userName:'ABC'},  
          {userId:1236,userName:'IJKL'},
          {userId:1252,userName:'AAAA'}
        ];

_.reduce(list1, function (memo, item) {
        var same = _.findWhere(list2, item);
        if (same && _.keys(same).length === _.keys(item).length) {
            memo.push(item);
        }
        return memo
    }, []);

I'll let you improve this to answer the other questions ;-)