Javascript group linked key values into arrays

2019-07-23 14:04发布

问题:

Hi, I have a very long list of key–value pairs in JSON (key: value, key: value, and so on), where each pair links two related items:

car <--> wheel
wheel <--> tyre
bed <--> sheets
guitar <--> strings
guitar <--> pickup
tyre <--> rubber

What I want is to group all related items into arrays, no matter how distant the relation is, like this:

[car, wheel, tyre, rubber]
[guitar, strings, pickup]
[bed, sheets]

What is an efficient way to do this with Javascript?

回答1:

First of all, I would store the relationships as arrays so that you can have duplicate "keys." Key methods: an initial dictionary including every word related to each individual word; a recursive chain expander using map and reduce; filtering chains based on equivalency.

/**
 * Returns a new array holding the first occurrence of every element of this
 * array, in their original order. The receiver is not modified.
 *
 * Elements are compared by their property-key (string) form, so `1` and
 * `"1"` count as the same value.
 *
 * @returns {Array} the de-duplicated copy.
 */
Array.prototype.getUnique = function(){
   // Prototype-less dictionary: words such as "hasOwnProperty" or "__proto__"
   // are stored as ordinary keys and can neither shadow nor hit built-ins.
   var seen = Object.create(null), unique = [];
   for(var i = 0, l = this.length; i < l; ++i){
      if(this[i] in seen) {
         continue;
      }
      unique.push(this[i]);
      seen[this[i]] = 1;
   }
   return unique;
};
// Adjacency lists: links[word] is the array of words directly related to
// `word`. The dictionary has no prototype, so a word such as "constructor"
// or "toString" is never mistaken for an existing entry.
var links = Object.create(null);
// Sample relations; every entry is one undirected [wordA, wordB] link.
var pairs = [
    ["car", "wheel"],
    ["wheel", "tyre"],
    ["bed", "sheets"],
    ["guitar", "strings"],
    ["guitar", "pickup"],
    ["rubber", "tyre"],
    ["truck", "wheel"],
    ["pickup", "car"]
];
// Register each link in both directions (forEach, since nothing is returned).
pairs.forEach(function(pair) {
    var left = pair[0], right = pair[1];
    links[left] = links[left] || [];
    links[right] = links[right] || [];

    links[left].push(right);
    links[right].push(left);
});
/**
 * Expands a seed list of words with every word reachable from it through
 * `links`, no matter how many hops away.
 *
 * The result starts with the seed words in their given order; newly found
 * words follow in discovery order (neighbours of earlier words first).
 * An empty seed yields an empty array, and a word without an entry in
 * `links` simply contributes no neighbours.
 *
 * @param {Array} list - seed words; not modified.
 * @returns {Array} the seed followed by all transitively linked words.
 */
var append = function(list) {
    var chain = list.slice();
    // Prototype-less lookup of the words already placed in `chain`.
    var seen = Object.create(null);
    var i, j, neighbours;

    for (i = 0; i < chain.length; i++) {
        seen[chain[i]] = true;
    }
    // `chain` grows while it is scanned, so every newly added word gets its
    // own neighbours examined in a later iteration.
    for (i = 0; i < chain.length; i++) {
        neighbours = Array.isArray(links[chain[i]]) ? links[chain[i]] : [];
        for (j = 0; j < neighbours.length; j++) {
            if (!seen[neighbours[j]]) {
                seen[neighbours[j]] = true;
                chain.push(neighbours[j]);
            }
        }
    }
    return chain;
};
// One expanded chain per known word: seed with the word's direct neighbours
// followed by the word itself, then let append() pull in everything else.
var branches = [];
for( var word in links ) {
    branches[branches.length] = append([].concat(links[word], word));
}
/**
 * Tells whether two lists are equivalent: same length and each list's
 * elements all occur in the other one, regardless of order.
 *
 * The membership test runs in both directions so the answer does not depend
 * on argument order (a one-way test would call ["a","a","b"] and
 * ["a","b","c"] equal, but not the reverse).
 *
 * @param {Array} listA
 * @param {Array} listB
 * @returns {boolean} true when the lists are equivalent.
 */
var compareArrays = function(listA, listB) {
    if( listA.length !== listB.length ) return false;
    var containsAll = function(haystack, needles) {
        return needles.every(function(element) {
            return haystack.indexOf(element) !== -1;
        });
    };
    return containsAll(listB, listA) && containsAll(listA, listB);
};
// Keep exactly one representative per group of equivalent branches: a branch
// survives only when no later branch is equivalent to it, i.e. the last
// occurrence wins. `branches` is only read here; it is not modified.
var chains = branches.filter(function(branch, i) {
    return !branches.slice(i + 1).some(function(later) {
        // are they equivalent
        return compareArrays(branch, later);
    });
});


回答2:

I'd go with a map that links each word to the set it is currently in. The map (a JavaScript object), with nearly O(1) runtime for accessing a key, should help performance. Start with the same format as proposed by @matt3141:

/**
 * Groups linked words into connected components.
 *
 * @param {Array} pairs - list of [wordA, wordB] links (undirected).
 * @returns {Object} prototype-less map from each word to the array holding
 *     all words of its component; every member of a component maps to the
 *     very same array instance.
 */
function groupLinkedWords(pairs) {
    // No prototype: words like "constructor" are not reported as known by `in`.
    var setsByWord = Object.create(null);
    for (var i=0; i<pairs.length; i++) {
        var first = pairs[i][0], second = pairs[i][1];
        if (first in setsByWord && second in setsByWord) {
            // both words are already known
            var firstSet = setsByWord[first], secondSet = setsByWord[second];
            if (firstSet !== secondSet) {
                // combine the two sets: move the smaller one into the larger
                // one so fewer words have to be re-pointed
                var larger = secondSet.length > firstSet.length ? secondSet : firstSet,
                    smaller = larger === firstSet ? secondSet : firstSet;
                for (var j=0; j<smaller.length; j++) {
                    setsByWord[smaller[j]] = larger;
                    larger.push(smaller[j]);
                }
            }
            // otherwise they already share a set and nothing has to change
        } else {
            // add the missing word(s) to the existing set
            // or create a new set
            var set = setsByWord[first] || setsByWord[second] || [];
            if (!(first in setsByWord)) {
                set.push(first);
                setsByWord[first] = set;
            }
            // re-checked after the step above so a pair like ["a", "a"]
            // stores the word only once
            if (!(second in setsByWord)) {
                set.push(second);
                setsByWord[second] = set;
            }
        }
    }
    return setsByWord;
}

var pairs = [
    ["car", "wheel"],
    ["wheel", "tyre"],
    ["bed", "sheets"],
    ["guitar", "strings"],
    ["guitar", "pickup"],
    ["rubber", "tyre"],
    ["truck", "wheel"],
    ["pickup", "car"]
];

var setsByWord = groupLinkedWords(pairs);

This will split your graph into its connected components (in the setsByWord object, each component array is indexed by every one of its nodes):

> var results = []; // collects each distinct component array once, in first-seen order
> for (var word in setsByWord) // every word maps to the array of its component
>     if (results.indexOf(setsByWord[word])<0) // indexOf compares by identity, so a shared array is found again
>         results.push(setsByWord[word]);
> return results; // NOTE(review): `return` presumably assumes an enclosing function or console wrapper — confirm

[
    ["car","wheel","tyre","rubber","truck","guitar","strings","pickup"],
    ["bed","sheets"]
]

If you have a directed graph, and want arrays of all successors by word, you could use this:

/**
 * Builds the adjacency map of a directed graph.
 *
 * @param {Array} pairs - list of [from, to] edges.
 * @returns {Object} prototype-less map from each word that has outgoing
 *     edges to the array of its direct successors.
 */
function buildGraph(pairs) {
    return pairs.reduce(function(map, pair) {
        (map[pair[0]] || (map[pair[0]] = [])).push(pair[1]);
        return map;
    }, Object.create(null));
}

/**
 * Computes, for every word of the graph, the list of all words reachable
 * from it.
 *
 * Each list starts with the direct successors, followed by the successors
 * of each of them in turn. A word is listed at most once per list, and in a
 * graph with cycles a word on a cycle appears in its own list.
 *
 * @param {Object} graph - adjacency map as produced by buildGraph().
 * @returns {Object} prototype-less map from each word (sources and targets)
 *     to the array of its successors; words without outgoing edges map to [].
 */
function collectSuccessors(graph) {
    var successors = Object.create(null);

    // Lists what is reachable from `word`, skipping words already in `seen`;
    // `seen` is shared across one whole expansion, which is what stops
    // cycles and removes duplicates.
    function expand(word, seen) {
        var direct = graph[word] || [];
        var fresh = [];
        for (var i = 0; i < direct.length; i++) {
            if (!seen[direct[i]]) {
                seen[direct[i]] = true;
                fresh.push(direct[i]);
            }
        }
        var all = fresh;
        for (var j = 0; j < fresh.length; j++) {
            all = all.concat(expand(fresh[j], seen));
        }
        return all;
    }

    // Registers `word` before descending, so keys are created in depth-first
    // visiting order.
    function visit(word) {
        if (word in successors)
            return;
        successors[word] = expand(word, Object.create(null));
        (graph[word] || []).forEach(function(next) {
            visit(next);
        });
    }

    for (var word in graph)
        visit(word);
    return successors;
}

// Sample directed edges, each given as [from, to].
var pairs = [
        ["car", "wheel"],
        ["wheel", "tyre"],
        ["bed", "sheets"],
        ["guitar", "strings"],
        ["guitar", "pickup"],
        ["rubber", "tyre"],
        ["truck", "wheel"],
        ["pickup", "car"]
    ],
    graph = buildGraph(pairs);

var successors = collectSuccessors(graph);

If you are sure there are no cycles in the graph and you only want lists for the starting nodes of paths, you might add this:

// Reduce `successors` to the words that start a path, then list each such
// word followed by its successors.
// NOTE(review): this pass deletes keys from `successors`, so the map is
// consumed by it.
var results = [];
for (var word in successors)
    // Remove the entry of every word listed as a successor of `word`. The
    // `word in successors` re-check ends the loop early if `word` itself
    // gets deleted along the way.
    for (var i=0; word in successors && i<successors[word].length; i++)
        delete successors[successors[word][i]];
// The keys still present are not listed as a successor of any remaining word.
for (var word in successors)
    results.push([word].concat(successors[word]));
// NOTE(review): top-level `return` presumably assumes an enclosing function — confirm.
return results;

// becomes:
[
   ["bed","sheets"],
   ["guitar","strings","pickup","car","wheel","tyre"],
   ["rubber","tyre"],
   ["truck","wheel","tyre"]
]