Hi, I have a very long list of key-value pairs in JSON (key:value, key:value, and so on):
car <--> wheel
wheel <--> tyre
bed <--> sheets
guitar <--> strings
guitar <--> pickup
tyre <--> rubber
What I want is to group all related items into arrays, no matter how distant the relation, like this:
[car, wheel, tyre, rubber]
[guitar, strings, pickup]
[bed, sheets]
What is an efficient way to do this with Javascript?
First of all, I would store the relationships as arrays so that you can have duplicate "keys." Key methods: an initial dictionary including every word related to each individual word; a recursive chain expander using map and reduce; filtering chains based on equivalency.
/**
 * Returns a new array holding the first occurrence of every element, in
 * their original order. The receiver is not modified.
 *
 * Elements are compared by their property-key form, so `1` and `"1"` count
 * as the same value (this matches how the lookup table has always been keyed).
 *
 * The method is installed as a non-enumerable property so that it does not
 * show up when arrays are walked with `for...in`.
 *
 * @returns {Array} de-duplicated copy of the array
 */
Object.defineProperty(Array.prototype, "getUnique", {
    value: function() {
        // A prototype-less lookup table: words such as "hasOwnProperty" or
        // "__proto__" are stored like any other key and cannot clobber the
        // membership test itself.
        var seen = Object.create(null), unique = [];
        for (var i = 0, l = this.length; i < l; ++i) {
            if (this[i] in seen) {
                continue;
            }
            unique.push(this[i]);
            seen[this[i]] = 1;
        }
        return unique;
    },
    writable: true,
    configurable: true,
    enumerable: false
});
var pairs = [
    ["car", "wheel"],
    ["wheel", "tyre"],
    ["bed", "sheets"],
    ["guitar", "strings"],
    ["guitar", "pickup"],
    ["rubber", "tyre"],
    ["truck", "wheel"],
    ["pickup", "car"]
];

/**
 * Builds an undirected adjacency map from a list of two-element pairs.
 *
 * Every word that occurs in a pair becomes a key; its value is the array of
 * words it is directly paired with, in the order the pairs were listed.
 * A pair that is listed twice contributes two entries.
 *
 * @param {Array<Array<string>>} pairList - list of [wordA, wordB] pairs
 * @returns {Object<string, Array<string>>} word -> directly related words
 */
var buildLinks = function(pairList) {
    // No prototype: a word like "constructor" or "toString" must start out
    // absent instead of resolving to an inherited function.
    var map = Object.create(null);
    pairList.forEach(function(pair) {
        var left = pair[0], right = pair[1];
        map[left] = map[left] || [];
        map[right] = map[right] || [];
        map[left].push(right);
        map[right].push(left);
    });
    return map;
};

var links = buildLinks(pairs);
/**
 * Expands a list of words with everything reachable from them through
 * `links`, however distant.
 *
 * The result starts with the given list unchanged and is followed by the
 * newly discovered words, one layer of neighbours at a time, each new word
 * appearing once in the order it was first reached.
 *
 * The walk is iterative, so a long chain cannot exhaust the call stack, and
 * an empty list or a word without an entry in `links` is returned as is.
 *
 * @param {Array<string>} list - starting words
 * @returns {Array<string>} new array: `list` followed by all reachable words
 */
var append = function(list) {
    var hasOwn = Object.prototype.hasOwnProperty;
    // Prototype-less table so any word is a safe key.
    var visited = Object.create(null);
    var result = list.slice();
    var frontier = list.slice();
    var i;

    for (i = 0; i < list.length; i++) {
        visited[list[i]] = true;
    }

    while (frontier.length) {
        var next = [];
        for (i = 0; i < frontier.length; i++) {
            // Words that never occurred in a pair have nothing to contribute.
            if (!hasOwn.call(links, frontier[i])) {
                continue;
            }
            var neighbours = links[frontier[i]];
            for (var k = 0; k < neighbours.length; k++) {
                if (!(neighbours[k] in visited)) {
                    visited[neighbours[k]] = true;
                    next.push(neighbours[k]);
                    result.push(neighbours[k]);
                }
            }
        }
        // Only the words found in this round can still have unseen neighbours.
        frontier = next;
    }
    return result;
};
// One candidate group per known word: seed it with the word's direct
// neighbours followed by the word itself, then let append() pull in
// everything else that is reachable.
var branches = [];
var word;
for (word in links) {
    branches[branches.length] = append([].concat(links[word], word));
}
/**
 * Tells whether two arrays are equivalent regardless of element order:
 * they must have the same length and each must contain every element of
 * the other (elements compared with strict equality).
 *
 * Checking both directions keeps the answer independent of argument order
 * when an array holds repeated elements, e.g. ["a","a","b"] vs ["a","b","c"].
 *
 * @param {Array} listA
 * @param {Array} listB
 * @returns {boolean} true when the arrays are equivalent
 */
var compareArrays = function(listA, listB) {
    if (listA.length !== listB.length) return false;

    // true when every element of `inner` also occurs in `outer`
    var isContained = function(inner, outer) {
        for (var i = 0; i < inner.length; i++) {
            if (outer.indexOf(inner[i]) === -1) return false;
        }
        return true;
    };

    return isContained(listA, listB) && isContained(listB, listA);
};
// Work against a snapshot so that `branches` itself is left intact; the
// previous alias meant every entry of `branches` was deleted while filtering.
var _branches = branches.slice();

// Keep a branch only when no later branch is equivalent to it, i.e. the last
// representative of each group survives, in its original relative order.
var chains = branches.filter(function(branch, i) {
    for (var j = i + 1; j < _branches.length; j++) {
        // are they equivalent
        if (compareArrays(branch, _branches[j])) return false;
    }
    return true;
});
I'd go with a map from each word to the set it is currently in. Using a map (a JavaScript object), with nearly O(1) runtime for accessing a key, should help performance. Start with the same format as proposed by @matt3141:
var pairs = [
    ["car", "wheel"],
    ["wheel", "tyre"],
    ["bed", "sheets"],
    ["guitar", "strings"],
    ["guitar", "pickup"],
    ["rubber", "tyre"],
    ["truck", "wheel"],
    ["pickup", "car"]
];

/**
 * Groups the words of undirected pairs into connected sets.
 *
 * Each word maps to the array ("set") it currently belongs to; words that
 * are connected, directly or indirectly, share the very same array object.
 * When a pair joins two existing sets, the smaller one is emptied into the
 * larger one so that fewer words need to be re-pointed.
 *
 * @param {Array<Array<string>>} pairs - list of [wordA, wordB] pairs
 * @returns {Object<string, Array<string>>} word -> shared set array
 *     (a prototype-less object, so any word is a safe key)
 */
function buildSetsByWord(pairs) {
    // No prototype: `"constructor" in setsByWord` must be false until that
    // word has actually been seen.
    var setsByWord = Object.create(null);

    for (var i = 0; i < pairs.length; i++) {
        var first = pairs[i][0];
        var second = pairs[i][1];

        if (first in setsByWord && second in setsByWord) {
            // both words are already known
            var setA = setsByWord[first];
            var setB = setsByWord[second];
            if (setA === setB) {
                continue; // already in the same set
            }
            // combine the two sets, moving the smaller into the larger
            var larger = setB.length > setA.length ? setB : setA;
            var smaller = larger === setA ? setB : setA;
            // Element-wise push: spreading a huge array through apply() can
            // exceed the engine's argument limit.
            for (var j = 0; j < smaller.length; j++) {
                setsByWord[smaller[j]] = larger;
                larger.push(smaller[j]);
            }
        } else {
            // add the missing word(s) to the existing set, or create a new set
            var set = setsByWord[first] || setsByWord[second] || [];
            if (!(first in setsByWord)) {
                set.push(first);
                setsByWord[first] = set;
            }
            // Re-checked after the insert above so a pair like ["a", "a"]
            // adds the word only once.
            if (!(second in setsByWord)) {
                set.push(second);
                setsByWord[second] = set;
            }
        }
    }
    return setsByWord;
}

var setsByWord = buildSetsByWord(pairs);
This will split your graph into its connected components (in the setsByWord
object, these component arrays are indexed by their nodes):
> var results = []; // will hold each connected component exactly once
> for (var word in setsByWord)
> if (results.indexOf(setsByWord[word])<0) // indexOf compares arrays by identity, so a set shared by several words is added only the first time it is met
> results.push(setsByWord[word]);
> return results;
[
["car","wheel","tyre","rubber","truck","guitar","strings","pickup"],
["bed","sheets"]
]
If you have a directed graph, and want arrays of all successors by word, you could use this:
// Directed variant: each pair [from, to] is read as an edge from -> to.
// `graph` maps a word to the words it points to directly; `successors`
// ends up mapping a word to every word reachable from it.
var pairs = […],
graph = pairs.reduce(function(map, pair) {
// adjacency list: create the array for pair[0] on first use, then add pair[1]
(map[pair[0]] || (map[pair[0]] = [])).push(pair[1]);
return map;
}, {});
// cache of finished (or in-progress) successor lists, keyed by word
var successors = {};
// Walk from every word that has outgoing edges; the named function
// expression calls itself for each direct successor.
for (var word in graph) (function getSuccessors(word) {
// already computed, or currently being computed further up the call stack
// NOTE(review): `in` also matches inherited names on a plain object (e.g. "constructor")
if (word in successors)
return successors[word];
// Placeholder stored while this word is being expanded, so that a cycle leading
// back here returns instead of recursing forever.
// NOTE(review): in that case the placeholder's `true` entry is concatenated into
// the caller's list, so the results are only clean for graphs without cycles.
successors[word] = [true]; // some marker against circles
// direct successors followed by the successor lists of each of them
// (duplicates are not removed); words without outgoing edges get an empty list
return successors[word] = word in graph
? [].concat.apply(graph[word], graph[word].map(getSuccessors))
: [];
})(word);
return successors;
If you are sure there are no cycles in the graph and only want lists for the starting words of paths, you might add this:
// Reduce `successors` to the words that start a path, then emit one list per
// such word. Note that this deletes entries from `successors` in place.
var results = [];
for (var word in successors)
// `word in successors` is re-checked on every step because the delete below
// may remove the entry that is currently being walked
for (var i=0; word in successors && i<successors[word].length; i++)
// anything that is a successor of another word cannot be a starting word
delete successors[successors[word][i]];
for (var word in successors)
// the starting word followed by everything reachable from it
results.push([word].concat(successors[word]));
return results;
// becomes:
[
["bed","sheets"],
["guitar","strings","pickup","car","wheel","tyre"],
["rubber","tyre"],
["truck","wheel","tyre"]
]