My data:
data=cbind(c(1,1,2,1,1,3),c(1,1,2,1,1,1),c(2,2,1,2,1,2))
colnames(data)=paste("item",1:3)
rownames(data)=paste("method",1:6)
I want as an output that according to majority vote, there are two communities (with their elements). Something like: group1={item1, item2}
, group2={item3}
.
You can try this, base R
:
res=apply(data,2,function(u) as.numeric(names(sort(table(u), decreasing=T))[1]))
setNames(lapply(unique(res), function(u) names(res)[res==u]), unique(res))
#$`1`
#[1] "item 1" "item 2"
#$`2`
#[1] "item 3"
This function is passed a matrix where each column is an item and each row is a membership vector corresponding to a partition of the items according to a clustering method. The elements (numbers) composing each row have no meaning other than indicating membership and are recycled from row to row. The function returns the majority vote partition. When no consensus exists for an item, the partition given by the first row wins. This allows ordering of the partitions by decreasing values of modularity, for instance.
consensus.final <-
function(data){
output=list()
for (i in 1:nrow(data)){
row=as.numeric(data[i,])
output.inner=list()
for (j in 1:length(row)){
group=character()
group=c(group,colnames(data)[which(row==row[j])])
output.inner[[j]]=group
}
output.inner=unique(output.inner)
output[[i]]=output.inner
}
# gives the mode of the vector representing the number of groups found by each method
consensus.n.comm=as.numeric(names(sort(table(unlist(lapply(output,length))),decreasing=TRUE))[1])
# removes the elements of the list that do not correspond to this consensus solution
output=output[lapply(output,length)==consensus.n.comm]
# 1) find intersection
# 2) use majority vote for elements of each vector that are not part of the intersection
group=list()
for (i in 1:consensus.n.comm){
list.intersection=list()
for (p in 1:length(output)){
list.intersection[[p]]=unlist(output[[p]][i])
}
# candidate group i
intersection=Reduce(intersect,list.intersection)
group[[i]]=intersection
# we need to reinforce that group
for (p in 1:length(list.intersection)){
vector=setdiff(list.intersection[[p]],intersection)
if (length(vector)>0){
for (j in 1:length(vector)){
counter=vector(length=length(list.intersection))
for (k in 1:length(list.intersection)){
counter[k]=vector[j]%in%list.intersection[[k]]
}
if(length(which(counter==TRUE))>=ceiling((length(counter)/2)+0.001)){
group[[i]]=c(group[[i]],vector[j])
}
}
}
}
}
group=lapply(group,unique)
# variables for which consensus has not been reached
unclassified=setdiff(colnames(data),unlist(group))
if (length(unclassified)>0){
for (pp in 1:length(unclassified)){
temp=matrix(nrow=length(output),ncol=consensus.n.comm)
for (i in 1:nrow(temp)){
for (j in 1:ncol(temp)){
temp[i,j]=unclassified[pp]%in%unlist(output[[i]][j])
}
}
# use the partition of the first method when no majority exists (this allows ordering of partitions by decreasing modularity values for instance)
index.best=which(temp[1,]==TRUE)
group[[index.best]]=c(group[[index.best]],unclassified[pp])
}
}
output=list(group=group,unclassified=unclassified)
}
Some examples:
data=cbind(c(1,1,2,1,1,3),c(1,1,2,1,1,1),c(2,2,1,2,1,2))
colnames(data)=paste("item",1:3)
rownames(data)=paste("method",1:6)
data
consensus.final(data)$group
[[1]]
[1] "item 1" "item 2"
[[2]]
[1] "item 3"
data=cbind(c(1,1,1,1,1,3),c(1,1,1,1,1,1),c(1,1,1,2,1,2))
colnames(data)=paste("item",1:3)
rownames(data)=paste("method",1:6)
data
consensus.final(data)$group
[[1]]
[1] "item 1" "item 2" "item 3"
data=cbind(c(1,3,2,1),c(2,2,3,3),c(3,1,1,2))
colnames(data)=paste("item",1:3)
rownames(data)=paste("method",1:4)
data
consensus.final(data)$group
[[1]]
[1] "item 1"
[[2]]
[1] "item 2"
[[3]]
[1] "item 3"