get consensus of multiple partitioning methods in

2019-02-27 07:48发布

问题:

My data:

data=cbind(c(1,1,2,1,1,3),c(1,1,2,1,1,1),c(2,2,1,2,1,2))
colnames(data)=paste("item",1:3)
rownames(data)=paste("method",1:6)

I want as an output that according to majority vote, there are two communities (with their elements). Something like: group1={item1, item2}, group2={item3}.

回答1:

You can try this, base R:

res=apply(data,2,function(u) as.numeric(names(sort(table(u), decreasing=T))[1]))

setNames(lapply(unique(res), function(u) names(res)[res==u]), unique(res))
#$`1`
#[1] "item 1" "item 2"

#$`2`
#[1] "item 3"


回答2:

This function is passed a matrix where each column is an item and each row is a membership vector corresponding to a partition of the items according to a clustering method. The elements (numbers) composing each row have no meaning other than indicating membership and are recycled from row to row. The function returns the majority vote partition. When no consensus exists for an item, the partition given by the first row wins. This allows ordering of the partitions by decreasing values of modularity, for instance.

    consensus.final <-
  function(data){
    output=list()
    for (i in 1:nrow(data)){
      row=as.numeric(data[i,])
      output.inner=list()
      for (j in 1:length(row)){
        group=character()
        group=c(group,colnames(data)[which(row==row[j])])
        output.inner[[j]]=group
      }
      output.inner=unique(output.inner)
      output[[i]]=output.inner
    }

    # gives the mode of the vector representing the number of groups found by each method
    consensus.n.comm=as.numeric(names(sort(table(unlist(lapply(output,length))),decreasing=TRUE))[1])

    # removes the elements of the list that do not correspond to this consensus solution
    output=output[lapply(output,length)==consensus.n.comm]

    # 1) find intersection 
    # 2) use majority vote for elements of each vector that are not part of the intersection

    group=list()

    for (i in 1:consensus.n.comm){ 
      list.intersection=list()
      for (p in 1:length(output)){
        list.intersection[[p]]=unlist(output[[p]][i])
      }

      # candidate group i
      intersection=Reduce(intersect,list.intersection)
      group[[i]]=intersection

      # we need to reinforce that group
      for (p in 1:length(list.intersection)){
        vector=setdiff(list.intersection[[p]],intersection)
        if (length(vector)>0){
          for (j in 1:length(vector)){
            counter=vector(length=length(list.intersection))
            for (k in 1:length(list.intersection)){
              counter[k]=vector[j]%in%list.intersection[[k]]
            }
            if(length(which(counter==TRUE))>=ceiling((length(counter)/2)+0.001)){
              group[[i]]=c(group[[i]],vector[j])
            }
          }
        }
      }
    }

    group=lapply(group,unique)

    # variables for which consensus has not been reached
    unclassified=setdiff(colnames(data),unlist(group))

    if (length(unclassified)>0){
      for (pp  in 1:length(unclassified)){
        temp=matrix(nrow=length(output),ncol=consensus.n.comm)
        for (i in 1:nrow(temp)){
          for (j in 1:ncol(temp)){
            temp[i,j]=unclassified[pp]%in%unlist(output[[i]][j])
          }
        }
        # use the partition of the first method when no majority exists (this allows ordering of partitions by decreasing modularity values for instance)
        index.best=which(temp[1,]==TRUE)
        group[[index.best]]=c(group[[index.best]],unclassified[pp])
      }
    }
    output=list(group=group,unclassified=unclassified)
  }

Some examples:

data=cbind(c(1,1,2,1,1,3),c(1,1,2,1,1,1),c(2,2,1,2,1,2))
colnames(data)=paste("item",1:3)
rownames(data)=paste("method",1:6)
data
consensus.final(data)$group

[[1]]
[1] "item 1" "item 2"

[[2]]
[1] "item 3"

data=cbind(c(1,1,1,1,1,3),c(1,1,1,1,1,1),c(1,1,1,2,1,2)) 
colnames(data)=paste("item",1:3) 
rownames(data)=paste("method",1:6)
data
consensus.final(data)$group

[[1]]
[1] "item 1" "item 2" "item 3"

data=cbind(c(1,3,2,1),c(2,2,3,3),c(3,1,1,2))
colnames(data)=paste("item",1:3)
rownames(data)=paste("method",1:4)
data
consensus.final(data)$group

[[1]]
[1] "item 1"

[[2]]
[1] "item 2"

[[3]]
[1] "item 3"