R: ggplot slight adjustment for clustering summary

2019-08-07 02:18发布

问题:

Please check my reproducible example and the result chart.

# Example input: the transpose of the built-in USArrests data set, so that the
# original rows (states) become the columns of X.
X = t(USArrests)

# plot_color_clust: build a two-part figure from a hierarchical clustering of X.
#   - right part (p1): a horizontal dendrogram whose leaf labels are coloured
#     by cluster membership;
#   - left part (p2): a strip of tiles, one per leaf, filled by cluster, with
#     the cluster number printed at the mean position of its members.
#
# Arguments:
#   X    data passed to ClustOfVar::hclustvar() as `X.quanti`.
#   N    number of clusters to cut the tree into. NOTE(review): the default
#        `N=N` refers to itself, so N effectively has to be supplied by the
#        caller.
#   cols colours used for the clusters; must contain at least N entries.
#
# Returns the object produced by gridExtra::arrangeGrob() (tiles on the left,
# dendrogram on the right). Nothing is drawn by the function itself; the
# grid.arrange() call that would draw it is commented out.
plot_color_clust = function(X,N=N,
cols=c("red","blue", "orange", "darkgreen","green","yellow","grey","black","white") 
                            ){
  # Packages are attached inside the function, so calling it changes the
  # search path of the session.
  library(ggplot2)
  library(gridExtra)
  library(gtable)
  library(scales)
  library(ggdendro)
  library(grid)
  library(plyr)

  # Guard clauses: N may exceed neither the number of colours nor ncol(X).
  if(N>length(cols)) stop("N too big. Not enough colors in cols.")
  if(N>ncol(X)) stop("N too big. Not enough columns in data.")

  # Hierarchical clustering, converted to the data-frame form used by ggdendro.
  fit = ClustOfVar::hclustvar(X.quanti = X)
  dd.row = as.dendrogram(fit)
  ddata_x <- dendro_data(dd.row)
  # Cluster id of every clustered item, named by item label.
  temp = cutree(fit,k=N)
  # Leaf labels together with their plotting positions.
  lab <- ggdendro::label(ddata_x)

  # For each leaf label, look up its cluster id by name and build the group
  # key "clust<id>" that drives the label colour.
  x=c()
  for(i in 1:nrow(lab)){
    x[i]=    paste( "clust", as.vector(temp[ lab$label[i]==names(temp) ])   ,sep="")
  }

  lab$group <- x

  # p1: dendrogram segments plus the leaf labels, flipped so the tree grows
  # horizontally.
  # NOTE(review): `axis.title.x` is passed twice to the first theme() call,
  # `axis.ticks.margin` is reported as deprecated further down this page, and
  # coord_flip() is added twice. How these are handled depends on the
  # installed ggplot2 version -- confirm before reusing.
  p1 <- ggplot(segment(ddata_x)) + 
    geom_segment(aes(x=x, y=y, xend=xend, yend=yend))+coord_flip()+
    geom_text(data=lab,
              aes(label=label, x=x, y=0, colour=group),hjust=1) +
    theme(legend.position="none",
          axis.title.y=element_blank(),
          axis.title.x=element_blank(),
          axis.text.x = element_text(angle = 0, hjust = 0),
          axis.title.x = element_text(angle = 0, hjust = 0))+
    theme(axis.text = element_blank(), axis.title = element_blank(), 
          axis.ticks = element_blank(), axis.ticks.margin = unit(0, "lines"), 
          axis.ticks.length = unit(0, "cm"))+
    scale_colour_manual(values=cols)+coord_flip()+
    scale_y_continuous(limits = c(-0.1, 2.1))

  # df2: one row per clustered item; `states` is a factor whose levels follow
  # the dendrogram order (fit$order), so the tiles line up with the leaves.
  df2<-data.frame(cluster=cutree(fit,N),states=factor(fit$labels,levels=fit$labels[fit$order]))
  # df3: mean leaf position per cluster, used to place the cluster number.
  df3<-ddply(df2,.(cluster),summarise,pos=mean(as.numeric(states)))
  # p2: the strip of cluster-coloured tiles with the cluster numbers on top.
  p2 = ggplot(df2,aes(states,y=1,fill=factor(cluster)))+geom_tile()+
    scale_y_continuous(expand=c(0,0))+
    theme(axis.title=element_blank(),
          axis.ticks=element_blank(),
          axis.text=element_blank(),
          legend.position="none")+coord_flip()+
    geom_text(data=df3,aes(x=pos,label=cluster))+
    scale_fill_manual(name = "This is my title", values = cols)

  # Convert both plots to gtables and give rows 2:5 of each the element-wise
  # maximum of the two plots' heights, as an attempt to align them.
  gp1<-ggplotGrob(p1)
  gp2<-ggplotGrob(p2)  
  maxHeight = grid::unit.pmax(gp1$heights[2:5], gp2$heights[2:5])
  gp1$heights[2:5] <- as.list(maxHeight)
  gp2$heights[2:5] <- as.list(maxHeight)
  #grid.arrange(gp2, gp1, ncol=2,widths=c(1/6,5/6))
  # Place the tiles (1/6 of the width) to the left of the dendrogram (5/6).
  R = arrangeGrob(gp2,gp1,ncol=2,widths=c(1/6,5/6))
  R

}

# Example call: cut the tree into 6 clusters.
plot_color_clust(X,6)

Questions:

  1. These two parts (the colour tiles on the left and the clustering tree on the right) have inconsistent heights. How do we adjust their heights so that they match each other?

  2. How can we make the tree on the right side shorter, so that the state names (the clustered subjects) have enough space to be fully displayed?

  3. Is there a way to make the white space between those two parts smaller?

Your tweaking of the code is appreciated. Thanks.

回答1:

One major change: rather than matching the heights of the two charts, I extract the plot panel from gp2, then insert it into column 2 of gp1. There are no margins surrounding the extracted panel, and this partly takes care of your point 3.

With respect to point 2: expand the limits of the axis to make room for the labels. (See point 2. in the code below.) The parameters for points 2 and 3 were set by trial and error. Adjusting one parameter means the other needs to be adjusted too.

With respect to point 1: expand the axis using the additive component of `expand` to add half a unit to each end of the axis. (See point 1. in the code below.)

Minor edit: updated for ggplot2 2.2.0 and R 3.3.2.
`axis.ticks.margin` is deprecated, so it is no longer used in the code below.

# Example input: the transpose of the built-in USArrests data set, so that the
# original rows (states) become the columns of X.
X <- t(USArrests)

#' Draw a cluster-coloured dendrogram with a matching strip of cluster tiles
#'
#' Clusters `X` with `ClustOfVar::hclustvar()`, cuts the tree into `N`
#' clusters, and draws a horizontal dendrogram whose leaf labels are coloured
#' by cluster. A strip of tiles (one per leaf, filled by cluster and annotated
#' with the cluster number) is inserted directly to the left of the dendrogram
#' panel, so the two parts share one panel row and always have equal heights.
#'
#' @param X Data passed to `ClustOfVar::hclustvar()` as `X.quanti`.
#' @param N Number of clusters. Required: the former default `N = N` referred
#'   to itself and could never be evaluated.
#' @param cols Colours for the clusters, at least `N` of them. Defaults to
#'   `rainbow(N)`; an explicit vector such as
#'   `c("red", "blue", "orange", "darkgreen", "green", "yellow")` also works.
#' @return The combined gtable, invisibly. As a side effect the figure is
#'   drawn on a new grid page and the plotting packages are attached.
plot_color_clust <- function(X, N, cols = rainbow(N)) {

  library(ggplot2)
  library(gtable)
  library(grid)
  library(ggdendro)
  library(plyr)

  if (missing(N)) stop("N (the number of clusters) must be supplied.")
  if (N > length(cols)) stop("N too big. Not enough colors in cols.")
  if (N > ncol(X)) stop("N too big. Not enough columns in data.")

  fit <- ClustOfVar::hclustvar(X.quanti = X)
  ddata_x <- dendro_data(as.dendrogram(fit))
  # Cluster id of every clustered item, named by item label; reused below for
  # both the label colours and the tiles.
  clusters <- cutree(fit, k = N)

  # Leaf labels with their plotting positions; attach the colour group
  # "clust<id>" by looking each label up in the names of `clusters`.
  lab <- ggdendro::label(ddata_x)
  lab$group <- paste0(
    "clust", clusters[match(as.character(lab$label), names(clusters))]
  )

  p1 <- ggplot(segment(ddata_x)) +
    geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
    # y = -.05 adds a little space between label and tree
    geom_text(data = lab, aes(label = label, x = x, y = -.05, colour = group),
              size = 4, hjust = 1) +
    scale_x_continuous(expand = c(0, .5)) +   # 1. Add half a unit to each end of the vertical axis
    expand_limits(y = -0.4) +   # 2. Make room for labels
    theme_classic() +
    scale_colour_manual(values = cols) +
    coord_flip() +
    theme(legend.position = "none", axis.line = element_blank(),
          axis.text = element_blank(), axis.title = element_blank(),
          axis.ticks = element_blank(),
          axis.ticks.length = unit(0, "cm"))

  # One row per clustered item; `states` is ordered like the dendrogram leaves
  # so that the tiles line up with the labels.
  df2 <- data.frame(
    cluster = clusters,
    states = factor(fit$labels, levels = fit$labels[fit$order])
  )
  # Mean leaf position per cluster: where the cluster number is printed.
  df3 <- ddply(df2, .(cluster), summarise, pos = mean(as.numeric(states)))

  p2 <- ggplot(df2, aes(states, y = 1,
                        # 'as.character' - so that colours match with 10 or
                        # more clusters (same alphabetical order as "clust<id>")
                        fill = factor(as.character(cluster)))) +
    geom_tile() +
    scale_y_continuous(expand = c(0, 0)) +
    scale_x_discrete(expand = c(0, 0)) +
    coord_flip() +
    # `size` is a fixed setting, so it belongs outside aes(); inside aes() it
    # was treated as data and mapped through a size scale. 4 matches the size
    # of the leaf labels in p1.
    geom_text(data = df3, aes(x = pos, label = cluster), size = 4) +
    scale_fill_manual(values = cols)

  gp1 <- ggplotGrob(p1)  # Get ggplot grobs
  gp2 <- ggplotGrob(p2)

  # Locate cells by layout name instead of hard-coded indices: the position of
  # the panel inside the gtable differs between ggplot2 versions (the original
  # indices, panel at [6, 4] and tiles in column 2, match ggplot2 2.2.0).
  panel1 <- gp1$layout[gp1$layout$name == "panel", ]
  panel2 <- gp2$layout[gp2$layout$name == "panel", ]
  # The (blank) left axis-title column of the dendrogram hosts the tiles.
  tile_col <- gp1$layout$l[gp1$layout$name == "ylab-l"]
  if (nrow(panel1) != 1 || nrow(panel2) != 1 || length(tile_col) != 1) {
    stop("Unexpected ggplot2 gtable layout: cannot locate the plot panel ",
         "or the left axis-title column.")
  }

  tiles <- gp2[panel2$t, panel2$l]   # 3. Grab plot panel only from tiles plot (thus, no margins)
  gp1 <- gtable_add_grob(gp1, tiles, t = panel1$t, l = tile_col,
                         name = "tiles")  # 3. Insert it into dendrogram plot
  gp1$widths[tile_col] <- unit(1, "cm")  # 3. Set width of column containing tiles

  grid.newpage()
  grid.draw(gp1)
  invisible(gp1)
}

plot_color_clust(X, 6)



标签: r ggplot2