ggplot2: display blocks of nested split violins

2019-05-31 11:02发布

问题:

I have the following dataset:

df <- data.frame(dens = rnorm(5000),
             split = as.factor(sample(1:2, 5000, replace = T)),
             method = as.factor(sample(c("A","B"), 5000, replace = T)),
             counts = sample(c(1, 10, 100, 1000, 10000), 5000, replace = T))

What i am wanting to do is to do split violin plots for splits 1 and 2 within groups A and B for each count (which would be in the logscale, but that is not important for this example). We have four groups for each setting but there is a nested aspect to it.

So, I can do the following:

df$key <- factor(paste(df$split, df$method))

and then:

library(ggplot2)
ggplot(df, aes(x = factor(counts), y = dens, fill = split)) +
geom_violin(aes(fill = key), scale = "width", draw_quantiles = c(0.25, 0.5, 0.75)) + scale_fill_manual(values = cbPalette) + theme_bw()

which gives me the following plot:

But what I want is really the light blue and the dark blue to be the two halves of a split violin plot and the light green and the dark green to be the two halves of another split violin plot and these plots should be bunched together. I would also like the different counts to be more separated from each other, but i feel that I can figure that out.

Note that this question is different than the one I have listed or Split violin plot with ggplot2 because we are bunching two different levels of nested split violin plots for each "Counts".

I was trying to follow enter link description here but I can not tell how to add such a nested groups setting to the code there and am looking for some advice.

Here is what I have tried:

GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin, 
  draw_group = function(self, data, ..., draw_quantiles = NULL){
    # By @YAK: https://stackoverflow.com/questions/35717353/split-violin-plot-with-ggplot2
    data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
    grp <- data[1,'group']
    newdata <- plyr::arrange(transform(data, x = if(grp%%2==1) xminv else xmaxv), if(grp%%2==1) y else -y)
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    newdata[c(1,nrow(newdata)-1,nrow(newdata)), 'x'] <- round(newdata[1, 'x']) 
    if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      quantiles <- create_quantile_segment_frame(data, draw_quantiles, split = TRUE, grp = grp)
      aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
    }
    else {
      ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  }
)

create_quantile_segment_frame <- function (data, draw_quantiles, split = FALSE, grp = NULL) {
  dens <- cumsum(data$density)/sum(data$density)
  ecdf <- stats::approxfun(dens, data$y)
  ys <- ecdf(draw_quantiles)
  violin.xminvs <- (stats::approxfun(data$y, data$xminv))(ys)
  violin.xmaxvs <- (stats::approxfun(data$y, data$xmaxv))(ys)
  violin.xs <- (stats::approxfun(data$y, data$x))(ys)
  if (grp %% 2 == 0) {
    data.frame(x = ggplot2:::interleave(violin.xs, violin.xmaxvs), 
               y = rep(ys, each = 2), group = rep(ys, each = 2)) 
  } else {
    data.frame(x = ggplot2:::interleave(violin.xminvs, violin.xs), 
               y = rep(ys, each = 2), group = rep(ys, each = 2)) 
  }
}



geom_split_violin <- function (mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
  layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}

library(ggplot2)
ggplot(df, aes(x = factor(counts), y = dens, fill = interaction(split,method))) +
           geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) + scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + theme_light() + theme(legend.position="bottom")

And here is what I get:

As can be seen, the green images are on top of the blues. How do I get around this? Thanks!

EDIT: Folllowing Axeman's suggestion, I am almost there:

   levels(df$split) <- factor(0:3)

   library(ggplot2)
   ggplot(df, aes(x = interaction(split, counts), y = dens, fill = key)) + geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) + scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + theme_light() + theme(legend.position="bottom") + scale_x_discrete(interaction(df$split,df$counts)[-length(interaction(df$split,df$counts))], drop = FALSE)

So almost there!

Would like two fixes: the white space arising from the last interaction between split and counts, and the scale to only have counts for each bunch. Wonder if these should be separate questions on Stackoverflow.

Almost there!

library(ggplot2)
ggplot(df, aes(x = interaction(split, counts), y = dens, fill = key)) + geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + theme_light() + theme(legend.position="bottom") + scale_x_discrete(limits=levels(interaction(df$split,df$counts))[-length(levels(interaction(df$split,df$counts)))],drop = FALSE)

This yields:

I still need to place the value of counts on the x-axis, in between the two plots.

回答1:

I think that this question has become too long and the basic parts of this question have been answered. I have put up a new question on how to change the discrete scale. Hopefully, someone will know! Anyway, here is the answer to this question (thanks, Axe!). It is in the edited version of my question.

library(ggplot2)
df <- data.frame(dens = rnorm(5000),
             split = factor(sample(1:2, 5000, replace = T)),
             method = factor(sample(c("A","B"), 5000, replace = T)),
             counts = factor(sample(c(1, 10, 100, 1000, 10000), 5000, replace = T)))

df$key <- factor(paste(df$split, df$method))

levels(df$split) <- factor(0:2)
library(ggplot2)

ggplot(df, aes(x = interaction(split, counts), y = dens, fill = key)) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
  scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + 
  theme_light() + 
  theme(legend.position="bottom") + 
  scale_x_discrete(
    limits = levels(interaction(df$split,df$counts))[-length(levels(interaction(df$split,df$counts)))],
    drop = FALSE, 
    name = "Counts"
  )


标签: r ggplot2