Jitter text/labels with position_stack

2019-03-25 05:28发布

Consider the following data.frame and chart:

library(ggplot2)
library(scales)
df <- data.frame(L=rep(LETTERS[1:2],each=4),
                 l=rep(letters[1:4],2),
                 val=c(96.5,1,2,0.5,48,0.7,0.3,51))
#   L l  val
# 1 A a 96.5
# 2 A b  1.0
# 3 A c  2.0
# 4 A d  0.5
# 5 B a 48.0
# 6 B b  0.7
# 7 B c  0.3
# 8 B d 51.0

ggplot(df,aes(x=L,y=val,fill=l)) +
  geom_bar(stat="identity") +
  geom_text(aes(label=percent(val/100)),position=position_stack(vjust =0.5))

plot1 Some labels are hard to read due to small values. I'd like to jitter those vertically. I'm aware of position_jitter but it doesn't seem compatible with a stacked bar chart.

2条回答
何必那么认真
2楼-- · 2019-03-25 05:33

I found 2 solutions that involve computing the base position of labels beforehand, one using position_jitter and one using ggrepel (suggested by user @gfgm in deleted answer)

create positions:

Note that I need to put NAs first here so I used: How to have NA's displayed first using arrange()

library(dplyr)
df <- df %>%
  group_by(L) %>%
  arrange(!is.na(l), desc(l)) %>% 
  mutate(pos = cumsum(val) - val/2)) # the -val/2 is to center the text

position_jitter solution

set.seed(2)
ggplot(df,aes(x=L,y=val,fill=l)) +
  geom_bar(stat="identity") +
  geom_text(aes(y=pos,label=percent(val/100)),position = position_jitter(width = 0,height=4))

plot1 ggrepel solution

library(ggrepel)
ggplot(df,aes(x=L,y=val,fill=l)) +
  geom_bar(stat="identity") +
  geom_text_repel(aes(y=pos,label=percent(val/100)),direction="y",box.padding=0)

plot2 comparison of both

ggrepel solution doesn't require manual calibration, the output isn't perfect but it's consistent, it also has great flexibility though and would be the solution of choice for most variants of my issue. Note that geom_text_repel has a seed parameter, but in my case it doesn't affect the results.

position_jitter doesn't give consistent result, positions are randomized, and for most cases it's a less good solution as text overlays (I think it's jittering as if we were dealing with points). For a given chart though it can give a better solution than ggrepel using set.seed beforehand, so maybe better for some reporting, worse the rest of the time.

If geom_text_repel supported position_stack I wouldn't have to go through the pain of the first step, but it doesn't unfortunately.

Both solutions have the slightly annoying effect of jittering isolated labels that shouldn't be jittered at all (this issue is handled by @erocoar's solution).

查看更多
祖国的老花朵
3楼-- · 2019-03-25 05:39

We can create a new Position, position_jitter_stack().

 position_jitter_stack <- function(vjust = 1, reverse = FALSE, 
                                  jitter.width = 1, jitter.height = 1,
                                  jitter.seed = NULL, offset = NULL) {
  ggproto(NULL, PositionJitterStack, vjust = vjust, reverse = reverse, 
          jitter.width = jitter.width, jitter.height = jitter.height,
          jitter.seed = jitter.seed, offset = offset)
}

PositionJitterStack <- ggproto("PositionJitterStack", PositionStack,
  type = NULL,
  vjust = 1,
  fill = FALSE,
  reverse = FALSE,
  jitter.height = 1,
  jitter.width = 1,
  jitter.seed = NULL,
  offset = 1,

  setup_params = function(self, data) {
    list(
      var = self$var %||% ggplot2:::stack_var(data),
      fill = self$fill,
      vjust = self$vjust,
      reverse = self$reverse,
      jitter.height = self$jitter.height,
      jitter.width = self$jitter.width,
      jitter.seed = self$jitter.seed,
      offset = self$offset
    )
  },

  setup_data = function(self, data, params) {
    data <- PositionStack$setup_data(data, params)
    if (!is.null(params$offset)) {
      data$to_jitter <- sapply(seq(nrow(data)), function(i) {
        any(abs(data$y[-i] - data$y[i]) <= params$offset)
      })
    } else {
      data$to_jitter <- TRUE
      }
    data
  },

  compute_panel = function(data, params, scales) {
    data <- PositionStack$compute_panel(data, params, scales)

    jitter_df <- data.frame(width = params$jitter.width,
                            height = params$jitter.height)

    if (!is.null(params$jitter.seed)) jitter_df$seed = params$jitter.seed
    jitter_positions <- PositionJitter$compute_layer(
      data[data$to_jitter, c("x", "y")],
      jitter_df
    )

    data$x[data$to_jitter] <- jitter_positions$x
    data$y[data$to_jitter] <- jitter_positions$y

    data
  }
)

And plot it ...

ggplot(df,aes(x=L,y=val,fill=l)) +
  geom_bar(stat="identity") +
  geom_text(aes(label=percent(val/100)),
            position = position_jitter_stack(vjust =0.5,
             jitter.height = 0.1,
             jitter.width =  0.3, offset = 1))

enter image description here

Alternatively, we could write a very simple repel function.

library(rlang)

position_stack_repel <- function(vjust = 1, reverse = FALSE, 
                                 offset = 1) {
  ggproto(NULL, PositionStackRepel, vjust = vjust, reverse = reverse,
          offset = offset)
}

PositionStackRepel <- ggproto("PositionStackRepel", PositionStack,
  type = NULL,
  vjust = 1,
  fill = FALSE,
  reverse = FALSE,
  offset = 1,

  setup_params = function(self, data) {
    list(
      var = self$var %||% ggplot2:::stack_var(data),
      fill = self$fill,
      vjust = self$vjust,
      reverse = self$reverse,
      offset = self$offset
    )
  },

  setup_data = function(self, data, params) {
    data <- PositionStack$setup_data(data, params)
    data <- data[order(data$x), ]
    data$to_repel <- unlist(by(data, data$x, function(x) {
      sapply(seq(nrow(x)), function(i) {
        (x$y[i]) / sum(x$y) < 0.1 & (
          (if (i != 1) (x$y[i-1] / sum(x$y)) < 0.1 else FALSE) | (
            if (i != nrow(x)) (x$y[i+1] / sum(x$y)) < 0.1 else FALSE))
      })
    }))
    data
  },

  compute_panel = function(data, params, scales) {
    data <- PositionStack$compute_panel(data, params, scales)
    data[data$to_repel, "x"] <- unlist(
      by(data[data$to_repel, ], data[data$to_repel, ]$x, 
         function(x) seq(x$x[1] - 0.3, x$x[1] + 0.3, length.out = nrow(x))))
    data
  }
)

Plot it:

ggplot(df,aes(x=L,y=val,fill=l)) +
  geom_bar(stat="identity") +
  geom_text(aes(label=percent(val/100)),
            position = position_stack_repel(vjust =0.5))

enter image description here

查看更多
登录 后发表回答