Error bars on stacked bar ggplot2

2019-06-15 01:41发布

问题:

I'm struggling to put error bars into the correct place on a stacked bar. As I read on an earlier post I used ddply in order to stack the error bars. Then that changed the order of the stacking so I ordered the factor. Now it appears the error bars are correct on one set of bars but not the other. What I want is a graph that looks like that below, just with the standard error shown with error bars. I'm listing the dput of the original data and the ddply data as well as the data set.

library(plyr)
library(ggplot2)

# Fix the level order of org (fungi first, then bacteria).
Suz2$org <- factor(Suz2$org, levels = c("fungi", "bacteria"), ordered = TRUE)

# Add the error-bar limits to every row:
#   ybegin = copy - se (lower limit), yend = copy + se (upper limit).
plydat <- ddply(Suz2, .(org, group, time), transform,
                ybegin = copy - se, yend = copy + se)

colvec <- c("blue", "orange")

ggplot(plydat, aes(time, copy)) +
  geom_bar(aes(fill = factor(org)), stat = "identity", width = 0.7) +
  scale_fill_manual(values = colvec) +
  facet_wrap(~group, nrow = 1) +
  # ymin takes the lower limit and ymax the upper one.
  geom_errorbar(aes(ymin = ybegin, ymax = yend), width = 0.5) +
  theme(panel.background = element_rect(fill = "white", colour = "white"),
        panel.grid = element_line(color = NA),
        panel.grid.minor = element_line(color = NA),
        panel.border = element_rect(fill = NA, color = "black"),
        axis.text.x = element_text(size = 10, colour = "black", face = "bold"),
        axis.title.x = element_text(vjust = 0.1, face = "bold"),
        axis.text.y = element_text(size = 12, colour = "black"),
        axis.title.y = element_text(vjust = 0.2, size = 12, face = "bold"))

dput(plydat)

structure(list(org = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("fungi", "bacteria"
), class = c("ordered", "factor")), time = structure(c(1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("0W", 
"6W"), class = "factor"), copy = c(97800000, 15500000, 40200000, 
10400000, 55100000, 14300000, 1.6e+07, 8640000, 2.98e+08, 77900000, 
2.33e+08, 2.2e+08, 3.37e+08, 88400000, 3.24e+08, 1.89e+08), group = structure(c(3L, 
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L), .Label = c("Native D0", 
"Native D707", "Notill D0", "Notill D707"), class = "factor"), 
    se = c(11100000, 2810000, 7110000, 2910000, 1.7e+07, 1500000, 
    1930000, 2980000, 43900000, 20100000, 56400000, 41200000, 
    75700000, 22500000, 57500000, 28100000), ybegin = c(86700000, 
    12690000, 33090000, 7490000, 38100000, 12800000, 14070000, 
    5660000, 254100000, 57800000, 176600000, 178800000, 261300000, 
    65900000, 266500000, 160900000), yend = c(108900000, 18310000, 
    47310000, 13310000, 72100000, 15800000, 17930000, 11620000, 
    341900000, 9.8e+07, 289400000, 261200000, 412700000, 110900000, 
    381500000, 217100000)), .Names = c("org", "time", "copy", 
"group", "se", "ybegin", "yend"), row.names = c(NA, -16L), class = "data.frame")

dput(Suz2)

structure(list(org = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("fungi", "bacteria"
), class = c("ordered", "factor")), time = structure(c(1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("0W", 
"6W"), class = "factor"), copy = c(97800000, 15500000, 40200000, 
10400000, 55100000, 14300000, 1.6e+07, 8640000, 2.98e+08, 77900000, 
2.33e+08, 2.2e+08, 3.37e+08, 88400000, 3.24e+08, 1.89e+08), group = structure(c(3L, 
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L), .Label = c("Native D0", 
"Native D707", "Notill D0", "Notill D707"), class = "factor"), 
    se = c(11100000, 2810000, 7110000, 2910000, 1.7e+07, 1500000, 
    1930000, 2980000, 43900000, 20100000, 56400000, 41200000, 
    75700000, 22500000, 57500000, 28100000)), .Names = c("org", 
"time", "copy", "group", "se"), row.names = c(NA, -16L), class = "data.frame")

Suz2

  org time     copy       group       se
1     fungi   0W 9.78e+07   Notill D0 11100000
2     fungi   0W 1.55e+07 Notill D707  2810000
3     fungi   0W 4.02e+07   Native D0  7110000
4     fungi   0W 1.04e+07 Native D707  2910000
5     fungi   6W 5.51e+07   Notill D0 17000000
6     fungi   6W 1.43e+07 Notill D707  1500000
7     fungi   6W 1.60e+07   Native D0  1930000
8     fungi   6W 8.64e+06 Native D707  2980000
9  bacteria   0W 2.98e+08   Notill D0 43900000
10 bacteria   0W 7.79e+07 Notill D707 20100000
11 bacteria   0W 2.33e+08   Native D0 56400000
12 bacteria   0W 2.20e+08 Native D707 41200000
13 bacteria   6W 3.37e+08   Notill D0 75700000
14 bacteria   6W 8.84e+07 Notill D707 22500000
15 bacteria   6W 3.24e+08   Native D0 57500000
16 bacteria   6W 1.89e+08 Native D707 28100000

回答1:

The values for both ybegin and yend, the range of the errorbar, are too low for the bacteria data. Since the bars for bacteria are on top of the fungi bars, the height of the fungi bars (plydat$copy[plydat$org == "fungi"]) has to be added to the errorbar values of the bacteria data.

# Shift the bacteria error-bar limits up by the height of the fungi bars.
# The addition is positional, so it relies on the fungi and bacteria rows of
# plydat appearing in the same time/group order.
# Note: `<-` must stay on the same line as the assignment target; starting a
# new line with `<-` is a syntax error in R.
plydat[plydat$org == "bacteria", ] <-
  transform(plydat[plydat$org == "bacteria", ],
            ybegin = ybegin + plydat[plydat$org == "fungi", "copy"],
            yend = yend + plydat[plydat$org == "fungi", "copy"])



回答2:

Personally, I'm not really fond of stacked bar charts, especially when the number of stacked bars is large (which is not the case for you). The main problem is the fact that all but the lowest stack do not share the same baseline. In your case, it is hard to compare the orange bacteria bars, as they do not share the same base (y value, copy).

I propose to use a plot called a dotplot:

library(ggplot2)
theme_set(theme_bw())

# Dot plot: one point per org/time, faceted by group, with the axes flipped so
# that copy runs horizontally.
# ybegin is the lower limit and yend the upper limit, so they map to ymin and
# ymax respectively.
ggplot(plydat, aes(time, copy, color = org)) +
  geom_point() +
  facet_wrap(~group, ncol = 1) +
  geom_errorbar(aes(ymin = ybegin, ymax = yend), width = 0) +
  coord_flip()

Note that the copy value is not additive here as it was in the stacked bar chart. Because the points share the same base copy value (0), you can easily compare the different values of bacteria. In addition, I swapped the x and y axes to make it easy to compare the values of copy (just remove the coord_flip to see how badly that works for comparing copy).

The only real downside is that there is no easy way of judging the sum of fungi and bacteria. Depending on what the chart is meant to show (the story of the chart), this may or may not be a problem. To remedy this, you could add an additional category to org, e.g. "both", which is the sum of the two categories. Of course, interpreting the error in this summed category is non-trivial.



回答3:

From a combination of the above answers I think I'm going to go with something like this.

# Add the error-bar limits to every row:
#   ybegin = copy - se (lower limit), yend = copy + se (upper limit).
plydat <- ddply(Suz2, .(org), transform,
                ybegin = copy - se, yend = copy + se)

colvec <- c("blue", "orange")

ggplot(plydat, aes(time, copy, color = factor(org))) +
  geom_point(size = 3.5) +
  facet_wrap(~group, ncol = 4) +
  scale_color_manual(values = colvec) +
  # ymin takes the lower limit and ymax the upper one.
  geom_errorbar(aes(ymin = ybegin, ymax = yend), width = 0.08,
                color = "black", size = 0.1) +
  theme(panel.background = element_rect(fill = "white", colour = "white"),
        panel.grid = element_line(color = NA),
        panel.grid.minor = element_line(color = NA),
        panel.border = element_rect(fill = NA, color = "black"),
        strip.background = element_blank(),
        axis.text.x = element_text(size = 10, colour = "black", face = "bold"),
        axis.title.x = element_text(vjust = 0.1, face = "bold"),
        axis.text.y = element_text(size = 12, colour = "black"),
        axis.title.y = element_text(vjust = 0.2, size = 12, face = "bold"))



标签: r ggplot2 plyr