Is there a way to have a barplot and a stacked barplot on the same graph?

2019-08-07 04:28发布

问题:

I have two pieces of data that I want to overlay onto the same plot. I've looked at several ggplot articles and I don't think it's possible within ggplot. So I have been using barplot. I have 5 tiers and I'm plotting total dollars by tier as a solid bar.

Then I have another piece of data that represents the number of tasks within those tiers by two different types of workers. I have this as a stacked bar plot. But I want to show them on the same graph with the total dollar amount as one bar and then the corresponding stacked bar next to it.

Here are the plots:

The data for the first graph looks like this (it's a table):

        1     2     3     4     5
  0     9   340    97   812  4271
  1     1   417   156  3163 11314

The data for the second graph looks like this (this is a dataset):

    Tier    variable    value
1   1   Opp_Amt 16200.00
2   2   Opp_Amt 116067.50
3   3   Opp_Amt 35284.12
4   4   Opp_Amt 278107.10
5   5   Opp_Amt 694820.29

I want to put the graphs on top of each other but the bars keep overlapping and I want them to appear side by side by tier.

Code for what I have so far.

# Asker's attempt: draw the total-dollar bars, then overlay the stacked task
# counts on the same device with a second y axis on the right.
# `data1` and `counts` are defined outside this snippet.
# Margins: c(2.5, 4, 4, 4) plus 2 lines on every side, leaving room for the
# axis titles written with mtext() on sides 2 and 4.
par(mar=c(2.5, 4, 4, 4)+2)
## Plot first set of data and draw its axis
barplot(data1$value, axes=FALSE,ylim=c(0,700000), xlab="", ylab="", 
        col="black",space=-10,main="Work Score")
axis(2, ylim=c(0,700000),col="black",las=1)  ## las=1 makes horizontal labels
mtext("Total Opportunity Amount",side=2,line=3.5)
box()

## Allow a second plot on the same graph
par(new=TRUE)

## Plot the second plot and put axis scale on right
## NOTE(review): this call uses space=3 and width=0.5 while the first call
## uses space=-10 and the default width, so the two sets of bars are
## presumably laid out on different x positions -- the likely cause of the
## overlap described in the question; confirm by comparing the returned
## midpoints.


m <- barplot(counts,  xlab="", ylab="", ylim=c(0,16000),axes=FALSE, col=c("red","darkblue"),space=3,width=0.5,density=20)
## Right-hand axis title, placed 3.5 margin lines out (line=3.5) to make room
## for the tick labels
mtext("Task Ratio: Outbound to AE",side=4,col="red",line=3.5) 
axis(4, ylim=c(0,16000), col="red",col.axis="black",las=1)

And it gives me this

回答1:

Using ggplot, I would do something like one of these. They plot the two sets of data separately. The first arranges the data into one dataframe, then uses facet_wrap() to position the plots side-by-side. The second generates the two plot objects separately, then combines the two plots and the legend into a combined plot.

But if you really need the "dual y-axis" approach, then with some fiddling, and using the plots' layouts and gtable functions, it can be done (using code borrowed from here).

Like this:

library(ggplot2)
library(gtable)
library(plyr)

# Task counts per tier, in long form: two rows per tier, one for each worker
# group (gp = 0 and gp = 1). The values are transcribed from the question's
# table; the Tier 4 / group 1 count is 3163 there (it had been mistyped as
# 3063).
df1 <- data.frame(Tier = rep(1:5, each = 2),
       y = c(9, 1, 340, 417, 97, 156, 812, 3163, 4271, 11314),
       gp = rep(0:1, 5))

# Total opportunity amount per tier, read from the text block given in the
# question.
df2 <- read.table(text = "
    Tier    variable    value
   1   Opp_Amt 16200.00
   2   Opp_Amt 116067.50
   3   Opp_Amt 35284.12
   4   Opp_Amt 278107.10
   5   Opp_Amt 694820.29", header = TRUE)


# Stack both datasets into one long data frame (columns Tier, Value, gp, var)
# so they can be drawn as two side-by-side facets sharing the Tier axis.
dfA <- df1
names(dfA) <- c("Tier", "Value", "gp")
dfA$var <- "Task Ratio"

# Keep only the tier and amount columns of df2, selected by name rather than
# by position, and give the totals their own fill group (3).
dfB <- df2[, c("Tier", "value")]
dfB$gp <- 3
dfB$var <- "Total Opportunity Amount"
names(dfB) <- names(dfA)

df <- rbind(dfA, dfB)
# Explicit level order puts the totals facet first (left) and the task
# counts second (right).
df$var <- factor(df$var, levels = c("Total Opportunity Amount", "Task Ratio"))

# Only groups 0 and 1 appear in the legend (breaks); the third colour is used
# for the totals (gp = 3).
ggplot(df, aes(Tier, Value, fill = factor(gp))) +
  geom_bar(position = "stack", stat = "identity") +
  facet_wrap(~ var, scales = "free_y") +
  scale_fill_manual("Group", breaks = c("0", "1"),
                    values = c("#F8766D", "#00BFC4", "black")) +
  theme_bw() +
  theme(panel.spacing = unit(2, "lines"),
        panel.grid = element_blank())



Or this:

# Stacked task counts per tier. The legend is kept on purpose: it is pulled
# out of this plot's gtable further down and drawn beside the combined plot.
# The upper y limit is 110% of the tallest stack, i.e. the largest per-tier
# sum of y. Only the `sum` column of the ddply() result is used, so the Tier
# column can never leak into the maximum.
p1 <- ggplot(df1, aes(factor(Tier), y, fill = factor(gp))) +
   geom_bar(position = "stack", stat = "identity") +
   scale_y_continuous("Task Ratio",
     limits = c(0, 1.1 * max(ddply(df1, .(Tier), summarise, sum = sum(y))$sum)),
     expand = c(0, 0)) +
   scale_x_discrete("Tier") +
   theme_bw() +
   theme(panel.grid = element_blank())

# Total opportunity amount per tier, drawn as plain bars with the same
# 10% headroom above the largest value.
p2 <- ggplot(df2, aes(factor(Tier), value)) +
   geom_bar(stat = "identity") +
   scale_y_continuous("Total Opportunity Amount",
     limits = c(0, 1.1 * max(df2$value)),
     expand = c(0, 0)) +
   scale_x_discrete("Tier") +
   theme_bw() +
   theme(panel.grid = element_blank())

# Convert both plots to gtables, and pull the legend out of p1 so it can be
# drawn once, to the right of the two panels.
# NOTE(review): this relies on the legend cell being named exactly
# "guide-box" in the plot layout; that name may differ between ggplot2
# versions -- verify against the installed version.
g1 <- ggplotGrob(p1)
leg = gtable_filter(g1, "guide-box")
# Column index ("l") of the legend cell in g1's layout; that column is then
# dropped from g1.
legColumn = g1$layout[which(g1$layout$name == "guide-box"), "l"]
g1 = g1[,-legColumn]
g2 <- ggplotGrob(p2)

# Make sure the column widths are the same in g1 and g2: take the
# element-wise maximum of the two width vectors and assign it to both.
library(grid)
maxWidth = unit.pmax(g1$widths, g2$widths)

g1$widths = as.list(maxWidth)
g2$widths = as.list(maxWidth)

# Combine g1, g2 and the legend: the two plots share whatever width is left
# after the legend has taken its own width.
# NOTE(review): `leg$width` presumably resolves to `leg$widths` through `$`
# partial matching -- confirm.
library(gridExtra)
grid.arrange(arrangeGrob(g2, g1, nrow = 1), leg,
   widths = unit.c(unit(1, "npc") - leg$width, leg$width), nrow=1)



Or the dual y-axis approach (but not recommended, for the reasons given in @Phil's post):

width1 <- 0.6       # width of bars in p1
width2 <- 0.2       # width of bars in p2
# Offset that shifts the p2 bars left so their right edge touches the left
# edge of the p1 bars: half of each bar width.
pos <- .5 * width1 + .5 * width2

# Stacked task counts. The legend is suppressed with "none" (the FALSE form
# is deprecated in ggplot2), and the y axis text and ticks are coloured red
# because this axis is moved to the right-hand side further down.
# The upper y limit is 110% of the tallest stack; only the `sum` column of
# the ddply() result is used, so the Tier column cannot affect the maximum.
p1 <- ggplot(df1, aes(factor(Tier), y, fill = factor(gp))) +
   geom_bar(position = "stack", stat = "identity", width = width1) +
   guides(fill = "none") +
   scale_y_continuous("",
     limits = c(0, 1.1 * max(ddply(df1, .(Tier), summarise, sum = sum(y))$sum)),
     expand = c(0, 0)) +
   scale_x_discrete("Tier") +
   theme_bw() +
   theme(panel.grid = element_blank(),
         axis.text.y = element_text(colour = "red", hjust = 0,
                                    margin = margin(l = 2, unit = "pt")),
         axis.ticks.y = element_line(colour = "red"))

# Total amounts. geom_blank() is drawn first using the factor(Tier) mapping;
# the bars themselves are then placed at the numeric position Tier - pos.
p2 <- ggplot(df2, aes(factor(Tier), value)) +
   geom_blank() +
   geom_bar(aes(x = Tier - pos), stat = "identity", width = width2) +
   scale_y_continuous("", limits = c(0, 1.1 * max(df2$value)),
                      expand = c(0, 0)) +
   theme_bw() +
   theme(panel.grid = element_blank())

# Build the dual-axis figure by hand: take the bars from p2, drop them into
# p1's panel, move p1's y axis to the right and put p2's y axis on the left.

# Get ggplot grobs
g1 <- ggplotGrob(p1)
g2 <- ggplotGrob(p2)

# Get locations of the panels in g1 (layout columns t through r).
# `select` is spelled out in full rather than relying on partial matching.
pp1 <- c(subset(g1$layout, name == "panel", select = t:r))

## Get bars from g2 and insert them into the panel in g1
# NOTE(review): [[4]][[4]] reaches into the internal structure of the panel
# grob and presumably picks out the bar layer; this depends on the ggplot2
# version -- verify.
g <- gtable_add_grob(g1, g2$grobs[[which(g2$layout$name == "panel")]][[4]][[4]], pp1$t, pp1$l)

# Grab axis from g1, reverse elements, and put it on the right
index <- which(g1$layout$name == "axis-l")
grob <- g1$grobs[[index]]
axis <- grob$children[[2]]
axis$widths <- rev(axis$widths)
axis$grobs <- rev(axis$grobs)
# Shift the first element of the reversed axis back by one panel width
# (less 3pt).
axis$grobs[[1]]$x <- axis$grobs[[1]]$x - unit(1, "npc") + unit(3, "pt")

# Add a column after the panel, as wide as the original left axis column,
# and place the reversed axis in it.
g <- gtable_add_cols(g, g1$widths[g1$layout[index, ]$l], pp1$r)
g <- gtable_add_grob(g, axis, pp1$t, pp1$l+1)

# Grab axis from g2, and put it on the left
index <- which(g2$layout$name == "axis-l")
grob <- g2$grobs[[index]]
axis <- grob$children[[2]]
# A white rectangle is drawn over the left axis column before the new axis
# is added there.
g <- gtable_add_grob(g, rectGrob(gp = gpar(col = NA, fill = "white")), pp1$t-1, pp1$l-1, pp1$b+1)
g <- gtable_add_grob(g, axis, pp1$t, pp1$l-1)

# Add axis titles
# right axis title
# NOTE(review): the insertion position 5 is hard-coded and presumably
# matches the column just after the new right-hand axis -- confirm.
RightAxisText = textGrob("Task Ratio", rot = 90, gp = gpar(col = "red"))
g <- gtable_add_cols(g, unit.c(unit(1, "grobwidth", RightAxisText) + unit(1, "line")), 5)
g <- gtable_add_grob(g, RightAxisText, pp1$t, pp1$r+2)

# left axis title
# NOTE(review): column 2 is assumed to be the left axis-title column --
# confirm.
LeftAxisText = textGrob("Total Opportunity Amount", rot = 90)
g <- gtable_add_grob(g, LeftAxisText, pp1$t, pp1$l-2)
g$widths[2] <- unit.c(unit(1, "grobwidth", LeftAxisText) + unit(1, "line"))

# Draw it
grid.newpage()
grid.draw(g)



回答2:

It appears you are trying to plot two variables with two different y scales on one chart. I recommend against this; it is widely considered bad practice. See, for example, the answer by @hadley (the author of ggplot2) to a similar question: https://stackoverflow.com/a/3101876/3022126

It is possible to plot two variables on one y axis if they have comparable scales, but the ranges of your two datasets barely overlap.

Consider other visualisations, perhaps using two separate charts.



回答3:

Try looking at the add parameter for barplot.

#' Create semi-transparent versions of one or more colours.
#'
#' @param col Colour specification(s) accepted by `col2rgb()`, e.g. colour
#'   names or hex strings. May be a vector.
#' @param alpha Opacity passed to `rgb()` on a 0-1 scale. Defaults to 0.5.
#' @return A character vector of hex colour strings with an alpha channel,
#'   one per element of `col`.
col2alpha <- function(col, alpha = 0.5) {
  # col2rgb() returns a matrix with one column per colour and rows for the
  # red, green and blue channels on a 0-255 scale. Index whole rows so every
  # colour is converted, not just the first one.
  channels <- col2rgb(col) / 255
  rgb(channels[1, ], channels[2, ], channels[3, ], alpha)
}

## Two small illustrative datasets over the same ids, with opposite trends
dat1 <- data.frame(id = 1:4, val = c(10, 8, 6, 4))
dat2 <- data.frame(id = 1:4, val = rev(c(10, 8, 6, 4)))

## Draw the first series, then overlay the second on the same axes with
## add = TRUE; the translucent fills keep both sets of bars visible.
barplot(height = dat1[["val"]], col = col2alpha("blue"))
barplot(height = dat2[["val"]], col = col2alpha("red"), add = TRUE)