I have two pieces of data that I want to overlay onto the same plot. I've looked at several ggplot articles and I don't think it's possible within ggplot. So I have been using barplot. I have 5 tiers and I'm plotting total dollars by tier as a solid bar.
Then I have another piece of data that represents the number of tasks within those tiers by two different types of workers. I have this as a stacked bar plot. But I want to show them on the same graph with the total dollar amount as one bar and then the corresponding stacked bar next to it.
Here are the plots:
The data for the first graph looks like this (it's a table):
1 2 3 4 5
0 9 340 97 812 4271
1 1 417 156 3163 11314
The data for the second graph looks like this (this is a dataset):
Tier variable value
1 1 Opp_Amt 16200.00
2 2 Opp_Amt 116067.50
3 3 Opp_Amt 35284.12
4 4 Opp_Amt 278107.10
5 5 Opp_Amt 694820.29
I want to put the graphs on top of each other but the bars keep overlapping and I want them to appear side by side by tier.
Code for what I have so far.
par(mar=c(2.5, 4, 4, 4)+2)
## Plot first set of data and draw its axis
barplot(data1$value, axes=FALSE,ylim=c(0,700000), xlab="", ylab="",
col="black",space=-10,main="Work Score")
axis(2, ylim=c(0,700000),col="black",las=1) ## las=1 makes horizontal labels
mtext("Total Opportunity Amount",side=2,line=3.5)
box()
## Allow a second plot on the same graph
par(new=TRUE)
## Plot the second plot and put axis scale on right
m <- barplot(counts, xlab="", ylab="", ylim=c(0,16000),axes=FALSE, col=c("red","darkblue"),space=3,width=0.5,density=20)
## a little farther out (line=4) to make room for labels
mtext("Task Ratio: Outbound to AE",side=4,col="red",line=3.5)
axis(4, ylim=c(0,16000), col="red",col.axis="black",las=1)
And it gives me this
Using ggplot
, I would do something like one of these. They plot the two sets of data separately. The first arranges the data into one dataframe, then uses facet_wrap()
to position the plots side-by-side. The second generates the two plot objects separately, then combines the two plots and the legend into a combined plot.
But if you really need the "dual y-axis" approach, then with some fiddling, and using the plots' layouts and gtable
functions, it can be done (using code borrowed from here).
Like this:
library(ggplot2)
library(gtable)
library(plyr)
df1 <- data.frame(Tier = rep(1:5, each = 2),
y = c(9, 1, 340, 417, 97, 156, 812, 3063, 4271, 11314),
gp = rep(0:1, 5))
df2 <- read.table(text = "
Tier variable value
1 Opp_Amt 16200.00
2 Opp_Amt 116067.50
3 Opp_Amt 35284.12
4 Opp_Amt 278107.10
5 Opp_Amt 694820.29", header = TRUE)
dfA = df1
dfB = df2
names(dfA) = c("Tier", "Value", "gp")
dfA$var = "Task Ratio"
dfB = dfB[,c(1,3)]
dfB$gp = 3
dfB$var = "Total Opportunity Amount"
names(dfB) = names(dfA)
df = rbind(dfA, dfB)
df$var = factor(df$var)
df$var = factor(df$var, levels = rev(levels(df$var)))
ggplot(df, aes(Tier, Value, fill = factor(gp))) +
geom_bar(position = "stack", stat = "identity") +
facet_wrap( ~ var, scale = "free_y") +
scale_fill_manual("Group", breaks = c("0","1"), values = c("#F8766D", "#00BFC4", "black")) +
theme_bw() +
theme(panel.spacing = unit(2, "lines"),
panel.grid = element_blank())
Or this:
p1 <- ggplot(df1, aes(factor(Tier), y, fill = factor(gp))) +
geom_bar(position = "stack", stat = "identity") +
#guides(fill = FALSE) +
scale_y_continuous("Task Ratio",
limit = c(0, 1.1*max(ddply(df1, .(Tier), summarise, sum = sum(y)))),
expand = c(0,0)) +
scale_x_discrete("Tier") +
theme_bw() +
theme(panel.grid = element_blank())
p2 <- ggplot(df2, aes(factor(Tier), value)) +
geom_bar(stat = "identity") +
scale_y_continuous("Total Opportunity Amount", limit = c(0, 1.1*max(df2$value)), expand = c(0,0)) +
scale_x_discrete("Tier") +
theme_bw() +
theme(panel.grid = element_blank())
# Get the ggplot grobs,
# And get the legend from p1
g1 <- ggplotGrob(p1)
leg = gtable_filter(g1, "guide-box")
legColumn = g1$layout[which(g1$layout$name == "guide-box"), "l"]
g1 = g1[,-legColumn]
g2 <- ggplotGrob(p2)
# Make sure the width are the same in g1 and g2
library(grid)
maxWidth = unit.pmax(g1$widths, g2$widths)
g1$widths = as.list(maxWidth)
g2$widths = as.list(maxWidth)
# Combine g1, g2 and the legend
library(gridExtra)
grid.arrange(arrangeGrob(g2, g1, nrow = 1), leg,
widths = unit.c(unit(1, "npc") - leg$width, leg$width), nrow=1)
Or the dual y-axis approach (But not recommended for reasons given in @Phil's post):
width1 = 0.6 # width of bars in p1
width2 = 0.2 # width of bars in p2
pos = .5*width1 + .5*width2 # positioning bars in p2
p1 <- ggplot(df1, aes(factor(Tier), y, fill = factor(gp))) +
geom_bar(position = "stack", stat = "identity", width = width1) +
guides(fill = FALSE) +
scale_y_continuous("",
limit = c(0, 1.1*max(ddply(df1, .(Tier), summarise, sum = sum(y)))),
expand = c(0,0)) +
scale_x_discrete("Tier") +
theme_bw() +
theme(panel.grid = element_blank(),
axis.text.y = element_text(colour = "red", hjust = 0, margin = margin(l = 2, unit = "pt")),
axis.ticks.y = element_line(colour = "red"))
p2 <- ggplot(df2, aes(factor(Tier), value)) +
geom_blank() +
geom_bar(aes(x = Tier - pos), stat = "identity", width = width2) +
scale_y_continuous("", limit = c(0, 1.1*max(df2$value)), expand = c(0,0)) +
theme_bw() +
theme(panel.grid = element_blank())
# Get ggplot grobs
g1 <- ggplotGrob(p1)
g2 <- ggplotGrob(p2)
# Get locations of the panels in g1
pp1 <- c(subset(g1$layout, name == "panel", se = t:r))
## Get bars from g2 and insert them into the panel in g1
g <- gtable_add_grob(g1, g2$grobs[[which(g2$layout$name == "panel")]][[4]][[4]], pp1$t, pp1$l)
# Grab axis from g1, reverse elements, and put it on the right
index <- which(g1$layout$name == "axis-l")
grob <- g1$grobs[[index]]
axis <- grob$children[[2]]
axis$widths <- rev(axis$widths)
axis$grobs <- rev(axis$grobs)
axis$grobs[[1]]$x <- axis$grobs[[1]]$x - unit(1, "npc") + unit(3, "pt")
g <- gtable_add_cols(g, g1$widths[g1$layout[index, ]$l], pp1$r)
g <- gtable_add_grob(g, axis, pp1$t, pp1$l+1)
# Grab axis from g2, and put it on the left
index <- which(g2$layout$name == "axis-l")
grob <- g2$grobs[[index]]
axis <- grob$children[[2]]
g <- gtable_add_grob(g, rectGrob(gp = gpar(col = NA, fill = "white")), pp1$t-1, pp1$l-1, pp1$b+1)
g <- gtable_add_grob(g, axis, pp1$t, pp1$l-1)
# Add axis titles
# right axis title
RightAxisText = textGrob("Task Ratio", rot = 90, gp = gpar(col = "red"))
g <- gtable_add_cols(g, unit.c(unit(1, "grobwidth", RightAxisText) + unit(1, "line")), 5)
g <- gtable_add_grob(g, RightAxisText, pp1$t, pp1$r+2)
# left axis title
LeftAxisText = textGrob("Total Opportunity Amount", rot = 90)
g <- gtable_add_grob(g, LeftAxisText, pp1$t, pp1$l-2)
g$widths[2] <- unit.c(unit(1, "grobwidth", LeftAxisText) + unit(1, "line"))
# Draw it
grid.newpage()
grid.draw(g)
It appears you are trying to plot two variables on two different y scales on to one chart. I recommend against this, and this is considered bad practice. See, for example, @hadley 's (the author of ggplot2) answer here about a similar issue: https://stackoverflow.com/a/3101876/3022126
It is possible to plot two variables on one y axis if they have comparable scales, but the range of your two datasets do not greatly overlap.
Consider other visualisations, perhaps using two separate charts.
Try looking at the add
parameter for barplot
.
## Function to create alpha colors for illustration.
col2alpha <- function(col, alpha = 0.5) {
tmp <- col2rgb(col)
rgb(tmp[1]/255, tmp[2]/255, tmp[3]/255, alpha)
}
## Some fake data
dat1 <- data.frame(id = 1:4, val = c(10, 8, 6, 4))
dat2 <- data.frame(id = 1:4, val = c(4, 6, 8, 10))
barplot(dat1$val, col = col2alpha("blue"))
barplot(dat2$val, col = col2alpha("red"), add = TRUE)