I have five line plots from which I'd like to output a shaded area that represents the region between their plotted upper and lower regions. I'm creating an R script (see below) as I have multiple datasets for which I need to repeat this exercise.
However, I'm only able to print the geom_ribbon from the last i and j pair - I can't seem to output every geom_ribbon into the created list.
I'd be grateful for any ideas on how to get all of the geom_ribbon objects into the list. Only one plot is printed with print(Z)
(example below). I'd like, if possible, all geom_ribbon objects to be overlain and printed as a single ggplot.
# Build one geom_ribbon layer for every (i, j) pair of columns, collect the
# layers in the list Z, and finally add the whole list to a single empty plot.
Z <- list()
# Both lists hold the same five decade_max* columns of cahp_max_plot15cb, so
# the nested loop below visits every ordered pair of columns (25 in total).
allmaxi <- list(cahp_max_plot15cb$decade_maxa, cahp_max_plot15cb$decade_maxc,cahp_max_plot15cb$decade_maxd, cahp_max_plot15cb$decade_maxe, cahp_max_plot15cb$decade_maxf)
allmaxj <- list(cahp_max_plot15cb$decade_maxa, cahp_max_plot15cb$decade_maxc,cahp_max_plot15cb$decade_maxd, cahp_max_plot15cb$decade_maxe, cahp_max_plot15cb$decade_maxf)
for (i in allmaxi) {
for (j in allmaxj) {
# NOTE(review): aes() presumably stores `i` and `j` as unevaluated expressions
# that are only looked up when the plot is built; that would explain why every
# layer is drawn with the last i/j pair -- confirm against the ggplot2 docs.
l <- geom_ribbon(data=cahp_max_plot15cb,aes(x=decade,ymin=i, ymax=j))
# append the new layer at the end of the list
Z[[length(Z) + 1]] <- l
# debugging output: the ymin and ymax vectors used for this layer
print(i)
print(j)
}
}
# add every collected layer to one empty plot and draw it
print(ggplot() + Z)
Sample output (from print(i) and print(j) in script) from inputting one dataset (decade_maxa) to i list, and four other data sets to j list:
[1] 2010.811 1723.783 1961.088 1662.909 1587.191 1662.140 1665.415 1602.974 1807.453 1586.106
[11] 1580.880 1685.253 1653.178 1824.842
[1] 1390.260 1247.700 1263.578 1711.638 1228.326 1762.045 1260.147 1171.914 1697.987 1350.867
[11] 1434.525 1488.818 1610.513 1536.895
[1] 2010.811 1723.783 1961.088 1662.909 1587.191 1662.140 1665.415 1602.974 1807.453 1586.106
[11] 1580.880 1685.253 1653.178 1824.842
[1] 1120.2700 1094.3047 1196.8792 1227.9660 1236.9170 1266.0935 1127.1480 974.6948 947.3365
[10] 1244.3242 1254.2704 1082.3667 1286.9080 1126.1943
[1] 2010.811 1723.783 1961.088 1662.909 1587.191 1662.140 1665.415 1602.974 1807.453 1586.106
[11] 1580.880 1685.253 1653.178 1824.842
[1] 1396.695 1425.073 1382.941 1913.495 1401.754 1499.763 1600.656 1367.043 1413.390 1343.804
[11] 1431.790 1402.292 1329.192 1696.729
[1] 2010.811 1723.783 1961.088 1662.909 1587.191 1662.140 1665.415 1602.974 1807.453 1586.106
[11] 1580.880 1685.253 1653.178 1824.842
[1] 1718.874 1389.134 1501.574 1233.189 1262.480 1508.919 1291.467 1431.869 1505.102 1376.519
[11] 1441.181 1421.552 1326.547 1635.599
> print(ggplot() + Z)
This is my aim. Maybe there is a better way with lapply?
This is the image output by integrating median values, as proposed below:
# Attach the per-decade median of `value` to every row of the long data.
median_g <- group_by(cahp_max_plot15cbm, decade)
median_gm <- mutate(median_g, median = median(value))

# One ribbon per series between the decade median and the series itself,
# with the series drawn as lines and the medians as points on top.
p2 <- ggplot(median_gm) +
  geom_ribbon(
    aes(x = decade, ymin = median, ymax = value, group = variable),
    alpha = 0.40,
    fill = "#3985ff"
  ) +
  geom_line(
    aes(x = decade, y = value, group = variable, color = variable),
    lwd = 1
  ) +
  geom_point(aes(x = decade, y = median))
p2
Here's a slightly over-engineered solution: find all segment-segment intersections, add those abscissae to the mix, and for each x locate the min and max values.
# Segment-segment intersection, vectorised over pairs of segments.
# Formulae follow http://paulbourke.net/geometry/pointlineplane/
#
# Segment A runs from (x1, y1) to (x2, y2), segment B from (x3, y3) to
# (x4, y4). All arguments are numeric vectors of matching length.
# Returns a data.frame with columns x and y holding the intersection point of
# each pair, or NA where the segments are (near-)parallel or do not meet
# within both segments.
ssi <- function(x1, x2, x3, x4, y1, y2, y3, y4) {
  # direction of each segment and offset between the two start points
  dx_a <- x2 - x1
  dy_a <- y2 - y1
  dx_b <- x4 - x3
  dy_b <- y4 - y3
  dx_ab <- x1 - x3
  dy_ab <- y1 - y3

  denom <- dy_b * dx_a - dx_b * dy_a
  # a vanishing denominator means parallel lines: no single intersection
  denom[abs(denom) < 1e-10] <- NA

  # position of the crossing along A (ua) and along B (ub), 0..1 = on segment
  ua <- (dx_b * dy_ab - dy_b * dx_ab) / denom
  ub <- (dx_a * dy_ab - dy_a * dx_ab) / denom

  x <- x1 + ua * dx_a
  y <- y1 + ua * dy_a

  inside <- (ua >= 0) & (ua <= 1) & (ub >= 0) & (ub <= 1)
  data.frame(
    x = ifelse(inside, x, NA),
    y = ifelse(inside, y, NA)
  )
}
# Intersections between two polylines given as xy tables (column 1 = x,
# column 2 = y) with the same number of rows. Segment k of the first line
# (row k to row k + 1) is tested against segment k of the second line only.
# Returns the data.frame produced by ssi(): one row per segment pair.
ssi_polyline <- function(l1, l2) {
  n1 <- nrow(l1)
  n2 <- nrow(l2)
  stopifnot(n1==n2)
  # dropping the last row gives segment starts, dropping the first gives ends
  ssi(
    x1 = l1[-n1, 1], x2 = l1[-1L, 1],
    x3 = l2[-n2, 1], x4 = l2[-1L, 1],
    y1 = l1[-n1, 2], y2 = l1[-1L, 2],
    y3 = l2[-n2, 2], y4 = l2[-1L, 2]
  )
}
# Visual check of ssi_polyline(): two random polylines on abscissae 1..10
d1 <- cbind(seq_len(10), rnorm(10))
d2 <- cbind(seq_len(10), rnorm(10))
# empty axes spanning both lines, then each line in its own colour
plot(rbind(d1, d2), type = "n")
lines(d1)
lines(d2, col = 2)
# mark the detected crossings
points(ssi_polyline(d1, d2))
# do it with all columns of a matrix (common xs assumed)
# the general case (different xs) could be treated similarly
# e.g by doing first a linear interpolation at all unique xs
#
# x: abscissae shared by every curve
# m: matrix or data frame holding one curve per column
# Returns a data.frame with columns x and y containing the intersection
# points found over all pairs of columns. With fewer than two columns there
# are no pairs, so an empty x/y data.frame is returned.
ssi_matrix <- function(x, m){
  # a single curve cannot cross anything; without this guard combn() stops
  # with the unhelpful message "n < m"
  if (NCOL(m) < 2L) {
    return(data.frame(x = numeric(0), y = numeric(0)))
  }
  # pairwise combinations
  cn <- combn(ncol(m), 2)
  test_pair <- function(i){
    l1 <- cbind(x, m[,cn[1,i]])
    l2 <- cbind(x, m[,cn[2,i]])
    pts <- ssi_polyline(l1, l2)
    # keep only segment pairs that actually intersect
    pts[complete.cases(pts), , drop = FALSE]
  }
  ints <- lapply(seq_len(ncol(cn)), test_pair)
  do.call(rbind, ints)
}
# Visual check of ssi_matrix(): five random curves of ten points each
m <- replicate(5, rnorm(10))
x <- seq_len(nrow(m))
# draw every column as a solid line
matplot(x, m, type = "l", lty = 1)
# overlay all pairwise crossings
test <- ssi_matrix(x, m)
points(test)
# now, apply this to the dataset at hand
library(ggplot2)
library(reshape2)
library(plyr)
# Simulated data: five series of uniform noise over ten decades (seeded).
set.seed(123)
data <- data.frame(decade = 1:10)
n <- nrow(data)
data[["maxa"]] <- runif(n, min = 1000, max = 2000)
data[["maxb"]] <- runif(n, min = 1000, max = 2000)
data[["maxc"]] <- runif(n, min = 1000, max = 2000)
data[["maxd"]] <- runif(n, min = 1000, max = 2000)
data[["maxe"]] <- runif(n, min = 1000, max = 2000)

# Crossing points of every pair of series, labelled "added" and given the
# same column names as the melted data.
newpoints <- setNames(
  data.frame(ssi_matrix(data$decade, data[, -1L]), "added"),
  c("decade", "value", "variable")
)

# Long format: one row per (decade, series).
mdata <- melt(data, id.vars = 1L)

# Evaluate every series at the abscissae of the crossing points.
interpolated <- ddply(mdata, "variable", function(d) {
  xy <- approx(d$decade, d$value, xout = newpoints[, 1])
  data.frame(decade = xy$x, value = xy$y, variable = "interpolated")
})

# Envelope: lowest and highest value at each abscissa (original decades plus
# crossing points).
all <- rbind(mdata, interpolated, newpoints)
rib <- ddply(
  all, "decade", summarise,
  ymin = min(value),
  ymax = max(value)
)

ggplot(mdata, aes(decade)) +
  geom_ribbon(
    data = rib,
    aes(x = decade, ymin = ymin, ymax = ymax),
    alpha = 0.40,
    fill = "#3985ff"
  ) +
  geom_line(aes(y = value, colour = variable))
I was intrigued by the question, and wanted to see if I could get to an answer with simulated (but similar) data as I like making plots. I included my first approach which didn't work completely as intended, for illustrative purposes.
library(ggplot2)
library(reshape2)
library(plyr)
# Simulated data: five series of uniform noise over ten decades (seeded).
set.seed(123)
data <- data.frame(decade = 1:10)
n <- nrow(data)
data[["maxa"]] <- runif(n, min = 1000, max = 2000)
data[["maxb"]] <- runif(n, min = 1000, max = 2000)
data[["maxc"]] <- runif(n, min = 1000, max = 2000)
data[["maxd"]] <- runif(n, min = 1000, max = 2000)
data[["maxe"]] <- runif(n, min = 1000, max = 2000)
First approach: calculate min and max, and use those to calculate a ribbon
# Row-wise envelope: smallest and largest value in each decade.
data$min <- apply(data[, -1], MARGIN = 1, FUN = min)
data$max <- apply(data[, -1], MARGIN = 1, FUN = max)

# Reshape to long format, carrying decade/min/max along as identifiers.
data_long <- melt(data, id.vars = c("decade", "min", "max"))

# Ribbon between the row-wise min and max, with the series drawn on top.
p1 <- ggplot(data_long) +
  geom_ribbon(
    aes(x = decade, ymin = min, ymax = max),
    fill = "#FFCCCC",
    alpha = 0.3
  ) +
  geom_line(
    aes(x = decade, y = value, group = variable, col = variable),
    size = 1
  )
p1
This does not give the intended result: the ribbon is drawn straight between the peaks at each decade.
Second approach (should work for data that are not too extreme): find the median value for each decade, and use that as ymin for the ribbon. ymax is the value in the melted dataset.
# Add the per-decade median of `value` as a new column.
data_long <- ddply(data_long, .(decade), transform, median = median(value))

# One ribbon per series, from the decade median to the series value.
# Plain hex fill and no alpha: the ribbons overlap, and transparency would
# make the overlaps visible.
p2 <- ggplot(data_long) +
  geom_ribbon(
    aes(x = decade, ymin = median, ymax = value, group = variable),
    fill = "#FFCCCC"
  ) +
  geom_line(
    aes(x = decade, y = value, group = variable, col = variable),
    size = 1
  )
p2
Works!