R ggplot2: overlaying multiple geom_ribbon objects

2020-02-14 09:26发布

问题:

I have five line plots from which I'd like to output a shaded area that represents the region between their plotted upper and lower regions. I'm creating an R script (see below) as I have multiple datasets for which I need to repeat this exercise.

However, I'm only able to print the geom_ribbon from the last i and j pair - I can't seem to output every geom_ribbon into the created list.

I'd grateful for any ideas on how to import all of the geom_ribbon objects into the list. Only one plot is printed with print(Z)(example below). I'd like, if possible, all geom_ribbon objects to be overlain and printed as a single ggplot?

Z <- list()
allmaxi <- list(cahp_max_plot15cb$decade_maxa, cahp_max_plot15cb$decade_maxc,cahp_max_plot15cb$decade_maxd, cahp_max_plot15cb$decade_maxe, cahp_max_plot15cb$decade_maxf)
allmaxj <- list(cahp_max_plot15cb$decade_maxa, cahp_max_plot15cb$decade_maxc,cahp_max_plot15cb$decade_maxd, cahp_max_plot15cb$decade_maxe, cahp_max_plot15cb$decade_maxf)
for (i in allmaxi) {
  for (j in allmaxj) {
    l <-  geom_ribbon(data=cahp_max_plot15cb,aes(x=decade,ymin=i, ymax=j))
    Z[[length(Z) + 1]] <- l
    print(i)
    print(j)
  }     
}
print(ggplot() + Z)

Sample output (from print(i) and print(j) in script) from inputting one dataset (decade_maxa) to i list, and four other data sets to j list:

[1] 2010.811 1723.783 1961.088 1662.909 1587.191 1662.140 1665.415 1602.974 1807.453 1586.106 
[11] 1580.880 1685.253 1653.178 1824.842

[1] 1390.260 1247.700 1263.578 1711.638 1228.326 1762.045 1260.147 1171.914 1697.987 1350.867
[11] 1434.525 1488.818 1610.513 1536.895
`
 `[1] 2010.811 1723.783 1961.088 1662.909 1587.191 1662.140 1665.415 1602.974 1807.453 1586.106
[11] 1580.880 1685.253 1653.178 1824.842
`
 `[1] 1120.2700 1094.3047 1196.8792 1227.9660 1236.9170 1266.0935 1127.1480  974.6948  947.3365
[10] 1244.3242 1254.2704 1082.3667 1286.9080 1126.1943
`
 `[1] 2010.811 1723.783 1961.088 1662.909 1587.191 1662.140 1665.415 1602.974 1807.453 1586.106
[11] 1580.880 1685.253 1653.178 1824.842
`
 `[1] 1396.695 1425.073 1382.941 1913.495 1401.754 1499.763 1600.656 1367.043 1413.390 1343.804
[11] 1431.790 1402.292 1329.192 1696.729
`
 `[1] 2010.811 1723.783 1961.088 1662.909 1587.191 1662.140 1665.415 1602.974 1807.453 1586.106
[11] 1580.880 1685.253 1653.178 1824.842
`
 `[1] 1718.874 1389.134 1501.574 1233.189 1262.480 1508.919 1291.467 1431.869 1505.102 1376.519
[11] 1441.181 1421.552 1326.547 1635.599
`
> print(ggplot() + Z)

`

This is my aim. Maybe there is a better way with lapply?

This is the image output by integrating median values, as proposed below:

median_g <- group_by(cahp_max_plot15cbm,decade) median_gm <- mutate(median_g, median=median(value)) p2 <- ggplot(median_gm) + geom_ribbon(aes(x=decade, ymin=median,ymax=value,group=variable),alpha=0.40,fill="#3985ff") + geom_line(aes(x=decade,y=value,group=variable,color=variable),lwd=1) + geom_point(aes(x=decade,y=median)) p2

回答1:

Here's a slightly over-engineered solution: find all segment-segment intersections, add those abscissae to the mix, and for each x locate the min and max values.

# some segment-segment intersection code
# http://paulbourke.net/geometry/pointlineplane/
ssi <- function(x1, x2, x3, x4, y1, y2, y3, y4){

  denom <- ((y4 - y3)*(x2 - x1) - (x4 - x3)*(y2 - y1))
  denom[abs(denom) < 1e-10] <- NA # parallel lines

  ua <- ((x4 - x3)*(y1 - y3) - (y4 - y3)*(x1 - x3)) / denom
  ub <- ((x2 - x1)*(y1 - y3) - (y2 - y1)*(x1 - x3)) / denom

  x <- x1 + ua * (x2 - x1)
  y <- y1 + ua * (y2 - y1)
  inside <- (ua >= 0) & (ua <= 1) & (ub >= 0) & (ub <= 1)
  data.frame(x = ifelse(inside, x, NA), 
             y = ifelse(inside, y, NA))

}
# do it with two polylines (xy dataframes)
ssi_polyline <- function(l1, l2){
  n1 <- nrow(l1)
  n2 <- nrow(l2)
  stopifnot(n1==n2)
  x1 <- l1[-n1,1] ; y1 <- l1[-n1,2] 
  x2 <- l1[-1L,1] ; y2 <- l1[-1L,2] 
  x3 <- l2[-n2,1] ; y3 <- l2[-n2,2] 
  x4 <- l2[-1L,1] ; y4 <- l2[-1L,2] 
  ssi(x1, x2, x3, x4, y1, y2, y3, y4)
}
# testing the above
d1 <- cbind(seq(1, 10), rnorm(10))
d2 <- cbind(seq(1, 10), rnorm(10))
plot(rbind(d1, d2), t="n")
lines(d1)
lines(d2, col=2)
points(ssi_polyline(d1, d2))

# do it with all columns of a matrix (common xs assumed)
# the general case (different xs) could be treated similarly
# e.g by doing first a linear interpolation at all unique xs
ssi_matrix <- function(x, m){
  # pairwise combinations
  cn <- combn(ncol(m), 2)
  test_pair <- function(i){
    l1 <- cbind(x, m[,cn[1,i]])
    l2 <- cbind(x, m[,cn[2,i]])
    pts <- ssi_polyline(l1, l2)
    pts[complete.cases(pts),]
  }
  ints <- lapply(seq_len(ncol(cn)), test_pair)
  do.call(rbind, ints)

}
# testing this on a matrix
m <- replicate(5, rnorm(10))
x <- seq_len(nrow(m))
matplot(x, m, t="l", lty=1)
test <- ssi_matrix(x, m)
points(test)

# now, apply this to the dataset at hand

library(ggplot2)
library(reshape2)
library(plyr)
set.seed(123)
data <- data.frame(decade=1:10)
n=nrow(data)
data$maxa <- runif(n,1000,2000)
data$maxb <- runif(n,1000,2000)
data$maxc <- runif(n,1000,2000)
data$maxd <- runif(n,1000,2000)
data$maxe <- runif(n,1000,2000)

newpoints <- setNames(data.frame(ssi_matrix(data$decade, data[,-1L]), 
                                 "added"), c("decade", "value", "variable"))
mdata <- melt(data, id=1L)

interpolated <- ddply(mdata, "variable", function(d){
  xy <- approx(d$decade, d$value, xout=newpoints[,1])
  data.frame(decade = xy$x, value=xy$y, variable = "interpolated")
})

all <- rbind(mdata, interpolated, newpoints)

rib <- ddply(all, "decade", summarise, 
             ymin=min(value), ymax=max(value))

ggplot(mdata, aes(decade)) +
  geom_ribbon(data = rib, aes(x=decade, ymin=ymin, ymax=ymax),
              alpha=0.40,fill="#3985ff")+
  geom_line(aes(y=value, colour=variable))



回答2:

I was intrigued by the question, and wanted to see if I could get to an answer with simulated (but similar) data as I like making plots. I included my first approach which didn't work completely as intended, for illustrative purposes.

library(ggplot2)
library(reshape2)
library(plyr)
set.seed(123)
data <- data.frame(decade=1:10)
n=nrow(data)
data$maxa <- runif(n,1000,2000)
data$maxb <- runif(n,1000,2000)
data$maxc <- runif(n,1000,2000)
data$maxd <- runif(n,1000,2000)
data$maxe <- runif(n,1000,2000)

First approach: calculate min and max, and use those to calculate a ribbon

data$min <- apply(data[,-1],MARGIN=1,FUN=min)
data$max <- apply(data[,-1],MARGIN=1,FUN=max)

#reshape
data_long <- melt(data, id.vars=c("decade","min","max"))

#plot
p1 <- ggplot(data_long) +
  geom_ribbon(aes(x=decade,ymin=min,ymax=max),fill="#FFCCCC", alpha=0.3) +
  geom_line(aes(x=decade,y=value,group=variable,col=variable),size=1) 

p1

does not get intended result; plots ribbons between peaks.

Second approach; should work for data dat are not too extreme: find median value for each decade, and use that as ymin for the ribbon. ymax is the value in the melted dataset.

#find median 
data_long <- ddply(data_long,.(decade),transform, median=median(value))

#plot. Quick hex-color and no alpha because the ribbons overlap, and that becomes visible with alpha.
p2 <- ggplot(data_long) + geom_ribbon(aes(x=decade, ymin=median,ymax=value,group=variable),fill="#FFCCCC")+
  geom_line(aes(x=decade,y=value,group=variable,col=variable),size=1) 

p2 

Works!



标签: r nested ggplot2