Plot shaded time period (geom_rect) inside dplyr d

2019-08-27 08:24发布

问题:

I'm having trouble getting a geom_rect to display a shaded area when using facet_wrap and the dplyr do(...) to generate the plots.

NOTE: The issue here may be related to a data structure issue. See this SO question for the current state of play.

The following minimal example uses the ggplot2 packages economics data and the NBER recession dates from the tis package.

Appreciate hints tips and incantations.

library(tis)
library(ggplot2)
# Prepare NBER recession start end dates.
start <- data.frame(date = as.Date(as.character(nberDates()[,"Start"]),"%Y%m%d"),
                    start= as.Date(as.character(nberDates()[,"Start"]),"%Y%m%d"))
end <- data.frame(date = as.Date(as.character(nberDates()[,"End"]),"%Y%m%d"),
                  end= as.Date(as.character(nberDates()[,"End"]),"%Y%m%d"))
dl <- economics %>% 
        gather(metric, value, pce:unemploy ) %>%
        group_by(metric) %>%
        mutate(diff = value - lag(value, default=first(value))) %>%
        mutate(pct = diff/value) %>%
        gather(transform, value, value:pct ) %>%
        full_join(x=., y=start, by=c('date' = 'date')) %>%
        full_join(x=., y=end, by=c('date' = 'date')) %>%
        mutate(ymin = 0) %>%
        mutate(ymax = Inf)
# Check the start end dates are present
dl %>% group_by(metric,transform, start) %>% summarise( count=n())

pl <- dl %>%
        do(
          plots = ggplot(data=., aes(x = date, y = value)) +
                      geom_point() +
                      geom_rect(aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax)) +
                      stat_smooth(method="auto",size=1.5) +
                      facet_wrap(~transform, scales="free_y") 
          )  

pl[[1,2]]

回答1:

I have checked that the minimum and maximum dates for each group are the same (NA group is not plotted):

dl %>% 
  group_by(transform) %>% 
  summarise(min= min(start, na.rm =TRUE), max = max(start, na.rm =TRUE))# 

A tibble: 4 x 3
  transform min        max       
  <chr>     <date>     <date>    
1 diff      1970-01-01 2008-01-01
2 pct       1970-01-01 2008-01-01
3 value     1970-01-01 2008-01-01
4 NA        1857-07-01 1960-05-01

Even if it is not the optimal solution, you can hard code both dates and use annotate to avoid opacity as geom_rect will draw multiple rectangles. I added alpha = 0.5 for transparency.

pl <- dl %>%
  do(
    plots = ggplot(data=., aes(x = date, y = value)) +
      geom_point() +
      annotate('rect', xmin = as.Date("1970-01-01"), xmax = as.Date("2008-01-01"), 
               ymin = -Inf, ymax = Inf, alpha = 0.5) +
      stat_smooth(method="auto",size=1.5) +
      facet_wrap(~transform, scales="free_y") 
  )  
pl[[1,2]]



回答2:

Okay, the issue here is the construction of the data frame is non-trivial. Two uses of outer join does not provide the required structure.

# Prepare NBER recession start end dates.
recessions <- data.frame(start = as.Date(as.character(nberDates()[,"Start"]),"%Y%m%d"),
                    end= as.Date(as.character(nberDates()[,"End"]),"%Y%m%d"))

# Create the long format data frame
dl <- economics %>% 
        gather(metric, value, pce:unemploy ) %>%
        group_by(metric) %>%
        mutate(diff = value - lag(value, default=first(value))) %>%
        mutate(pct = diff/value) %>%
        gather(transform, value, value:pct ) #%>%

# Build the data frame with start and end dates given in recessions 
df1 <- dl %>% 
        mutate(dummy=TRUE) %>% 
        left_join(recessions %>% mutate(dummy=TRUE)) %>% 
        filter(date >= start & date <= end) %>% 
        select(-dummy) 

# Build data frame of all other dates with start=NA and end=NA
df2 <- dl %>% 
        mutate(dummy=TRUE) %>% 
        left_join(recessions %>% mutate(dummy=TRUE)) %>% 
        mutate(start=NA, end=NA) %>%
        unique() %>%
        select(-dummy) 
# Now merge the two.  Overwirte NA values with start and end dates
dl <- df2 %>% 
      left_join(x=., y=df1, by="date") %>%
      mutate(date, start = ifelse(is.na(start.y), as.character(start.x), as.character(start.y)),end = ifelse(is.na(end.y), as.character(end.x), as.character(end.y))) %>%
      mutate(start=as.Date(start), end=as.Date(end) ) %>%
      select(-starts_with("start."),-starts_with("end."),-ends_with(".y")) %>% 
      setNames(sub(".x", "", names(.))) %>%
      mutate(ymin = -Inf) %>% #min(value)) %>%
      mutate(ymax = Inf) #max(value)) #%>%
# Check the start end dates are present
dl %>% group_by(metric,transform, start, end) %>% summarise( count = n() ) %>% print(n=180)

pl <- dl %>%
        group_by(metric) %>%
        do(
          plots = ggplot(data=., aes(x = date, y = value)) +
                      geom_point() +
                      # annotate('rect', xmin = start, xmax = end, 
                      #          ymin = ymin, ymax = ymax, alpha = 0.5) +
                      geom_rect(aes(xmin = start, xmax = end, ymin = ymin, ymax = ymax), na.rm=TRUE) +
                      stat_smooth(method="auto",size=1.5) +
                      facet_wrap(~transform, scales="free_y") 
          )

grid.draw(pl[[1,2]])



标签: r ggplot2 dplyr