fixed fill for different sections of a density plo

Given draws from a rnorm, and cutoff c I want my plot to use the following colors:

Red for the section that is to the left of -c
Blue for the section in between -c and c
and Green for the section that is to the right of c

For example, if my data is:

set.seed(9782)
mydata <- rnorm(1000, 0, 2)
c <- 1

I want to plot something like this:

But if my data is all to the right of c the whole plot should be green. Similarly, if all is between -c and c or to the left of -c the plot should be all red or blue.

This is the code I wrote:

MinD <- min(mydata)
MaxD <- max(mydata)

df.plot <- data.frame(density = mydata)

if(c==0){
  case <- dplyr::case_when((MinD < 0 & MaxD >0) ~ "L_and_R",
                           (MinD > 0) ~ "R",
                           (MaxD < 0) ~ "L")
}else{
  case <- dplyr::case_when((MinD < -c & MaxD >c) ~ "ALL",
                           (MinD > -c & MaxD > c) ~ "Center_and_R",
                           (MinD > -c & MaxD <c) ~ "Center",
                           (MinD < -c & MaxD < c) ~ "Center_and_L",
                           MaxD < -c ~ "L",
                           MaxD > c ~ "R")
}

# Draw the Center

if(case %in% c("ALL", "Center_and_R", "Center", "Center_and_L")){
  ds <- density(df.plot$density, from = -c, to = c)
  ds_data_Center <- data.frame(x = ds$x, y = ds$y, section="Center")
} else{
  ds_data_Center <- data.frame(x = NA, y = NA, section="Center")
}

# Draw L

if(case %in% c("ALL", "Center_and_L", "L", "L_and_R")){
  ds <- density(df.plot$density, from = MinD, to = -c)
  ds_data_L <- data.frame(x = ds$x, y = ds$y, section="L")
} else{
  ds_data_L <- data.frame(x = NA, y = NA, section="L")
}

# Draw R

if(case %in% c("ALL", "Center_and_R", "R", "L_and_R")){
  ds <- density(df.plot$density, from = c, to = MaxD)
  ds_data_R <- data.frame(x = ds$x, y = ds$y, section="R")
} else{
  ds_data_R <- data.frame(x = NA, y = NA, section="R")
}

L_Pr <- round(mean(mydata < -c),2)
Center_Pr <- round(mean((mydata>-c & mydata<c)),2)
R_Pr <- round(mean(mydata > c),2)

filldf <- data.frame(section = c("L", "Center", "R"), 
                     Pr = c(L_Pr, Center_Pr, R_Pr), 
                     fill = c("red", "blue", "green")) %>% 
  dplyr::mutate(section = as.character(section))


if(c==0){
  ds_data <- suppressWarnings(dplyr::bind_rows(ds_data_L, ds_data_R)) %>% 
    dplyr::full_join(filldf, by = "section") %>% filter(Pr!=0) %>% 
    dplyr::full_join(filldf, by = "section") %>% mutate(section = ordered(section, levels=c("L","R"))) 
  ds_data <- ds_data[order(ds_data$section), ] %>%  
    filter(Pr!=0) %>% 
    mutate(Pr=scales::percent(Pr))
}else{
  ds_data <- suppressWarnings(dplyr::bind_rows(ds_data_Center, ds_data_L, ds_data_R)) %>% 
    dplyr::full_join(filldf, by = "section") %>% mutate(section = ordered(section, levels=c("L","Center","R"))) 
  ds_data <- ds_data[order(ds_data$section), ] %>%  
    filter(Pr!=0) %>% 
    mutate(Pr=scales::percent(Pr))
}

fillScale <- scale_fill_manual(name = paste0("c = ", c, ":"),
                               values = as.character(unique(ds_data$fill)))

p <- ggplot(data = ds_data, aes(x=x, y=y, fill=Pr)) + 
  geom_area() + fillScale

Alas, I cannot figure out how to assign the colors to the different sections while keeping the percentages as labels for the colors.

标签： r ggplot2

1条回答

Anthone

2楼-- · 2019-07-27 03:20

We use the density function to create the data frame we'll actually plot. Then, We use the cut function to create groups using ranges of the data values. Finally, we calculate the probability mass for each group and use those as the actual legend labels.

We also create a labeled vector of colors to ensure that the same color always goes with a given range of x-values, regardless of whether the data contains any values within a given range of x-values.

The code below packages all this into a function.

library(tidyverse)
library(gridExtra)

fill_density = function(x, cc=1, adj=1, drop_levs=FALSE) {

  # Calculate density values for input data
  dens = data.frame(density(x, n=2^10, adjust=adj)[c("x","y")]) %>% 
    mutate(section = cut(x, breaks=c(-Inf, -1, cc, Inf))) %>% 
    group_by(section) %>% 
    mutate(prob = paste0(round(sum(y)*mean(diff(x))*100),"%"))

  # Get probability mass for each level of section
  # We'll use these as the label values in scale_fill_manual
  sp = dens %>% 
    group_by(section, prob) %>% 
    summarise %>% 
    ungroup

  if(!drop_levs) {
   sp = sp %>% complete(section, fill=list(prob="0%"))
  }

  # Assign colors to each level of section
  col = setNames(c("red","blue","green"), levels(dens$section))

  ggplot(dens, aes(x, y, fill=section)) +
    geom_area() +
    scale_fill_manual(labels=sp$prob, values=col, drop=drop_levs) +
    labs(fill="")
}

Now let's run the function on several different data distributions:

set.seed(3)
dat2 = rnorm(1000)
grid.arrange(fill_density(mydata), fill_density(mydata[mydata>0]),
             fill_density(mydata[mydata>2], drop_levs=TRUE), 
              fill_density(mydata[mydata>2], drop_levs=FALSE),
             fill_density(mydata[mydata < -5 | mydata > 5], adj=0.3), fill_density(dat2),
             ncol=2)

0人赞添加讨论(0) 举报

fixed fill for different sections of a density plo

采纳回答

编辑标签

举报内容

检举类型

检举原因

检举说明(必填)

打开微信“扫一扫”，打开网页后点击屏幕右上角分享按钮

付费偷看金额在0.1-10元之间