Summary count by multiple groups with condition in

2020-04-14 03:39发布

问题:

I have a dataframe that looks like this:

data <- data.frame(a=c(1,1,0,0,0,0,1,1,1, 0), 
               b=c("x","x","x","x","x","y","y","y","z","z"),
               c=c(2, 1, 2, 3, 4, NA, 4, 2, 1, 1), 
               d= c("s", "m", "l", "l", "l", "m", "m", "s", "s", "m"))

I would like to find a way to create a new variable, e, that is a sum of the values in c when a=1 for every combination of d and b. I've tried several options that aren't giving me what I'm looking for, for instance:

data <- data %>% 
    group_by(d, b) %>% 
    summarise (e = sum(data$c[which(data$a=="x")]))

Something that ultimately looks like:

       d      b     e
1      s      x     2
2      m      x     1
3      l      x     9
4      m      y     4
5      s      y     2
6      s      z     1
7      s      z     1

But unfortunately, I am only getting a constant e? Any help appreciated!

回答1:

library(dplyr)

data <- data_frame(
  a=c(1,1,0,0,0,0,1,1,1, 0), 
                   b=c("x","x","x","x","x","y","y","y","z","z"),
                   c=c(2, 1, 2, 3, 4, NA, 4, 2, 1, 1), 
                   d= c("s", "m", "l", "l", "l", "m", "m", "s", "s", "m"))

data
#> # A tibble: 10 x 4
#>        a     b     c     d
#>    <dbl> <chr> <dbl> <chr>
#>  1     1     x     2     s
#>  2     1     x     1     m
#>  3     0     x     2     l
#>  4     0     x     3     l
#>  5     0     x     4     l
#>  6     0     y    NA     m
#>  7     1     y     4     m
#>  8     1     y     2     s
#>  9     1     z     1     s
#> 10     0     z     1     m

data %>% 
  group_by(d, b) %>% 
  mutate(e = if_else(a == 1, c, 0)) %>% 
  summarise(e = sum(e, na.rm = TRUE))

#> Source: local data frame [7 x 3]
#> Groups: d [?]
#> 
#> # A tibble: 7 x 3
#>       d     b     e
#>   <chr> <chr> <dbl>
#> 1     l     x     0
#> 2     m     x     1
#> 3     m     y     4
#> 4     m     z     0
#> 5     s     x     2
#> 6     s     y     2
#> 7     s     z     1

If you like, you can also just do this all in the summarise call:

summarise(e = if_else(a == 1, c, 0) %>% sum(na.rm = TRUE))


回答2:

We can use

library(dplyr) 
data %>%
     group_by(d, b) %>% 
     summarise(e = sum(c[a==1], na.rm = TRUE))

Or

data %>%
      group_by(d, b) %>% 
      summarise(e = sum((a==1)*c, na.rm = TRUE))


标签: r dplyr