extract nth value in nested list with map_dbl

2019-08-26 18:29发布

问题:

I'm trying to extract the nth value from a nested list in a tibble. I'm trying to do this in the tidyverse.

My raw data looks like this:

> x %>% 
+   select( ., 
+           meta_event, 
+           meta_aktivitet, 
+           meta_subject_id, 
+           time, 
+           time_cum, 
+           vo2) -> 
+ x
> dput(head(x))
structure(list(meta_event = c("001", "001", "001", "001", "001", 
"001"), meta_aktivitet = c("001", "001", "001", "001", "001", 
"001"), meta_subject_id = c("100001", "100001", "100001", "100001", 
"100001", "100001"), time = c("0:10 min", "0:15 min", "0:20 min", 
"0:25 min", "0:30 min", "0:35 min"), time_cum = c(10, 15, 20, 
25, 30, 35), vo2 = c(1.665, 2.515, 2.641, 2.677, 2.66, 2.712)), row.names = c(NA, 
-6L), class = c("tbl_df", "tbl", "data.frame"))

and nested

> x %>% 
+   group_by( ., 
+             meta_event, 
+             meta_aktivitet, 
+             meta_subject_id) %>% 
+   nest() -> 
+ x_n
> head(x_n)
# A tibble: 6 x 4
  meta_event meta_aktivitet meta_subject_id data      
  <chr>      <chr>          <chr>           <list>    
1 001        001            100001          <tibble [~
2 001        001            100002          <tibble [~
3 001        001            100003          <tibble [~
4 001        001            100004          <tibble [~
5 001        001            100005          <tibble [~
6 001        001            100006          <tibble [~

Here I try to extract some information through mutate and map_dbl. I've tried different approaches without any luck.

> x_n %>%
+   mutate( vald_end = data %>% map_dbl( ., function(x) max(x$time_cum)),
+           h1 = vald_end - 15*60,
+           index_1 = data %>% map_dbl( ., function(x) which.max(x$time_cum > h1)),
+           index_2 = index_1 - 1,
+           vald_start = map_dbl( data, ~ nth( "time_cum", 
+                                              n = index_1, 
+                                              order_by = "time_cum"))) %>%
+   unnest() %>% View()
Error in mutate_impl(.data, dots) : 
  Evaluation error: length(n) == 1 is not TRUE.
In addition: There were 20 warnings (use warnings() to see them)

It seems to be failing on the last mutate. If I run the code without "vald_start" it produces the following result

> x_n %>%
+   mutate( vald_end = data %>% map_dbl( ., function(x) max(x$time_cum)),
+           h1 = vald_end - 15*60,
+           index_1 = data %>% map_dbl( ., function(x) which.max(x$time_cum > h1)),
+           index_2 = index_1 - 1) %>%
+   unnest() ->
+ x_r
There were 20 warnings (use warnings() to see them)
> x_r
# A tibble: 10,639 x 10
   meta_event meta_aktivitet meta_subject_id vald_end    h1 index_1 index_2
   <chr>      <chr>          <chr>              <dbl> <dbl>   <dbl>   <dbl>
 1 001        001            100001              4015  3115     491     490
 2 001        001            100001              4015  3115     491     490
 3 001        001            100001              4015  3115     491     490
 4 001        001            100001              4015  3115     491     490
 5 001        001            100001              4015  3115     491     490
 6 001        001            100001              4015  3115     491     490
 7 001        001            100001              4015  3115     491     490
 8 001        001            100001              4015  3115     491     490
 9 001        001            100001              4015  3115     491     490
10 001        001            100001              4015  3115     491     490
# ... with 10,629 more rows, and 3 more variables: time <chr>,
#   time_cum <dbl>, vo2 <dbl>
> dput(head(x_r))
structure(list(meta_event = c("001", "001", "001", "001", "001", 
"001"), meta_aktivitet = c("001", "001", "001", "001", "001", 
"001"), meta_subject_id = c("100001", "100001", "100001", "100001", 
"100001", "100001"), vald_end = c(4015, 4015, 4015, 4015, 4015, 
4015), h1 = c(3115, 3115, 3115, 3115, 3115, 3115), index_1 = c(491, 
491, 491, 491, 491, 491), index_2 = c(490, 490, 490, 490, 490, 
490), time = c("0:10 min", "0:15 min", "0:20 min", "0:25 min", 
"0:30 min", "0:35 min"), time_cum = c(10, 15, 20, 25, 30, 35), 
    vo2 = c(1.665, 2.515, 2.641, 2.677, 2.66, 2.712)), row.names = c(NA, 
-6L), class = c("tbl_df", "tbl", "data.frame"))

Can someone point me in the right direction with nth and map_dbl?

As per the suggestion from @Moody_Mudskipper, I've tried the following code, without any luck. I've changed the two strings "time_cum" in the nth function to .$time_cum

> x_n %>%
+   mutate( vald_end = data %>% map_dbl( ., function(x) max(x$time_cum)),
+           h1 = vald_end - 15*60,
+           index_1 = data %>% map_dbl( ., function(x) which.max(x$time_cum > h1)),
+           index_2 = index_1 - 1,
+           vald_start = data %>% map_dbl( ., ~ nth( .$time_cum, 
+                                              n = index_1, 
+                                              order_by = .$time_cum))) %>%
+   unnest() %>% View()
Error in mutate_impl(.data, dots) : 
  Evaluation error: length(n) == 1 is not TRUE.
In addition: There were 20 warnings (use warnings() to see them)

If I change the code so that I use a literal number instead of index_1, then R provides the expected result. Therefore, I must be doing something wrong when I refer to the index_1 column.

> x_n %>%
+   mutate( vald_end = data %>% map_dbl( ., function(x) max(x$time_cum)),
+           h1 = vald_end - 15*60,
+           index_1 = data %>% map_dbl( ., function(x) which.max(x$time_cum > h1)),
+           index_2 = index_1 - 1,
+           vald_start = data %>% map_dbl( ., ~ nth( .$time_cum, 
+                                              n = 490, 
+                                              order_by = .$time_cum))) -> 
+ x_r
There were 20 warnings (use warnings() to see them)
> x_r
# A tibble: 21 x 9
   meta_event meta_aktivitet meta_subject_id data        vald_end    h1 index_1 index_2 vald_start
   <chr>      <chr>          <chr>           <list>         <dbl> <dbl>   <dbl>   <dbl>      <dbl>
 1 001        001            100001          <tibble [5~     4015  3115     491     490       2470
 2 001        001            100002          <tibble [5~     3770  2870     459     458       3575
 3 001        001            100003          <tibble [3~     3840  2940     346     345         NA
 4 001        001            100004          <tibble [5~     3630  2730     456     455       3455
 5 001        001            100005          <tibble [5~     3945  3045     493     492       2460
 6 001        001            100006          <tibble [5~     3860  2960     422     421       3660
 7 001        001            100007          <tibble [4~     3480  2580     356     355         NA
 8 002        001            100001          <tibble [5~     4570  3670     498     497       2450
 9 002        001            100002          <tibble [5~     3755  2855     424     423       3640
10 002        001            100003          <tibble [4~     3560  2660     361     360         NA
# ... with 11 more rows

Greetings from Denmark.

Dan Olesen

标签: r dplyr purrr