Take the unique count and the sum for each unique value in R

2019-09-21 01:17发布

问题:

Case1: Input

ST_DATE ND_DATE LO_NO   ACTV_CODE   ACTV_AMT    AB_NO   FEATURE_CODE    L_NU    
7/27/16 7/27/16 265       O          15          1      INTEREST        855          
7/27/16 7/27/16 265       O          14          1      INTEREST 855  

Expected output

ST_DATE ND_DATE LO_NO   ACTV_CODE   ACTV_AMT    AB_NO   FEATURE_INTEREST     L_NU   
7/27/16 7/27/16 265      O           29          1             2             855

Case 2: Input (my code works for case 2 but throws an error for case 1)

ST_DATE ND_DATE LO_NO   ACTV_CODE   ACTV_AMT    AB_NO   FEATURE_CODE    L_NU    
7/27/16 7/27/16 265   O          15       1     INTEREST        855          
7/27/16 7/27/16 265   O          14       1     INSTALLMENT   855    

ST_DATE ND_DATE LO_NO   ACTV_CODE   ACTV_AMT    AB_NO   INTEREST INSTALLMENT     L_NU   
7/27/16 7/27/16 265      O           29           1      1          1           855


# Asker's attempt: per (AB_NO, LO_NO, L_NU) group, total ACTV_AMT and turn each
# FEATURE_CODE value into its own column. This is the code that raises the
# "Duplicate identifiers" error quoted below.
install_cntdup_less1 <- install_BAN %>% 
   group_by(AB_NO,LO_NO,L_NU)%>% 
   # ACTV_AMT becomes the group total on every row of the group.
   # unique(!is.na(FEATURE_CODE)) has at most two elements (TRUE / FALSE), so
   # ftr is 1 when the group has any non-NA FEATURE_CODE and 0 otherwise; it is
   # not a count of rows per feature.
   mutate(ACTV_AMT = sum(ACTV_AMT),ftr=sum(unique(!is.na(FEATURE_CODE))))%>%  
   # One output column per FEATURE_CODE value, filled from ftr (0 where absent).
   # spread() requires the remaining columns to identify each row uniquely; rows
   # that agree on every other column AND carry the same FEATURE_CODE (as in
   # case 1) are what trigger "Duplicate identifiers".
   spread(FEATURE_CODE,ftr,fill = 0)%>%
   # Keep the row with the earliest ST_DATE, then (of what is left) the row with
   # the latest ND_DATE -- presumably per group, assuming the grouping survives
   # spread(); TODO confirm.
   # NOTE(review): the sample dates look like m/d/yy ("7/27/16"), which the
   # '%Y/%m/%d' format would not parse -- confirm the real column format.
   slice(which.min(as.Date(ST_DATE, '%Y/%m/%d')))%>% 
   slice(which.max(as.Date(ND_DATE, '%Y/%m/%d'))) 

I get the following error:

Error: Duplicate identifiers for rows (29424, 29425, 29426), (7415, 7416), (30120, 30121)

I tried introducing a unique ID, as suggested in the linked question "Spread with duplicate identifiers (using tidyverse and %>%)", but it messes up my output:

mutate(ind = row_number()) %>%

I don't understand what to do. Can anyone help me resolve this error? It may look like a duplicate question, but it is not.

回答1:

This will help:

library(dplyr)
library(tidyr)

# Example data: four rows for a single (AB_NO, LO_NO, L_NU) group, covering two
# FEATURE_CODE values and two ACTV_CODE values. Columns are whitespace-separated
# and the first non-blank line is the header. Text columns are kept as character
# (not factor) so they can be prefixed with paste0() later.
dt <- read.table(text = "
                ST_DATE ND_DATE LO_NO   ACTV_CODE   ACTV_AMT    AB_NO   FEATURE_CODE    L_NU    
                7/27/16 7/27/16 265       O          15          1      INTEREST        855          
                7/27/16 7/27/16 265       OO          14          1      INTEREST        855
                7/27/16 7/27/16 265       O          15          1      OTHER        855          
                7/27/16 7/27/16 265       OO          14          1      OTHER        855 
                ", header = TRUE, stringsAsFactors = FALSE)

# Collapse each (AB_NO, LO_NO, L_NU) group to a single row holding the summed
# amount, the earliest start date, the latest end date, and one count column per
# distinct FEATURE_CODE and ACTV_CODE value.
dt %>%
  group_by(AB_NO, LO_NO, L_NU) %>%
  mutate(
    ACTV_AMT = sum(ACTV_AMT),
    # The dates are m/d/yy strings, so min()/max() on them would compare text
    # ("10/1/16" sorts before "7/27/16"). Pick the original string whose parsed
    # date is earliest/latest instead, so the column stays character.
    # The trailing [1] yields NA when no date in the group parses (which.min /
    # which.max then return a zero-length index).
    ST_DATE = ST_DATE[which.min(as.Date(ST_DATE, format = "%m/%d/%y"))][1],
    ND_DATE = ND_DATE[which.max(as.Date(ND_DATE, format = "%m/%d/%y"))][1]
  ) %>%
  ungroup() %>%
  mutate(
    # A per-row id makes every row unique, which is what keeps spread() from
    # failing with "Duplicate identifiers".
    id = row_number(),
    # Prefix the codes so the spread columns get self-describing names.
    FEATURE_CODE = paste0("FEATURE_", FEATURE_CODE),
    ACTV_CODE = paste0("ACTV_", ACTV_CODE),
    # Each row contributes 1 to the column of its own code.
    count_FEATURE = 1,
    count_ACTV = 1
  ) %>%
  # One column per code value; rows with a different code get NA there.
  spread(FEATURE_CODE, count_FEATURE) %>%
  spread(ACTV_CODE, count_ACTV) %>%
  select(-id) %>%
  group_by(ST_DATE, ND_DATE, LO_NO, ACTV_AMT, AB_NO, L_NU) %>%
  # Summing the 1/NA indicators gives the number of rows per code.
  summarise_all(sum, na.rm = TRUE) %>%
  ungroup()

# # A tibble: 1 x 10
#     ST_DATE ND_DATE LO_NO ACTV_AMT AB_NO  L_NU FEATURE_INTEREST FEATURE_OTHER ACTV_O ACTV_OO
#       <chr>   <chr> <int>    <int> <int> <int>            <dbl>         <dbl>  <dbl>   <dbl>
#   1 7/27/16 7/27/16   265       58     1   855                2             2      2       2