This is a follow-up question from this one: Rowsums conditional on column name
My data frame is called wiod
and looks like this:
VAR1 VAR2 AUS1 ... AUS56 BEL1 ... BEL56 NLD1 ... NLD56
A D 23 ... 99 0 ... 444 123 ... 675
B D 55 ... 6456 0 ... 557 567 ... 4345
I'd like to calculate the row-sums for the variables AUS, BEL, NLD
and then drop the old variables. Like this:
wiot <- wiot %>%
mutate(AUS = rowSums(.[grep("AUS", names(.))])) %>%
mutate(BEL = rowSums(.[grep("BEL", names(.))])) %>%
mutate(NLD = rowSums(.[grep("NLD", names(.))])) %>%
select(Var1, Var2, AUS, BEL, NLD)
Of course, there is a large number of the variable groups, not just these three (43, to be precise). Is there any convenient way to do this without using 43 mutate commands?
It makes it easier to convert from wide format to long (gather), then summarise, and if needed convert back to wide (spread) format:
library(dplyr)
library(tidyr)
# dataframe from @989 http://stackoverflow.com/a/43519062
df1 %>%
gather(key = myKey, value = myValue, -c(VAR1, VAR2)) %>%
mutate(myGroup = gsub("\\d", "", myKey)) %>%
group_by(VAR1, VAR2, myGroup) %>%
summarise(mySum = sum(myValue)) %>%
spread(key = myGroup, value = mySum)
# Source: local data frame [2 x 5]
# Groups: VAR1, VAR2 [2]
#
# VAR1 VAR2 AUS BEL NLD
# * <fctr> <fctr> <int> <int> <int>
# 1 A D 122 444 798
# 2 B D 6511 557 4912
You could try this:
vec <- c("AUS", "BEL", "NLD")
cbind(df[,grep("VAR", names(df))],
sapply(vec, function(x) rowSums(df[,grep(x, names(df))])))
# VAR1 VAR2 AUS BEL NLD
#1 A D 122 444 798
#2 B D 6511 557 4912
You just need to load the vec
with your 43 variables.
data
df <- structure(list(VAR1 = structure(1:2, .Label = c("A", "B"), class = "factor"),
VAR2 = structure(c(1L, 1L), .Label = "D", class = "factor"),
AUS1 = c(23L, 55L), AUS56 = c(99L, 6456L), BEL1 = c(0L, 0L
), BEL56 = c(444L, 557L), NLD1 = c(123L, 567L), NLD56 = c(675L,
4345L)), .Names = c("VAR1", "VAR2", "AUS1", "AUS56", "BEL1",
"BEL56", "NLD1", "NLD56"), class = "data.frame", row.names = c(NA,
-2L))
Here is another version which uses some of tidyverse
functionality without gather
ing
library(tidyverse)
fSumN1 <- function(dat, pat){
pat1 <- paste(pat, collapse="|")
newN <- paste0(pats, "_sum")
dat1 <- dat %>%
select(-matches(pat1))
dat %>%
select(matches(pat1)) %>%
split.default(sub("\\d+", "", names(.))) %>%
map_df(rowSums) %>%
rename_at(.vars = pat, funs(paste0(pat, "_sum"))) %>%
bind_cols(dat1, .)
}
pats <- c("AUS", "AUT")
fSumN1(dfN, pats)
# VAR1 VAR2 VAR3 VAR4 AUS_sum AUT_sum
#1 A D 0 FCK 1246 3076
#2 B D 0 XYC 6678 3349
data
dfN <- structure(list(VAR1 = c("A", "B"), VAR2 = c("D", "D"), AUS1 = c(23L,
55L), AUS2 = c(234L, 76L), AUS3 = c(34L, 55L), AUS4 = c(856L,
36L), AUS56 = c(99L, 6456L), VAR3 = c(0L, 0L), VAR4 = c("FCK",
"XYC"), AUT1 = c(598L, 774L), AUT2 = c(992L, 503L), AUT3 = c(819L,
944L), AUT4 = c(368L, 717L), AUT56 = c(299L, 411L)), .Names = c("VAR1",
"VAR2", "AUS1", "AUS2", "AUS3", "AUS4", "AUS56", "VAR3", "VAR4",
"AUT1", "AUT2", "AUT3", "AUT4", "AUT56"), row.names = c(NA, -2L
), class = "data.frame")