我有一个数据框(data frame),并把它拆分成了三个(或任意数量的)子数据框。
我想做的是自动处理每个数据框的每一列,并为现有变量添加滞后(lag)版本。
举例来说,如果每个data.frame中有三个变量(v1、v2、v3),我想自动(不使用硬编码)添加v1.lag、v2.lag和v3.lag。
下面是我目前写出的代码,但现在我卡住了。
非常感谢任何帮助。
# Example data: three random numeric columns (v1..v3) plus two grouping
# columns. dim1 has three levels of 24 rows each, dim2 two levels of 36 rows.
dd <- data.frame(
  matrix(rnorm(216), nrow = 72, ncol = 3),
  rep(c("A", "B", "C"), each = 24),
  rep(c("J", "K"), each = 36)
)
names(dd) <- c("v1", "v2", "v3", "dim1", "dim2")
dd
# One data frame per level of dim1, returned as a named list.
dds <- split(dd, dd$dim1)
dds
# Missing step 1: Automatically create v1.lag, v2.lag, v3.lag, etc (if required)
最后,我想把这三个数据框合并成一个大的数据框,其中包含新创建的变量。
# Missing step 2: Merge data frames into single data frame
非常感谢任何帮助。
编辑:我在评论中询问了如何计算移动平均值(而不是滞后值)。解决方案如下:
#' One-sided linear filter of a vector, scaled by the filter length
#'
#' Applies `stats::filter()` with `sides = 1` (current and past values only)
#' and divides the result by `length(f)`. With the default all-ones `f` this
#' is a trailing moving average over `length(f)` observations; for other
#' weights the divisor is still `length(f)`, not `sum(f)`.
#'
#' @param x Numeric vector (or univariate time series).
#' @param f Numeric vector of filter coefficients.
#' @return A plain numeric vector the same length as `x`; the first
#'   `length(f) - 1` entries are `NA`.
ma <- function(x, f = c(1, 1, 1)) {
  # Qualify the call: packages such as dplyr export their own `filter()`,
  # which masks stats::filter once attached and breaks an unqualified call.
  as.numeric(stats::filter(x, f, sides = 1) / length(f))
}
#' Append a filtered copy of every numeric column of a data frame
#'
#' For each numeric column `v`, adds a column named `v.ma.<length(f)>` that
#' holds `ma(v, f = f)`. Non-numeric columns are left as they are.
#'
#' @param df A data frame.
#' @param f Filter coefficients passed on to `ma()`.
#' @return `df` with one extra column per numeric column; `df` unchanged when
#'   it has no numeric columns.
foo <- function(df, f = c(1, 1, 1)) {
  # vapply always returns a logical vector, even for a zero-column input.
  is_num <- vapply(df, is.numeric, logical(1))
  # Without this guard paste() would yield the single bogus name ".ma.<n>"
  # and the assignment below would get an empty replacement list.
  if (!any(is_num)) {
    return(df)
  }
  new_names <- paste(names(df)[is_num], "ma", length(f), sep = ".")
  df[, new_names] <- lapply(which(is_num), function(id) ma(df[[id]], f = f))
  df
}
这里有一个选项:
## reuse @Andrie's clag() function as lag() is silly
#' Lag a vector by `n` positions, padding the front with `NA`
#'
#' @param x A vector.
#' @param n Single non-negative number of positions to shift by.
#' @return A vector the same length as `x`: `n` leading `NA`s followed by the
#'   first `length(x) - n` elements of `x`. `n = 0` returns the values of `x`
#'   unshifted; `n >= length(x)` returns all `NA`.
clag <- function(x, n = 1) {
  stopifnot(length(n) == 1L, !is.na(n), n >= 0)
  # Cap the shift at length(x) so the result never outgrows the input.
  k <- min(n, length(x))
  # Index explicitly instead of head(x, -n): head(x, -0) is head(x, 0),
  # i.e. empty, which would drop every value when n = 0.
  c(rep(NA, k), x[seq_len(length(x) - k)])
}
## wrapper function to do the addition of lag variables for single DF
#' Append a lagged copy of every numeric column of a data frame
#'
#' For each numeric column `v`, adds a column named `v.lag` that holds
#' `clag(v, n = n)`. Non-numeric columns are left as they are.
#'
#' @param df A data frame.
#' @param n Lag passed on to `clag()`.
#' @return `df` with one extra `.lag` column per numeric column; `df`
#'   unchanged when it has no numeric columns.
foo <- function(df, n = 1) {
  # vapply always returns a logical vector, even for a zero-column input.
  is_num <- vapply(df, is.numeric, logical(1))
  # Without this guard paste() would yield the single bogus name ".lag"
  # and the assignment below would get an empty replacement list.
  if (!any(is_num)) {
    return(df)
  }
  lag_names <- paste(names(df)[is_num], "lag", sep = ".")
  df[, lag_names] <- lapply(which(is_num), function(id) clag(df[[id]], n = n))
  df
}
# Run foo() on every per-group data frame; the result is a list with the
# same names as dds.
lapply(X = dds, FUN = foo)
结果如下:
> lapply(dds, foo)
$A
v1 v2 v3 dim1 dim2 v1.lag v2.lag v3.lag
1 -1.15107343 1.47671548 -0.146501739 A J NA NA NA
2 -1.61068272 -0.85397093 -1.240187604 A J -1.15107343 1.47671548 -0.146501739
3 -1.23470282 -0.26194027 1.938344030 A J -1.61068272 -0.85397093 -1.240187604
4 -0.57874043 -0.44600138 0.326069423 A J -1.23470282 -0.26194027 1.938344030
5 0.16139066 -1.95804742 -0.744678169 A J -0.57874043 -0.44600138 0.326069423
6 -1.01497027 0.36850034 1.532640065 A J 0.16139066 -1.95804742 -0.744678169
7 0.72288058 -0.40115543 -0.686450596 A J -1.01497027 0.36850034 1.532640065
8 -0.51300447 0.19686310 0.441649595 A J 0.72288058 -0.40115543 -0.686450596
9 0.95439966 -2.03513002 -0.897784897 A J -0.51300447 0.19686310 0.441649595
10 -1.36736081 -0.41040962 -0.459403176 A J 0.95439966 -2.03513002 -0.897784897
11 0.59503846 0.28925760 -0.003095389 A J -1.36736081 -0.41040962 -0.459403176
12 -0.37951869 0.49551357 0.269412108 A J 0.59503846 0.28925760 -0.003095389
13 -0.52953401 -0.28433351 1.125505917 A J -0.37951869 0.49551357 0.269412108
14 -1.73466020 0.25442637 -1.094139749 A J -0.52953401 -0.28433351 1.125505917
15 0.08479137 -0.11688894 -1.034378216 A J -1.73466020 0.25442637 -1.094139749
16 -2.45854464 0.15806266 -2.275995527 A J 0.08479137 -0.11688894 -1.034378216
17 1.10663502 1.28587230 0.070334868 A J -2.45854464 0.15806266 -2.275995527
18 -0.01945585 1.63659116 -0.137040232 A J 1.10663502 1.28587230 0.070334868
19 0.59026606 -1.95724134 -0.480014930 A J -0.01945585 1.63659116 -0.137040232
20 -0.32245933 1.35372005 1.348717525 A J 0.59026606 -1.95724134 -0.480014930
21 -0.42560327 -1.30145328 2.020609480 A J -0.32245933 1.35372005 1.348717525
22 1.19550777 0.18417336 0.099232994 A J -0.42560327 -1.30145328 2.020609480
23 1.20198621 0.05926023 -0.171505810 A J 1.19550777 0.18417336 0.099232994
24 -1.00667141 1.32441782 0.056696824 A J 1.20198621 0.05926023 -0.171505810
$B
v1 v2 v3 dim1 dim2 v1.lag v2.lag v3.lag
25 0.7878614 0.10354576 -0.69308980 B J NA NA NA
26 0.5824551 0.42319616 0.42734938 B J 0.7878614 0.10354576 -0.69308980
27 -0.2769730 1.51559382 -0.64106570 B J 0.5824551 0.42319616 0.42734938
28 -0.5736416 -1.58745816 -1.13274631 B J -0.2769730 1.51559382 -0.64106570
29 -1.9082145 -0.26148604 -0.04699411 B J -0.5736416 -1.58745816 -1.13274631
30 -1.6254549 0.39390814 -1.79993619 B J -1.9082145 -0.26148604 -0.04699411
31 0.3963274 1.79667985 0.92873142 B J -1.6254549 0.39390814 -1.79993619
32 -0.5889415 -0.04690351 1.43394978 B J 0.3963274 1.79667985 0.92873142
33 0.4683819 -1.34023029 0.18749782 B J -0.5889415 -0.04690351 1.43394978
34 0.7373052 -0.93470320 -1.14528378 B J 0.4683819 -1.34023029 0.18749782
35 -0.7751348 -1.26533917 0.11246728 B J 0.7373052 -0.93470320 -1.14528378
36 1.7786627 -0.19757164 0.14150980 B J -0.7751348 -1.26533917 0.11246728
37 1.8570412 -2.15174901 1.07751105 B K 1.7786627 -0.19757164 0.14150980
38 0.5128697 0.40112948 -0.94826274 B K 1.8570412 -2.15174901 1.07751105
39 0.8710264 -0.59978467 0.54462858 B K 0.5128697 0.40112948 -0.94826274
40 -0.3711512 -0.15632337 0.15832543 B K 0.8710264 -0.59978467 0.54462858
41 1.4505624 0.20915835 2.59369653 B K -0.3711512 -0.15632337 0.15832543
42 0.0871329 0.25440471 0.30096063 B K 1.4505624 0.20915835 2.59369653
43 -0.7398342 -1.72678544 0.45534941 B K 0.0871329 0.25440471 0.30096063
44 0.1953264 -0.60560630 -0.36884626 B K -0.7398342 -1.72678544 0.45534941
45 -0.2702493 0.50747209 -0.50699830 B K 0.1953264 -0.60560630 -0.36884626
46 0.2987449 0.46347722 1.20725190 B K -0.2702493 0.50747209 -0.50699830
47 -0.5682779 -0.71470625 -0.07865078 B K 0.2987449 0.46347722 1.20725190
48 -1.5291983 1.80092050 -1.73317395 B K -0.5682779 -0.71470625 -0.07865078
$C
v1 v2 v3 dim1 dim2 v1.lag v2.lag v3.lag
49 0.06095825 -0.518263220 0.510999371 C K NA NA NA
50 0.40077713 0.477989115 0.855752036 C K 0.06095825 -0.518263220 0.510999371
51 0.06763037 0.802110426 -0.102536186 C K 0.40077713 0.477989115 0.855752036
52 -0.90530986 -0.005452101 -0.089703589 C K 0.06763037 0.802110426 -0.102536186
53 -0.79360209 0.299844218 -0.765164525 C K -0.90530986 -0.005452101 -0.089703589
54 1.34050298 -1.093705314 -0.955952912 C K -0.79360209 0.299844218 -0.765164525
55 0.45377712 0.054978470 0.382874895 C K 1.34050298 -1.093705314 -0.955952912
56 0.95283101 -0.564193352 1.458002944 C K 0.45377712 0.054978470 0.382874895
57 1.09157807 -1.351894599 -1.366084414 C K 0.95283101 -0.564193352 1.458002944
58 2.71993062 -1.126272793 1.374046159 C K 1.09157807 -1.351894599 -1.366084414
59 -0.04685281 0.423085481 -0.455903151 C K 2.71993062 -1.126272793 1.374046159
60 -0.31055449 0.818291875 0.400386018 C K -0.04685281 0.423085481 -0.455903151
61 -0.54904545 1.542272313 0.648135340 C K -0.31055449 0.818291875 0.400386018
62 -0.72914142 1.495482707 -0.212135011 C K -0.54904545 1.542272313 0.648135340
63 -0.27374611 -1.309254707 -0.005125047 C K -0.72914142 1.495482707 -0.212135011
64 0.87439910 -2.666588138 1.043778597 C K -0.27374611 -1.309254707 -0.005125047
65 1.07142042 0.446233778 -0.286784683 C K 0.87439910 -2.666588138 1.043778597
66 -0.10431808 0.510820156 0.405309569 C K 1.07142042 0.446233778 -0.286784683
67 -1.04006019 -0.041327622 1.202855549 C K -0.10431808 0.510820156 0.405309569
68 0.41084794 -0.376796559 -1.147032471 C K -1.04006019 -0.041327622 1.202855549
69 0.88329788 -0.344611311 1.862998306 C K 0.41084794 -0.376796559 -1.147032471
70 -0.67916248 1.396061431 0.697517685 C K 0.88329788 -0.344611311 1.862998306
71 3.55359528 -0.207825480 -0.949834845 C K -0.67916248 1.396061431 0.697517685
72 0.11329113 0.294747300 -0.955891419 C K 3.55359528 -0.207825480 -0.949834845
对于最后一点,即合并步骤,先把上面的结果保存下来:
# Replace each per-group data frame with the version foo() returns.
dds <- lapply(X = dds, FUN = foo)
然后使用do.call()配合rbind()把各个数据框合并成一个数据框,如下所示:
# Stack the per-group data frames row-wise into one data frame by calling
# rbind() with the elements of dds as its arguments.
df2 <- do.call(what = rbind, args = dds)
结果如下:
> head(df2)
v1 v2 v3 dim1 dim2 v1.lag v2.lag v3.lag
A.1 -1.1510734 1.4767155 -0.1465017 A J NA NA NA
A.2 -1.6106827 -0.8539709 -1.2401876 A J -1.1510734 1.4767155 -0.1465017
A.3 -1.2347028 -0.2619403 1.9383440 A J -1.6106827 -0.8539709 -1.2401876
A.4 -0.5787404 -0.4460014 0.3260694 A J -1.2347028 -0.2619403 1.9383440
A.5 0.1613907 -1.9580474 -0.7446782 A J -0.5787404 -0.4460014 0.3260694
A.6 -1.0149703 0.3685003 1.5326401 A J 0.1613907 -1.9580474 -0.7446782
使用plyr包可以一步完成这一切:
library(plyr)
#' Lag a vector by `n` positions, padding the front with `NA`
#'
#' @param x A vector.
#' @param n Single non-negative number of positions to shift by.
#' @return A vector the same length as `x`: `n` leading `NA`s followed by the
#'   first `length(x) - n` elements of `x`. `n = 0` returns the values of `x`
#'   unshifted; `n >= length(x)` returns all `NA`.
clag <- function(x, n = 1) {
  stopifnot(length(n) == 1L, !is.na(n), n >= 0)
  # Cap the shift at length(x) so the result never outgrows the input.
  k <- min(n, length(x))
  # Index explicitly instead of head(x, -n): head(x, -0) is head(x, 0),
  # i.e. empty, which would drop every value when n = 0.
  c(rep(NA, k), x[seq_len(length(x) - k)])
}
# Split dd by dim1, add the three lag columns within each group via
# transform(), and let ddply() recombine the groups into one data frame.
x <- ddply(
  .data = dd,
  .variables = .(dim1),
  .fun = transform,
  v1.lag = clag(v1),
  v2.lag = clag(v2),
  v3.lag = clag(v3)
)
head(x)
v1 v2 v3 dim1 dim2 v1.lag v2.lag v3.lag
1 0.4465910 -0.2564334 -0.9122640 A J NA NA NA
2 -0.3748563 -0.9461061 0.1641274 A J 0.4465910 -0.2564334 -0.9122640
3 -0.5010834 -0.4413026 -0.7509968 A J -0.3748563 -0.9461061 0.1641274
4 -0.5278584 -0.6377017 0.5528831 A J -0.5010834 -0.4413026 -0.7509968
5 -0.4290586 0.4687849 0.6885102 A J -0.5278584 -0.6377017 0.5528831
6 0.1179935 -0.2742456 -0.1945482 A J -0.4290586 0.4687849 0.6885102