I have a dataframe like so:
# Reproducible example data: two groups ("a" and "b"), each observed at
# time points 1 to 3, plus one column of random normal values.
set.seed(34)
startingframe <- data.frame(
  group1 = factor(rep(c("a", "b"), each = 3)),
  time = rep(1:3, times = 2),
  othercolumn = rnorm(6)
)
...which I am trying to manipulate into a list that is split by group and, within each group, by each cumulative 'iteration' of the time
column (time 1, then times 1-2, then times 1-3), with the results looking like so:
$a1
group1 time othercolumn
1 a 1 -0.13889
$a2
group1 time othercolumn
1 a 1 -0.138890
2 a 2 1.199813
$a3
group1 time othercolumn
1 a 1 -0.1388900
2 a 2 1.1998129
3 a 3 -0.7477224
$b1
group1 time othercolumn
4 b 1 -0.5752482
$b2
group1 time othercolumn
4 b 1 -0.5752482
5 b 2 -0.2635815
$b3
group1 time othercolumn
4 b 1 -0.5752482
5 b 2 -0.2635815
6 b 3 -0.4554921
I think I just need the iteration bit incorporated into this code:
# Current attempt: yields one list element per (group1, time) combination,
# not the cumulative subsets shown above.
split_list <- with(startingframe, split(startingframe, list(group1, time)))
EDIT: I may have oversimplified the original problem, but the real dataset has additional columns that would need to be brought through in the list.
You could use `head()` for every number from 1 to the number of rows of `x`, for each sub-group `x` of your data:
# For every group, build the cumulative "first n rows" subsets (n = 1, 2, ...).
# seq_len() is used rather than 1:nrow(x) so that an empty group (e.g. an
# unused factor level) contributes no subsets instead of iterating over c(1, 0).
out <- lapply(
  split(df, df$group1),
  function(x) lapply(seq_len(nrow(x)), function(n) head(x, n))
)
# Flatten one level only, so each element stays a whole table; the names
# combine the group name with the position (a1, a2, a3, ...).
out <- unlist(out, recursive = FALSE)
out
# $`a1`
# group1 time
# 1: a 1
#
# $a2
# group1 time
# 1: a 1
# 2: a 2
#
# $a3
# group1 time
# 1: a 1
# 2: a 2
# 3: a 3
#
# $b1
# group1 time
# 1: b 1
#
# $b2
# group1 time
# 1: b 1
# 2: b 2
#
# $b3
# group1 time
# 1: b 1
# 2: b 2
# 3: b 3
You might not need to create a list of all these data frames, though; you could just create a list of row indices and use them as needed.
# For every group, store the cumulative row indices (first 1, first 2, ...)
# instead of materialising the subsets themselves.
# seq_len()/seq_along() are used throughout so that zero-length inputs (an
# empty table or an empty group) give empty results rather than the c(1, 0)
# sequence that 1:0 and seq(0) produce, which would index as NA.
inds <- lapply(
  split(seq_len(nrow(df)), df$group1),
  function(x) lapply(seq_along(x), function(y) x[seq_len(y)])
)
inds
# $`a`
# $`a`[[1]]
# [1] 1
#
# $`a`[[2]]
# [1] 1 2
#
# $`a`[[3]]
# [1] 1 2 3
#
#
# $b
# $b[[1]]
# [1] 4
#
# $b[[2]]
# [1] 4 5
#
# $b[[3]]
# [1] 4 5 6
# Use the stored indices to pull the first two rows of group b.
# The trailing comma makes this a row subset for a plain data.frame as well;
# without it a data.frame would treat the indices as column positions.
df[inds$b[[2]], ]
# group1 time
# 1: b 1
# 2: b 2
This approach does not split the original data; it generates the desired output directly (only the `group1` and `time` columns are produced).
# Build a data frame for one group label: `l` repeated `t` times in `group1`,
# alongside `time` running from 1 to `t` in steps of 1.
genlist <- function(l, t) {
  labels <- rep(x = l, times = t)
  steps <- seq(from = 1, to = t, by = 1)
  data.frame(group1 = labels, time = steps)
}
# Call genlist() once per row of startingframe, pairing that row's group1
# with its time, and collect the resulting data frames in a list.
Map(genlist, startingframe$group1, startingframe$time)
One way to do this would be to use `lapply` inside another `lapply`:
# For each level of group1, collect the cumulative subsets of that group's
# rows: the k-th inner element keeps the rows whose time lies in 1:time[k].
lapply(levels(startingframe$group1), function(lvl) {
  group_rows <- startingframe[startingframe$group1 == lvl, ]
  cumulative_subset <- function(upto) {
    group_rows[group_rows$time %in% 1:upto, ]
  }
  lapply(group_rows$time, cumulative_subset)
})
[[1]]
[[1]][[1]]
group1 time
1 a 1
[[1]][[2]]
group1 time
1 a 1
2 a 2
[[1]][[3]]
group1 time
1 a 1
2 a 2
3 a 3
[[2]]
[[2]][[1]]
group1 time
4 b 1
[[2]][[2]]
group1 time
4 b 1
5 b 2
[[2]][[3]]
group1 time
4 b 1
5 b 2
6 b 3