Reshaping an array to data.frame

2019-03-15 02:45发布

问题:

I have the following data structure (an "atomic vector"? — it is a 4-dimensional array) output from daply in plyr, in which I had the function return three different measures for each subject, condition, and item.

# Example data: a 5 x 2 x 2 x 3 array indexed by Subject, Cond, Item and an
# unnamed fourth dimension holding the three measures. Values are listed in
# column-major order, i.e. Subject varies fastest, then Cond, then Item.
x <- array(
  data = c(
    # Measure1
    -0.93, 0.39, 0.88, 0.63, 0.86,     # Item 1, Cond A
    -0.69, 1.02, 0.29, 0.94, 0.93,     # Item 1, Cond B
    -0.01, 0.79, 0.32, 0.14, 0.13,     # Item 2, Cond A
    -0.07, -0.63, 0.26, 0.07, 0.87,    # Item 2, Cond B
    # Measure2
    -0.36, 1.043, 0.33, -0.12, -0.055, # Item 1, Cond A
    0.07, 0.67, 0.48, 0.002, 0.008,    # Item 1, Cond B
    -0.19, -1.39, 0.98, 0.43, -0.02,   # Item 2, Cond A
    -0.15, -0.08, 0.74, 0.96, 0.44,    # Item 2, Cond B
    # Measure3
    -0.005, 1.09, 0.36, 0.04, 0.09,    # Item 1, Cond A
    0.17, 0.68, 0.51, 0.09, 0.12,      # Item 1, Cond B
    -0.05, 0.11, 0.99, 0.62, 0.13,     # Item 2, Cond A
    0.06, 0.27, 0.74, 0.96, 0.45       # Item 2, Cond B
  ),
  dim = c(5L, 2L, 2L, 3L),
  dimnames = list(
    Subject = c("s1", "s2", "s3", "s4", "s5"),
    Cond = c("A", "B"),
    Item = c("1", "2"),
    c("Measure1", "Measure2", "Measure3")
  )
)

I want to change it to look like:

Subject Cond Item Measure1 Measure2 Measure3
     s1    A    1    -0.93   -0.360   -0.005
     s1    A    2    -0.01    -0.19    -0.05 
     s1    B    1    -0.69    0.070     0.17
     s1    B    2    -0.07    -0.15     0.06
     s2    A    1     0.39    1.043    1.090
     s2    A    2     0.79    -1.39     0.11
     s2    B    1     1.02    0.670     0.68
     s2    B    2    -0.63    -0.08     0.27

etc.

Is there an easy way to do this?

回答1:

Yes, use adply():

# Split over margins 1-3 (Subject, Cond, Item), which become identifier
# columns; the remaining dimension supplies the Measure1..Measure3 columns.
adply(x, c(1,2,3))
   Subject Cond Item Measure1 Measure2 Measure3
1       s1    A    1    -0.93   -0.360   -0.005
2       s2    A    1     0.39    1.043    1.090
3       s3    A    1     0.88    0.330    0.360
4       s4    A    1     0.63   -0.120    0.040
5       s5    A    1     0.86   -0.055    0.090
6       s1    B    1    -0.69    0.070    0.170
7       s2    B    1     1.02    0.670    0.680
8       s3    B    1     0.29    0.480    0.510
9       s4    B    1     0.94    0.002    0.090
10      s5    B    1     0.93    0.008    0.120
11      s1    A    2    -0.01   -0.190   -0.050
12      s2    A    2     0.79   -1.390    0.110
13      s3    A    2     0.32    0.980    0.990
14      s4    A    2     0.14    0.430    0.620
15      s5    A    2     0.13   -0.020    0.130
16      s1    B    2    -0.07   -0.150    0.060
17      s2    B    2    -0.63   -0.080    0.270
18      s3    B    2     0.26    0.740    0.740
19      s4    B    2     0.07    0.960    0.960
20      s5    B    2     0.87    0.440    0.450


回答2:

Use as.data.frame.table().

# Flatten the array to long format: one row per cell, one column per
# dimension (the unnamed 4th dimension shows up as Var4) and the cell
# values in Freq.
df0 <- as.data.frame.table(x)
head(df0)

#   Subject Cond Item     Var4  Freq
# 1      s1    A    1 Measure1 -0.93
# 2      s2    A    1 Measure1  0.39
# 3      s3    A    1 Measure1  0.88
# 4      s4    A    1 Measure1  0.63
# 5      s5    A    1 Measure1  0.86
# 6      s1    B    1 Measure1 -0.69

library(tidyr)
# Turn the measure labels in Var4 into columns, filled from Freq.
# NOTE(review): spread() is superseded in current tidyr; the suggested
# replacement is pivot_wider(names_from = Var4, values_from = Freq) --
# confirm the resulting row order against the output below before swapping.
df1 <- spread(data = df0, key = Var4, value = Freq)
head(df1)

#   Subject Cond Item Measure1 Measure2 Measure3
# 1      s1    A    1    -0.93   -0.360   -0.005
# 2      s1    A    2    -0.01   -0.190   -0.050
# 3      s1    B    1    -0.69    0.070    0.170
# 4      s1    B    2    -0.07   -0.150    0.060
# 5      s2    A    1     0.39    1.043    1.090
# 6      s2    A    2     0.79   -1.390    0.110


回答3:

df = melt(x) gives you something very similar to what you want. Then you could compute the various measure variables from the different levels of measure.

Using the "reshape2" package, try:

# melt() stacks the array into long format; the formula then keeps one row
# per Subject/Cond/Item and spreads the levels of Var4 (the unnamed 4th
# dimension) out into columns.
dcast(melt(x), Subject + Cond + Item ~ Var4)


回答4:

ftable pretty much gets you where you need to be:

# Flatten the 4-d array into a two-dimensional "flat" table: as the printout
# below shows, Subject/Cond/Item label the rows and the measures the columns.
y <- ftable(x)
y
#
#                    Measure1 Measure2 Measure3
# Subject Cond Item                            
# s1      A    1       -0.930   -0.360   -0.005
#              2       -0.010   -0.190   -0.050
#         B    1       -0.690    0.070    0.170
#              2       -0.070   -0.150    0.060
# s2      A    1        0.390    1.043    1.090
#              2        0.790   -1.390    0.110
#         B    1        1.020    0.670    0.680
#              2       -0.630   -0.080    0.270
# s3      A    1        0.880    0.330    0.360
#              2        0.320    0.980    0.990
#         B    1        0.290    0.480    0.510
#              2        0.260    0.740    0.740
# s4      A    1        0.630   -0.120    0.040
#              2        0.140    0.430    0.620
#         B    1        0.940    0.002    0.090
#              2        0.070    0.960    0.960
# s5      A    1        0.860   -0.055    0.090
#              2        0.130   -0.020    0.130
#         B    1        0.930    0.008    0.120
#              2        0.870    0.440    0.450

However, most people would probably prefer their data in a data.frame. Using as.data.frame.matrix extracts the values, but not the row and column names; ftable stores that information in its row.vars and col.vars attributes.

# Labels of the row variables: one list element per row dimension.
attr(y, "row.vars")
# $Subject
# [1] "s1" "s2" "s3" "s4" "s5"
# 
# $Cond
# [1] "A" "B"
# 
# $Item
# [1] "1" "2"

# Labels of the column variable(s).
attr(y, "col.vars")
# [[1]]
# [1] "Measure1" "Measure2" "Measure3"

We can use this information to write a function that converts an ftable to a data.frame:

# Convert a flat contingency table to a data.frame.
#
# Args:
#   mydata: an "ftable" object, or any object (array, table, ...) that
#           ftable() can flatten.
#
# Returns:
#   A data.frame with one column per row variable (built from the "row.vars"
#   attribute), followed by one column per combination of column-variable
#   levels (built from "col.vars"; levels of several column variables are
#   joined with "_").
ftable2df <- function(mydata) {
  # Plain scalar `if` + inherits(): the previous ifelse(class(x) == ...) used
  # a vectorised function for control flow, relied on assignments inside its
  # arguments, and compared against a class vector that can have length > 1.
  if (!inherits(mydata, "ftable")) {
    mydata <- ftable(mydata)
  }
  # expand.grid() varies its FIRST factor fastest, whereas the rows of an
  # ftable vary the LAST row variable fastest; reversing the list before and
  # the columns after lines the labels up with the table rows.
  dfrows <- rev(expand.grid(rev(attr(mydata, "row.vars"))))
  # Cell values only; the ftable carries no dimnames, so names are set below.
  dfcols <- as.data.frame.matrix(mydata)
  names(dfcols) <- do.call(
    paste, c(rev(expand.grid(rev(attr(mydata, "col.vars")))), sep = "_"))
  cbind(dfrows, dfcols)
}

Here it is in use directly on your original "x":

# A plain array is accepted: ftable2df() passes non-ftable input through
# ftable() before building the data.frame.
ftable2df(x)
#    Subject Cond Item Measure1 Measure2 Measure3
# 1       s1    A    1    -0.93   -0.360   -0.005
# 2       s1    A    2    -0.01   -0.190   -0.050
# 3       s1    B    1    -0.69    0.070    0.170
# 4       s1    B    2    -0.07   -0.150    0.060
# 5       s2    A    1     0.39    1.043    1.090
# 6       s2    A    2     0.79   -1.390    0.110
# 7       s2    B    1     1.02    0.670    0.680
# 8       s2    B    2    -0.63   -0.080    0.270
# 9       s3    A    1     0.88    0.330    0.360
# 10      s3    A    2     0.32    0.980    0.990
# 11      s3    B    1     0.29    0.480    0.510
# 12      s3    B    2     0.26    0.740    0.740
# 13      s4    A    1     0.63   -0.120    0.040
# 14      s4    A    2     0.14    0.430    0.620
# 15      s4    B    1     0.94    0.002    0.090
# 16      s4    B    2     0.07    0.960    0.960
# 17      s5    A    1     0.86   -0.055    0.090
# 18      s5    A    2     0.13   -0.020    0.130
# 19      s5    B    1     0.93    0.008    0.120
# 20      s5    B    2     0.87    0.440    0.450