这是一个事实举世公认的是,R的基础重塑命令是迅速而有力,但有悲惨的语法。 因此,我写它周围的快速包装,我会扔在下一版本的taRifx
包。 我这样做之前,但是,我想征求改进。
这里是我的版本,从@RichieCotton更新:
# reshapeasy: Version of reshape with way, way better syntax
# Written with the help of the StackOverflow R community
# x is a data.frame to be reshaped
# direction is "wide" or "long"
# vars are the names of the (stubs of) the variables to be reshaped (if omitted, defaults to everything not in id or vary)
# id are the names of the variables that identify unique observations
# vary is the variable that varies. Going to wide this variable will cease to exist. Going to long it will be created.
# omit is a vector of characters which are to be omitted if found at the end of variable names (e.g. price_1 becomes price in long)
# ... are options to be passed to stats::reshape
reshapeasy <- function( data, direction, id=(sapply(data,is.factor) | sapply(data,is.character)), vary=sapply(data,is.numeric), omit=c("_","."), vars=NULL, ... ) {
if(direction=="wide") data <- stats::reshape( data=data, direction=direction, idvar=id, timevar=vary, ... )
if(direction=="long") {
varying <- which(!(colnames(data) %in% id))
data <- stats::reshape( data=data, direction=direction, idvar=id, varying=varying, timevar=vary, ... )
}
colnames(data) <- gsub( paste("[",paste(omit,collapse="",sep=""),"]$",sep=""), "", colnames(data) )
return(data)
}
请注意,您可以从广角移动到长而不改变比其他方向的选项。 对我来说,这是关键的可用性。
我很高兴,如果你聊天或E-mail给我你的信息给在功能帮助文件确认任何实质性的改善。
改进可能落在在以下几个方面:
- 命名的功能和它的参数
- 更一般化的(目前处理一个相当具体的情况下,我认为是目前最常见的,但它尚未用尽统计的能力::重塑)
- 代码改进
例子
样本数据
x.wide <- structure(list(surveyNum = 1:6, pio_1 = structure(c(2L, 2L, 1L,
2L, 1L, 1L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1",
"2"), class = "factor"), pio_2 = structure(c(2L, 1L, 2L, 1L,
2L, 2L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1",
"2"), class = "factor"), pio_3 = structure(c(2L, 2L, 1L, 1L,
2L, 1L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1",
"2"), class = "factor"), caremgmt_1 = structure(c(2L, 1L, 1L,
2L, 1L, 2L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1",
"2"), class = "factor"), caremgmt_2 = structure(c(1L, 2L, 2L,
2L, 2L, 1L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1",
"2"), class = "factor"), caremgmt_3 = structure(c(1L, 2L, 1L,
2L, 1L, 1L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1",
"2"), class = "factor"), prev_1 = structure(c(1L, 2L, 2L, 1L,
1L, 2L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1",
"2"), class = "factor"), prev_2 = structure(c(2L, 2L, 1L, 2L,
1L, 1L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1",
"2"), class = "factor"), prev_3 = structure(c(2L, 1L, 2L, 2L,
1L, 1L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1",
"2"), class = "factor"), price_1 = structure(c(2L, 1L, 2L, 5L,
3L, 4L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1",
"2", "3", "4", "5", "6"), class = "factor"), price_2 = structure(c(6L,
5L, 5L, 4L, 4L, 2L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1",
"2", "3", "4", "5", "6"), class = "factor"), price_3 = structure(c(3L,
5L, 2L, 5L, 4L, 5L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("1",
"2", "3", "4", "5", "6"), class = "factor")), .Names = c("surveyNum",
"pio_1", "pio_2", "pio_3", "caremgmt_1", "caremgmt_2", "caremgmt_3",
"prev_1", "prev_2", "prev_3", "price_1", "price_2", "price_3"
), idvars = "surveyNum", rdimnames = list(structure(list(surveyNum = 1:24), .Names = "surveyNum", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24"
), class = "data.frame"), structure(list(variable = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("pio",
"caremgmt", "prev", "price"), class = "factor"), .id = c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L)), .Names = c("variable",
".id"), row.names = c("pio_1", "pio_2", "pio_3", "caremgmt_1",
"caremgmt_2", "caremgmt_3", "prev_1", "prev_2", "prev_3", "price_1",
"price_2", "price_3"), class = "data.frame")), row.names = c(NA,
6L), class = c("cast_df", "data.frame"))
x.long <- structure(list(.id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), pio = structure(c(2L,
2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L,
1L, 2L, 2L, 1L, 2L, 1L, 1L), .Label = c("1", "2"), class = "factor"),
caremgmt = structure(c(2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 2L), .Label = c("1", "2"), class = "factor"), prev = structure(c(1L,
2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L), .Label = c("1",
"2"), class = "factor"), price = structure(c(2L, 1L, 2L,
5L, 3L, 4L, 1L, 5L, 4L, 3L, 1L, 2L, 6L, 6L, 5L, 4L, 6L, 3L,
5L, 6L, 3L, 1L, 2L, 4L, 3L, 5L, 2L, 5L, 4L, 5L, 6L, 6L, 4L,
6L, 4L, 1L, 2L, 3L, 1L, 2L, 2L, 5L, 1L, 6L, 1L, 3L, 4L, 3L,
6L, 5L, 5L, 4L, 4L, 2L, 2L, 2L, 6L, 3L, 1L, 4L, 4L, 5L, 1L,
3L, 6L, 1L, 3L, 5L, 1L, 3L, 6L, 2L), .Label = c("1", "2",
"3", "4", "5", "6"), class = "factor"), surveyNum = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L)), .Names = c(".id",
"pio", "caremgmt", "prev", "price", "surveyNum"), row.names = c(NA,
-72L), class = "data.frame")
例子
> x.wide
surveyNum pio_1 pio_2 pio_3 caremgmt_1 caremgmt_2 caremgmt_3 prev_1 prev_2 prev_3 price_1 price_2 price_3
1 1 2 2 2 2 1 1 1 2 2 2 6 3
2 2 2 1 2 1 2 2 2 2 1 1 5 5
3 3 1 2 1 1 2 1 2 1 2 2 5 2
4 4 2 1 1 2 2 2 1 2 2 5 4 5
5 5 1 2 2 1 2 1 1 1 1 3 4 4
6 6 1 2 1 2 1 1 2 1 1 4 2 5
> reshapeasy( x.wide, "long", NULL, id="surveyNum", vary="id", sep="_" )
surveyNum id pio caremgmt prev price
1.1 1 1 2 2 1 2
2.1 2 1 2 1 2 1
3.1 3 1 1 1 2 2
4.1 4 1 2 2 1 5
5.1 5 1 1 1 1 3
6.1 6 1 1 2 2 4
1.2 1 2 2 1 2 6
2.2 2 2 1 2 2 5
3.2 3 2 2 2 1 5
4.2 4 2 1 2 2 4
5.2 5 2 2 2 1 4
6.2 6 2 2 1 1 2
1.3 1 3 2 1 2 3
2.3 2 3 2 2 1 5
3.3 3 3 1 1 2 2
4.3 4 3 1 2 2 5
5.3 5 3 2 1 1 4
6.3 6 3 1 1 1 5
> head(x.long)
.id pio caremgmt prev price surveyNum
1 1 2 2 1 2 1
2 1 2 1 2 1 2
3 1 1 1 2 2 3
4 1 2 2 1 5 4
5 1 1 1 1 3 5
6 1 1 2 2 4 6
> head(reshapeasy( x.long, direction="wide", id="surveyNum", vary=".id" ))
surveyNum pio.1 caremgmt.1 prev.1 price.1 pio.3 caremgmt.3 prev.3 price.3 pio.2 caremgmt.2 prev.2 price.2
1 1 2 2 1 2 2 1 2 3 2 1 2 6
2 2 2 1 2 1 2 2 1 5 1 2 2 5
3 3 1 1 2 2 1 1 2 2 2 2 1 5
4 4 2 2 1 5 1 2 2 5 1 2 2 4
5 5 1 1 1 3 2 1 1 4 2 2 1 4
6 6 1 2 2 4 1 1 1 5 2 1 1 2