I'm having trouble applying the `rle` function to a data.frame, although the same function works fine on another data set (shown further below). This is the call:
fgroup <- aggregate(fevents2[,3:14], list(weeks = fevents2[, 1]), function(x) rle(x)$values)
Which yields the error:
Error in rle(x) : 'x' must be an atomic vector
Sample data:
> dput(fevents2[1:20,])
structure(list(weeks = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7"), class = "factor"), A1M.Date = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("2012-05-09", "2012-05-10", "2012-05-11",
"2012-05-14", "2012-05-15", "2012-05-17", "2012-05-18", "2012-05-21",
"2012-05-22", "2012-05-24", "2012-05-25", "2012-05-28", "2012-05-29",
"2012-05-30", "2012-05-31", "2012-06-04", "2012-06-05", "2012-06-07",
"2012-06-08", "2012-06-11", "2012-06-12", "2012-06-14", "2012-06-15",
"2012-06-18", "2012-06-19", "2012-06-21", "2012-06-22"), class = "factor"),
vv = structure(c(8L, 8L, 8L, 20L, 24L, 24L, 24L, 1L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 24L), .Label = c("C AA",
"C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV",
"C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA",
"G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "nil"), class = "factor"),
rv = structure(c(25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L,
10L, 10L, 22L, 22L, 22L, 25L, 10L, 22L, 22L, 22L, 22L, 25L
), .Label = c("C AA", "C AJ", "C BB", "C BV", "C JA", "C JR",
"C RJ", "C RR", "C RV", "C VB", "C VR", "C VV", "G AA", "G AJ",
"G BB", "G BV", "G JA", "G JR", "G RJ", "G RR", "G RV", "G VB",
"G VR", "G VV", "nil"), class = "factor"), ja = structure(c(12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 25L, 25L,
12L, 24L, 24L, 24L, 24L, 24L, 24L), .Label = c("C AA", "C AJ",
"C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV", "C VB",
"C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA", "G JR",
"G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"),
aa = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 25L, 25L,
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L), .Label = c("C AA",
"C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV",
"C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA",
"G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"
), class = "factor"), bv = structure(c(25L, 11L, 11L, 11L,
23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L,
23L, 23L, 23L, 23L), .Label = c("C AA", "C AJ", "C BB", "C BV",
"C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV",
"G AA", "G AJ", "G BB", "G BV", "G JA", "G JR", "G RJ", "G RR",
"G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"),
aj = structure(c(7L, 7L, 7L, 25L, 25L, 25L, 25L, 25L, 9L,
9L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 25L, 25L), .Label = c("C AA",
"C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV",
"C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA",
"G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"
), class = "factor"), vb = structure(c(1L, 1L, 1L, 25L, 25L,
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 2L,
25L, 2L, 2L), .Label = c("C AA", "C AJ", "C BB", "C BV",
"C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV",
"G AA", "G AJ", "G BB", "G BV", "G JA", "G JR", "G RJ", "G RR",
"G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"),
rj = structure(c(5L, 5L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), .Label = c("C AA",
"C AJ", "C BB", "C BV", "C JR", "C RJ", "C RR", "C RV", "C VB",
"C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JR", "G RJ",
"G RR", "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor"),
rr = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("C AA",
"C AJ", "C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV",
"C VB", "C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA",
"G JR", "G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"
), class = "factor"), vr = structure(c(5L, 5L, 5L, 25L, 25L,
7L, 7L, 7L, 7L, 7L, 25L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L), .Label = c("C AA", "C AJ", "C BB", "C BV", "C JA", "C JR",
"C RJ", "C RR", "C RV", "C VB", "C VR", "C VV", "G AA", "G AJ",
"G BB", "G BV", "G JA", "G JR", "G RJ", "G RR", "G RV", "G VB",
"G VR", "G VV", "nil"), class = "factor"), bb = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L), .Label = c("C AA", "C AJ", "C BB", "C BV",
"C JA", "C JR", "C RJ", "C RR", "C RV", "C VB", "C VR", "C VV",
"G AA", "G AJ", "G BB", "G BV", "G JA", "G RJ", "G RR", "G RV",
"G VB", "G VR", "G VV", "nil"), class = "factor"), jr = structure(c(25L,
25L, 10L, 10L, 22L, 22L, 25L, 25L, 25L, 25L, 25L, 25L, 25L,
25L, 25L, 25L, 5L, 5L, 5L, 5L), .Label = c("C AA", "C AJ",
"C BB", "C BV", "C JA", "C JR", "C RJ", "C RR", "C RV", "C VB",
"C VR", "C VV", "G AA", "G AJ", "G BB", "G BV", "G JA", "G JR",
"G RJ", "G RR", "G RV", "G VB", "G VR", "G VV", "nil"), class = "factor")),
.Names = c("weeks",
"A1M.Date", "vv", "rv", "ja", "aa", "bv", "aj", "vb", "rj", "rr",
"vr", "bb", "jr"), row.names = c(NA, 20L), class = "data.frame")
Structure of data:
str(fevents2)
'data.frame': 1430 obs. of 14 variables:
$ weeks : Factor w/ 7 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
$ A1M.Date: Factor w/ 27 levels "2012-05-09","2012-05-10",..: 1 1 1 1 1 1 1 1 1 1 ...
$ vv : Factor w/ 24 levels "C AA","C AJ",..: 8 8 8 20 24 24 24 1 13 13 ...
$ rv : Factor w/ 25 levels "C AA","C AJ",..: 25 25 25 25 25 25 25 25 10 10 ...
$ ja : Factor w/ 25 levels "C AA","C AJ",..: 12 12 12 12 12 12 12 12 12 12 ...
$ aa : Factor w/ 25 levels "C AA","C AJ",..: 2 2 2 2 2 2 2 2 25 25 ...
$ bv : Factor w/ 25 levels "C AA","C AJ",..: 25 11 11 11 23 23 23 23 23 23 ...
$ aj : Factor w/ 25 levels "C AA","C AJ",..: 7 7 7 25 25 25 25 25 9 9 ...
$ vb : Factor w/ 25 levels "C AA","C AJ",..: 1 1 1 25 25 25 25 25 25 25 ...
$ rj : Factor w/ 23 levels "C AA","C AJ",..: 5 5 16 16 16 16 16 16 16 16 ...
$ rr : Factor w/ 25 levels "C AA","C AJ",..: 3 3 3 3 3 3 3 3 3 3 ...
$ vr : Factor w/ 25 levels "C AA","C AJ",..: 5 5 5 25 25 7 7 7 7 7 ...
$ bb : Factor w/ 24 levels "C AA","C AJ",..: 4 4 4 4 4 4 4 4 4 4 ...
$ jr : Factor w/ 25 levels "C AA","C AJ",..: 25 25 10 10 22 22 25 25 25 25 ...
NULL
I understand that I have factors, but converting the factors to numeric with
as.numeric(as.character(fevents2))
or:
sapply(fevents2, function(x) as.numeric(as.character(x)))
doesn't solve my issue:
Error in fevents3[, 3:14] : incorrect number of dimensions
In addition: Warning message:
In eval.with.vis(expr, envir, enclos) : NAs introduced by coercion
Here's a sample data.frame on which the `rle` function works:
dput(fevents[1:20,]
structure(list(weeks = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1), A1M.Date = c("2012-05-09", "2012-05-09",
"2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09",
"2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09",
"2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09", "2012-05-09",
"2012-05-09", "2012-05-09", "2012-05-09"), vv = c("C RR", "C RR",
"C RR", "G RR", "nil", "nil", "nil", "C AA", "G AA", "G AA",
"G AA", "G AA", "G AA", "G AA", "G AA", "G AA", "G AA", "G AA",
"G AA", "nil"), rv = c("nil", "nil", "nil", "nil", "nil", "nil",
"nil", "nil", "C VB", "C VB", "G VB", "G VB", "G VB", "nil",
"G VB", "G VB", "G VB", "G VB", "G VB", "nil"), ja = c("C VV",
"C VV", "C VV", "C VV", "C VV", "C VV", "C VV", "C VV", "C VV",
"C VV", "C VV", "nil", "nil", "G VV", "G VV", "G VV", "G VV",
"G VV", "G VV", "G VV"), aa = c("C AJ", "C AJ", "C AJ", "C AJ",
"C AJ", "C AJ", "C AJ", "C AJ", "nil", "nil", "nil", "nil", "nil",
"nil", "nil", "nil", "nil", "nil", "nil", "nil"), bv = c("nil",
"C VR", "C VR", "C VR", "G VR", "G VR", "G VR", "G VR", "G VR",
"G VR", "G VR", "G VR", "G VR", "G VR", "G VR", "G VR", "G VR",
"G VR", "G VR", "G VR"), aj = c("C RJ", "C RJ", "C RJ", "nil",
"nil", "nil", "nil", "nil", "C RV", "C RV", "G RV", "G RV", "G RV",
"G RV", "G RV", "G RV", "G RV", "G RV", "nil", "nil"), vb = c("C AA",
"C AA", "C AA", "nil", "nil", "nil", "nil", "nil", "nil", "nil",
"nil", "nil", "nil", "nil", "nil", "nil", "C AJ", "nil", "C AJ",
"C AJ"), rj = c("C JR", "C JR", "G JR", "G JR", "G JR", "G JR",
"G JR", "G JR", "G JR", "G JR", "G JR", "G JR", "G JR", "G JR",
"G JR", "G JR", "G JR", "G JR", "G JR", "G JR"), rr = c("C BB",
"C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB",
"C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB", "C BB",
"C BB", "C BB", "C BB"), vr = c("C JA", "C JA", "C JA", "nil",
"nil", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "nil", "C RJ",
"C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ", "C RJ"
), bb = c("C BV", "C BV", "C BV", "C BV", "C BV", "C BV", "C BV",
"C BV", "C BV", "C BV", "C BV", "C BV", "C BV", "C BV", "C BV",
"C BV", "C BV", "C BV", "C BV", "C BV"), jr = c("nil", "nil",
"C VB", "C VB", "G VB", "G VB", "nil", "nil", "nil", "nil", "nil",
"nil", "nil", "nil", "nil", "nil", "C JA", "C JA", "C JA", "C JA"
)), .Names = c("weeks", "A1M.Date", "vv", "rv", "ja", "aa", "bv",
"aj", "vb", "rj", "rr", "vr", "bb", "jr"), row.names = c(NA,
20L), class = "data.frame")
str(fevents)
'data.frame': 1430 obs. of 14 variables:
$ weeks : num 1 1 1 1 1 1 1 1 1 1 ...
$ A1M.Date: chr "2012-05-09" "2012-05-09" "2012-05-09" "2012-05-09" ...
$ vv : chr "C RR" "C RR" "C RR" "G RR" ...
$ rv : chr "nil" "nil" "nil" "nil" ...
$ ja : chr "C VV" "C VV" "C VV" "C VV" ...
$ aa : chr "C AJ" "C AJ" "C AJ" "C AJ" ...
$ bv : chr "nil" "C VR" "C VR" "C VR" ...
$ aj : chr "C RJ" "C RJ" "C RJ" "nil" ...
$ vb : chr "C AA" "C AA" "C AA" "nil" ...
$ rj : chr "C JR" "C JR" "G JR" "G JR" ...
$ rr : chr "C BB" "C BB" "C BB" "C BB" ...
$ vr : chr "C JA" "C JA" "C JA" "nil" ...
$ bb : chr "C BV" "C BV" "C BV" "C BV" ...
$ jr : chr "nil" "nil" "C VB" "C VB" ...
I found a really "not elegant" workaround: writing the data.frame to a file as CSV and importing it again with `stringsAsFactors = FALSE`. This is not what I want to write in my code... There must be a simpler way to rearrange the structure of the data.frame to please `rle`?
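The problem is that a factor is *not* a plain atomic vector as far as `rle()` is concerned, which is what the error is telling you: a factor is an integer vector carrying `levels` and `class` attributes, so the `is.vector()` check inside `rle()` fails. Either convert all the factors to characters first (and not by coercing them to numeric!) or do the conversion inside the anonymous function you are applying.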
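So this, which implements the second idea (converting with `as.character()` inside the anonymous function), works after a fashion:
aggregate(fevents2[, 3:14], list(weeks = fevents2[, 1]), function(x) rle(as.character(x))$values)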
though I am not sure what you expected to get - there is only one week here, and `aggregate` and `rle` have stuck all the values together. Did you want separate `$values` for each of the variables in `fevents2` that you are aggregating over?
Another thing:
as.numeric(as.character(fevents2))
can't possibly work, as the data are not numeric! And you can't apply those functions to a whole data frame and get anything like what you intended - if they work at all.
The
sapply()
thing should work. Here is a version that checks whether each variable is a factor or not and coerces it to character if it is.
But note that
sapply()
simplifies to a matrix, which will change the `aggregate()` method dispatched.
Instead, perhaps convert the columns while keeping a data frame.
Now, if you wanted to apply `rle()` to each column of the split-up data and keep the results separate, how about splitting the data by week and applying `rle()` column by column? That gives a result
which is the same answer as that for `aggregate()` above, but with each `rle()` output kept separate.
[Note: This is only true here because the data snippet you show has just one week. I can't recall how `unlist(res)` will look if there is more than one week.]