可以将文章内容翻译成中文,广告屏蔽插件可能会导致该功能失效(如失效,请关闭广告屏蔽插件后再试):
问题:
If, for argument's sake, I want the last five elements of a 10-length vector in Python, I can use the "-" operator in the range index so:
>>> x = range(10)
>>> x
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
>>> x[-5:]
[5, 6, 7, 8, 9]
>>>
What is the best way to do this in R? Is there a cleaner way than my current technique which is to use the length() function?
> x <- 0:9
> x
[1] 0 1 2 3 4 5 6 7 8 9
> x[(length(x) - 4):length(x)]
[1] 5 6 7 8 9
>
The question is related to time series analysis btw where it is often useful to work only on recent data.
回答1:
see ?tail
and ?head
for some convenient functions:
> x <- 1:10
> tail(x,5)
[1] 6 7 8 9 10
For the argument's sake : everything but the last five elements would be :
> head(x,n=-5)
[1] 1 2 3 4 5
As @Martin Morgan says in the comments, there are two other possibilities which are faster than the tail solution, in case you have to carry this out a million times on a vector of 100 million values. For readibility, I'd go with tail.
test elapsed relative
tail(x, 5) 38.70 5.724852
x[length(x) - (4:0)] 6.76 1.000000
x[seq.int(to = length(x), length.out = 5)] 7.53 1.113905
benchmarking code :
require(rbenchmark)
x <- 1:1e8
do.call(
benchmark,
c(list(
expression(tail(x,5)),
expression(x[seq.int(to=length(x), length.out=5)]),
expression(x[length(x)-(4:0)])
), replications=1e6)
)
回答2:
The disapproval of tail
here based on speed alone doesn't really seem to emphasize that part of the slower speed comes from the fact that tail is safer to work with, if you don't for sure that the length of x will exceed n
, the number of elements you want to subset out:
x <- 1:10
tail(x, 20)
# [1] 1 2 3 4 5 6 7 8 9 10
x[length(x) - (0:19)]
#Error in x[length(x) - (0:19)] :
# only 0's may be mixed with negative subscripts
Tail will simply return the max number of elements instead of generating an error, so you don't need to do any error checking yourself. A great reason to use it. Safer cleaner code, if extra microseconds/milliseconds don't matter much to you in its use.
回答3:
You can do exactly the same thing in R with two more characters:
x <- 0:9
x[-5:-1]
[1] 5 6 7 8 9
or
x[-(1:5)]
回答4:
How about rev(x)[1:5]
?
x<-1:10
system.time(replicate(10e6,tail(x,5)))
user system elapsed
138.85 0.26 139.28
system.time(replicate(10e6,rev(x)[1:5]))
user system elapsed
61.97 0.25 62.23
回答5:
Here is a function to do it and seems reasonably fast.
endv<-function(vec,val)
{
if(val>length(vec))
{
stop("Length of value greater than length of vector")
}else
{
vec[((length(vec)-val)+1):length(vec)]
}
}
USAGE:
test<-c(0,1,1,0,0,1,1,NA,1,1)
endv(test,5)
endv(LETTERS,5)
BENCHMARK:
test replications elapsed relative
1 expression(tail(x, 5)) 100000 5.24 6.469
2 expression(x[seq.int(to = length(x), length.out = 5)]) 100000 0.98 1.210
3 expression(x[length(x) - (4:0)]) 100000 0.81 1.000
4 expression(endv(x, 5)) 100000 1.37 1.691
回答6:
I just add here something related. I was wanted to access a vector with backend indices, ie writting something like tail(x, i)
but to return x[length(x) - i + 1]
and not the whole tail.
Following commentaries I benchmarked two solutions:
accessRevTail <- function(x, n) {
tail(x,n)[1]
}
accessRevLen <- function(x, n) {
x[length(x) - n + 1]
}
microbenchmark::microbenchmark(accessRevLen(1:100, 87), accessRevTail(1:100, 87))
Unit: microseconds
expr min lq mean median uq max neval
accessRevLen(1:100, 87) 1.860 2.3775 2.84976 2.803 3.2740 6.755 100
accessRevTail(1:100, 87) 22.214 23.5295 28.54027 25.112 28.4705 110.833 100
So it appears in this case that even for small vectors, tail
is very slow comparing to direct access