Faster Way To Simultaneously Iterate Over Rolling Windows Of Two Or More NumPy Arrays

2019-07-20 10:50发布

问题:

I have two numpy arrays x and y. e.g.

x
Out[1]: 
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])
y
Out[1]: 
array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
       113, 114, 115, 116, 117, 118, 119])

I want to simultaneously iterate over an n-period rolling window over x and y.

I would like to do this as quickly as possible, while keeping the rolling windows as numpy arrays. I shamelessly stole some code from the itertools documentation and then made my own version that only operates on np.arrays, but I am wondering if anyone here could help me make my function faster.

My Source Code is:

from itertools import islice
import numpy as np

import time
class Timer(object):
    """Context manager that measures and prints how long its ``with`` body ran.

    After the block exits, ``start`` and ``end`` hold the ``time.time()``
    readings taken on entry and exit, and ``timetaken`` holds their
    difference in seconds.
    """

    def __enter__(self):
        """Record the start time and return the timer for ``as`` binding."""
        self.start = time.time()
        return self

    def __exit__(self, a, b, c):
        """Print the elapsed time; ``a``, ``b``, ``c`` are the exception triple.

        Returns ``None``, so an exception raised in the body still propagates.
        """
        print('ending')
        # The end reading is taken only after the 'ending' line is printed.
        stop = time.time()
        elapsed = stop - self.start
        self.end = stop
        self.timetaken = elapsed
        print('Took {0} seconds'.format(elapsed))

def window(seq, n=2):
    """Yield each width-``n`` sliding window over ``seq`` as a transposed array.

        s -> (s0, s1, ..., s[n-1]), (s1, s2, ..., sn), ...

    Every window is built with ``np.array`` from the ``n`` most recent items
    and then transposed, so items that are pairs produce a ``(2, n)`` array.
    Nothing is yielded when ``seq`` has fewer than ``n`` items.
    """
    source = iter(seq)
    current = list(islice(source, n))
    if len(current) == n:
        yield np.array(current).T
    for item in source:
        # Drop the oldest item and append the newest, using a fresh list
        # so earlier windows are never touched.
        current = current[1:]
        current.append(item)
        yield np.array(current).T

def npwindow(seq, n=2):
    """Yield every width-``n`` rolling window over the columns of ``seq``.

    ``seq`` is an array-like of shape ``(k, N)`` (one row per series). Each
    yielded window has shape ``(k, n)`` and holds ``n`` consecutive columns,
    starting with columns ``0..n-1`` and advancing one column per step, which
    matches the sequence produced by ``window(zip(*seq), n)``.

    Fixes over the previous version: the ``seq`` argument is actually used
    (the old body read the module-level ``Z``), and the first window is
    yielded instead of being skipped.

    Nothing is yielded when ``seq`` has fewer than ``n`` columns. Every
    window is backed by its own buffer, so modifying one never changes
    ``seq`` or another window.
    """
    rows = np.asarray(seq).T
    if len(rows) < n:
        return
    # Copy so the first window does not alias the caller's data.
    r = rows[:n].copy()
    yield r.T
    for zt in rows[n:]:
        # np.roll returns a new array, so earlier windows stay intact.
        r = np.roll(r, shift=-1, axis=0)
        r[-1] = zt
        yield r.T


# Benchmark configuration: window width and length of each series.
n = 100
N = 1000

# Two parallel integer series; y runs from 100 to N + 99.
x = np.arange(N)
y = np.arange(100, N + 100)

# Stack the series as rows so Z has shape (2, N).
Z = np.vstack((x, y))

def npwindow_test(Z, n):
    """Exhaust ``npwindow(Z, n)``, discarding every window (benchmark driver)."""
    for _ in npwindow(Z, n):
        pass

def window_test(Z, n):
    """Exhaust ``window`` over the columns of ``Z``, discarding every window."""
    # zip(*Z) pairs up the i-th element of every row of Z.
    columns = zip(*Z)
    for _ in window(columns, n):
        pass

# Number of full passes each implementation makes inside its timed block.
num_iter = 10
# Time the numpy-based generator; t0.timetaken is set when the block exits.
with Timer() as t0:
    for i in range(num_iter):
        npwindow_test( Z, n )

# Time the itertools-based generator the same way.
with Timer() as t1:
    for i in range(num_iter):
        window_test( Z, n )

# A ratio below 1.0 means the npwindow runs took less time than the window runs.
print( ' ratio : {0}'.format( t0.timetaken / t1.timetaken ) )

回答1:

A rolling window can be built with np.lib.stride_tricks.as_strided().

reference link: http://www.rigtorp.se/2011/01/01/rolling-statistics-numpy.html

Here is the code:

import numpy as np

# Small example: window width n over two series of length N.
n = 10
N = 20

# x counts from 0; y is the same series shifted up by 100.
x = np.arange(N)
y = x + 100

# One series per row, giving Z the shape (2, N).
Z = np.array([x, y])

def rolling_window(a, window):
    """Return a strided view of every length-``window`` run along the last axis.

    Parameters
    ----------
    a : array_like
        Input data. It is passed through ``np.asarray``, so nested sequences
        are accepted as well as arrays.
    window : int
        Window length; must satisfy ``1 <= window <= a.shape[-1]``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``a.shape[:-1] + (a.shape[-1] - window + 1, window)``.
        Entry ``[..., i, :]`` is ``a[..., i:i + window]``. The result is a
        view built with ``as_strided``: no data is copied and neighbouring
        windows share memory, so treat it as read-only.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1 or larger than the last axis of ``a``.
    """
    a = np.asarray(a)
    if not 1 <= window <= a.shape[-1]:
        raise ValueError(
            "window must be between 1 and {0}, got {1}".format(a.shape[-1], window)
        )
    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
    # Stepping one window forward moves by one element along the last axis,
    # so the new axis reuses the last axis's stride.
    strides = a.strides + (a.strides[-1],)
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)


# Z is (2, N), so rZ is (2, N - n + 1, n): for each row, every length-n window.
rZ = rolling_window(Z, n)

You can then do calculations on rZ without a for loop. If you do need a for loop, iterate over axis 1:

# Walk the window axis (axis 1); each slice holds one aligned window per row of rZ.
# range() and print() run on both Python 2 and 3 (xrange/print-statement do not).
for i in range(rZ.shape[1]):
    print(rZ[:, i, :])

output:

[[  0   1   2   3   4   5   6   7   8   9]
 [100 101 102 103 104 105 106 107 108 109]]
[[  1   2   3   4   5   6   7   8   9  10]
 [101 102 103 104 105 106 107 108 109 110]]
[[  2   3   4   5   6   7   8   9  10  11]
 [102 103 104 105 106 107 108 109 110 111]]
[[  3   4   5   6   7   8   9  10  11  12]
 [103 104 105 106 107 108 109 110 111 112]]
[[  4   5   6   7   8   9  10  11  12  13]
 [104 105 106 107 108 109 110 111 112 113]]
[[  5   6   7   8   9  10  11  12  13  14]
 [105 106 107 108 109 110 111 112 113 114]]
[[  6   7   8   9  10  11  12  13  14  15]
 [106 107 108 109 110 111 112 113 114 115]]
[[  7   8   9  10  11  12  13  14  15  16]
 [107 108 109 110 111 112 113 114 115 116]]
[[  8   9  10  11  12  13  14  15  16  17]
 [108 109 110 111 112 113 114 115 116 117]]
[[  9  10  11  12  13  14  15  16  17  18]
 [109 110 111 112 113 114 115 116 117 118]]
[[ 10  11  12  13  14  15  16  17  18  19]
 [110 111 112 113 114 115 116 117 118 119]]