可以将文章内容翻译成中文,广告屏蔽插件可能会导致该功能失效(如失效，请关闭广告屏蔽插件后再试):

问题:

I have a python function given below:

def myfun(x):
    if x > 0:
        return 0
    else:
        return np.exp(x)

where np is the numpy library. I want to make the function vectorized in numpy, so I use:

vec_myfun = np.vectorize(myfun)

I did a test to evaluate the efficiency. First I generate a vector of 100 random numbers:

x = np.random.randn(100)

Then I run the following code to obtain the runtime:

%timeit np.exp(x)

%timeit vec_myfun(x)

The runtime for np.exp(x) is 1.07 µs ± 24.9 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each).

The runtime for vec_myfun(x) is 71.2 µs ± 1.68 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

My question is: compared to np.exp, vec_myfun has only one extra step to check the value of $x$, but it runs much slowly than np.exp. Is there an efficient way to vectorize myfun to make it as efficient as np.exp?

回答1:

ufunc like np.exp have a where parameter, which can be used as:

In [288]: x = np.random.randn(10)
In [289]: out=np.zeros_like(x)
In [290]: np.exp(x, out=out, where=(x<=0))
Out[290]: 
array([0.        , 0.        , 0.        , 0.        , 0.09407685,
       0.92458328, 0.        , 0.        , 0.46618914, 0.        ])
In [291]: x
Out[291]: 
array([ 0.37513573,  1.75273458,  0.30561659,  0.46554985, -2.3636433 ,
       -0.07841215,  2.00878429,  0.58441085, -0.76316384,  0.12431333])

This actually skips the calculation where the where is false.

In contrast:

np.where(arr > 0, 0, np.exp(arr))

calculates np.exp(arr) first for all arr (that's normal Python evaluation order), and then performs the where selection. With this exp that isn't a big deal, but with log it could be problems.

回答2:

Use np.where:

>>> x = np.random.rand(100,)
>>> %timeit np.exp(x)
1.22 µs ± 49.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
>>> %timeit np.where(x > 0, 0, np.exp(x))
4.09 µs ± 282 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)

For comparison, your vectorized function runs in about 30 microseconds on my machine.

As to why it runs slower, it's just much more complicated than np.exp. It's doing lots of type deduction, broadcasting, and possibly making many calls to the actual method. Much of this happens in Python itself, while nearly everything in the call to np.exp (and the np.where version here) is in C.

回答3:

Just thinking outside of the box, what about implementing a function piecewise_exp() that basically multiplies np.exp() with arr < 0?

import numpy as np


def piecewise_exp(arr):
    return np.exp(arr) * (arr < 0)

Writing the code proposed so far as functions:

@np.vectorize
def myfun(x):
    if x > 0:
        return 0.0
    else:
        return np.exp(x)


def bnaeker_exp(arr):
    return np.where(arr > 0, 0, np.exp(arr))

And testing that everything is consistent:

np.random.seed(0)


# : test that the functions have the same behavior
num = 10
x = np.random.rand(num) - 0.5

print(x)
print(myfun(x))
print(piecewise_exp(x))
print(bnaeker_exp(x))

Doing some micro-benchmarks for small inputs:

# : micro-benchmarks for small inputs
num = 100
x = np.random.rand(num) - 0.5

%timeit np.exp(x)
# 1.63 µs ± 45.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
%timeit myfun(x)
# 54 µs ± 967 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%timeit bnaeker_exp(x)
# 4 µs ± 87.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
%timeit piecewise_exp(x)
# 3.38 µs ± 59.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)

... and for larger inputs:

# : micro-benchmarks for larger inputs
num = 100000
x = np.random.rand(num) - 0.5

%timeit np.exp(x)
# 32.7 µs ± 1.78 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%timeit myfun(x)
# 44.9 ms ± 1.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
%timeit bnaeker_exp(x)
# 481 µs ± 25.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
%timeit piecewise_exp(x)
# 149 µs ± 2.65 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

This shows that piecewise_exp() is faster than anything else proposed so far, especially for larger inputs for which np.where() gets more inefficient since it uses integer indexing instead of boolean masks, and reasonably approaches np.exp() speed.

EDIT

Also, the performances of the np.where() version (bnaeker_exp()) do depend on the number of elements of the array actually satisfying the condition. If none of them does (like when you test on x = np.random.rand(100)), this is slightly faster than the boolean array multiplication version (piecewise_exp()) (128 µs ± 3.26 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each) on my machine for n = 100000).