可以将文章内容翻译成中文,广告屏蔽插件可能会导致该功能失效(如失效,请关闭广告屏蔽插件后再试):
问题:
If the input is zero I want to make an array which looks like this:
[1,0,0,0,0,0,0,0,0,0]
and if the input is 5:
[0,0,0,0,0,1,0,0,0,0]
For the above I wrote:
np.put(np.zeros(10),5,1)
but it did not work.
Is there any way to implement this in one line?
回答1:
Usually, when you want to get a one-hot encoding for classification in machine learning, you have an array of indices.
import numpy as np
nb_classes = 6
targets = np.array([[2, 3, 4, 0]]).reshape(-1)
one_hot_targets = np.eye(nb_classes)[targets]
The one_hot_targets
is now
array([[ 0., 0., 1., 0., 0., 0.],
[ 0., 0., 0., 1., 0., 0.],
[ 0., 0., 0., 0., 1., 0.],
[ 1., 0., 0., 0., 0., 0.]])
The .reshape(-1)
is there to make sure you have the right labels format (you might also have [[2], [3], [4], [0]]
). The -1
is a special value which means "put all remaining stuff in this dimension". As there is only one, it flattens the array.
Copy-Paste solution
def get_one_hot(targets, nb_classes):
    """Return the one-hot encoding of integer class indices.

    Args:
        targets: Integer class indices, given as an ndarray or any
            array-like (scalar, list, nested list).
        nb_classes: Number of classes, i.e. the length of the one-hot axis.

    Returns:
        ndarray whose shape is the shape of ``targets`` with an extra
        trailing axis of length ``nb_classes``.
    """
    # Convert once so plain lists work too; reading `.shape` on the raw
    # argument fails for anything that is not already an ndarray.
    targets = np.asarray(targets)
    # Row k of the identity matrix is the one-hot vector for class k.
    res = np.eye(nb_classes)[targets.reshape(-1)]
    return res.reshape(list(targets.shape) + [nb_classes])
Package
You can use mpu.ml.indices2one_hot. It's tested and simple to use:
import mpu.ml
one_hot = mpu.ml.indices2one_hot([1, 3, 0], nb_classes=5)
回答2:
Something like :
np.array([int(i == 5) for i in range(10)])
Should do the trick.
But I suppose there exist other solutions using numpy.
edit : the reason why your formula does not work : np.put does not return anything; it modifies the array passed as its first argument in place. The correct way to use np.put()
is :
a = np.zeros(10)
np.put(a,5,1)
The problem is that it can't be done in one line, as you need to define the array before passing it to np.put()
回答3:
Taking a quick look at the manual, you will see that np.put
does not return a value. While your technique is fine, you are accessing None
instead of your result array.
For a 1-D array it is better to just use direct indexing, especially for such a simple case.
Here is how to rewrite your code with minimal modification:
arr = np.zeros(10)
np.put(arr, 5, 1)
Here is how to do the second line with indexing instead of put
:
arr[5] = 1
回答4:
The problem here is that you never store your array anywhere. The put
function works in place on the array and returns nothing. Since you never give your array a name you can not address it later. So this
one_pos = 5
x = np.zeros(10)
np.put(x, one_pos, 1)
would work, but then you could just use indexing:
one_pos = 5
x = np.zeros(10)
x[one_pos] = 1
In my opinion that would be the correct way to do this if no special reason exists to do this as a one liner. This might also be easier to read and readable code is good code.
回答5:
The np.put
mutates its array arg in-place. It's conventional in Python for functions / methods that perform in-place mutation to return None
; np.put
adheres to that convention. So if a
is a 1D array and you do
a = np.put(a, 5, 1)
then a
will get replaced by None
.
Your code is similar to that, but it passes an un-named array to np.put
.
A compact & efficient way to do what you want is with a simple function, eg:
import numpy as np
def one_hot(i, size=10):
    """Return a 1-D uint8 array of length ``size`` with a 1 at index ``i``.

    Args:
        i: Index of the entry to set to 1.
        size: Length of the returned array. Defaults to 10, the length
            asked for in the question.

    Returns:
        ndarray of dtype uint8, all zeros except for position ``i``.
    """
    a = np.zeros(size, 'uint8')
    a[i] = 1
    return a
a = one_hot(5)
print(a)
output
[0 0 0 0 0 1 0 0 0 0]
回答6:
Use np.identity
or np.eye
. You can try something like this with your input i, and the array size s:
np.identity(s)[i:i+1]
For example, print(np.identity(10)[0:1])
will print:
[[ 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
If you are using TensorFlow, you can use tf.one_hot
: https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#one_hot
回答7:
You could use List comprehension:
[0 if i !=5 else 1 for i in range(10)]
turns to
[0,0,0,0,0,1,0,0,0,0]
回答8:
# Benchmark: build each one-hot row by calling np.put on a fresh zero array.
import time

import numpy as np

# perf_counter is the clock intended for measuring short intervals.
start_time = time.perf_counter()
z = []
for l in [1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 4, 6]:
    a = np.repeat(0, 10)  # fresh length-10 vector of integer zeros
    np.put(a, l, 1)  # modifies `a` in place; np.put itself returns None
    z.append(a)
print("--- %s seconds ---" % (time.perf_counter() - start_time))
# Timing reported in the original post:
#--- 0.00174784660339 seconds ---
# Benchmark: build each one-hot row with a list comprehension.
import time

import numpy as np

# perf_counter is the clock intended for measuring short intervals.
start_time = time.perf_counter()
z = []
for l in [1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 4, 6]:
    # int(i == l) is 1 only at position l, 0 everywhere else.
    z.append(np.array([int(i == l) for i in range(10)]))
print("--- %s seconds ---" % (time.perf_counter() - start_time))
# Timing reported in the original post:
#--- 0.000400066375732 seconds ---
回答9:
I'm not sure about the performance, but the following code works and it's neat.
x = np.array([0, 5])
x_onehot = np.identity(6)[x]