Predicting the tensorflow model

2019-08-03 13:26发布

问题:

I am new to machine learning and am studying the Iris dataset. I use sepal length, sepal width and petal length to predict petal width with a neural network. The network has 3 input nodes (weights A1, bias b1), 10 hidden nodes (weights A2, bias b2) and 1 output node. The variables x_val_train, x_val_test, y_val_train and y_val_test are used for training and testing. The main code is below.

# Features are the first three columns of every Iris row; the regression
# target is the fourth column.
x_val = np.array([x[0:3] for x in iris.data])
y_val = np.array([x[3] for x in iris.data])

# Number of units in the single hidden layer.
hidden_layer_size = 10

# Pick 80% of the row indices at random, without replacement, for training.
train_indices = np.random.choice(len(x_val), round(len(x_val)*0.8), replace = False)

# Every index that was not picked for training goes to the test set.
test_indices = np.array(list(set(range(len(x_val))) - set(train_indices)))
#print("Train Indexes\n",train_indices,test_indices)

# Split features and targets using the two index arrays.
x_val_train = x_val[train_indices]
x_val_test = x_val[test_indices]
y_val_train = y_val[train_indices]
y_val_test = y_val[test_indices]


# Graph inputs: three features per sample and one target value per sample.
x_data = tf.placeholder(shape=[None, 3], dtype = tf.float32)
y_target = tf.placeholder(shape = [None, 1], dtype = tf.float32) # None leaves the number of rows (batch size) unspecified

# Trainable parameters, all initialised from a normal distribution.

A1 = tf.Variable(tf.random_normal(shape = [3,hidden_layer_size])) # input -> hidden weights
b1 = tf.Variable(tf.random_normal(shape = [hidden_layer_size])) # hidden-layer bias

A2 = tf.Variable(tf.random_normal(shape = [hidden_layer_size,1])) # hidden -> output weights
b2 = tf.Variable(tf.random_normal(shape=[1])) # output-layer bias

# Forward pass: ReLU hidden layer followed by a ReLU output layer.
# NOTE(review): the ReLU on the output clamps every prediction to >= 0.

hidden_output = tf.nn.relu(tf.add(tf.matmul(x_data,A1),b1))
final_output = tf.nn.relu(tf.add(tf.matmul(hidden_output,A2),b2))

# Mean squared error between the targets and the network output.
cost = tf.reduce_mean(tf.square(y_target - final_output))

learning_rate = 0.01

# `model` is the Adam training op that minimises `cost`.
model = tf.train.AdamOptimizer(learning_rate).minimize(cost)

init = tf.global_variables_initializer()

# NOTE(review): `sess` is not created anywhere in this snippet; it must
# already exist (e.g. sess = tf.Session()) before this line runs.
sess.run(init)

#Training Loop

# Per-iteration history of the training and test RMSE (sqrt of the MSE cost).
loss_vec = []
test_loss = []
epoch = 500

for i in range(epoch):
    # Sample batch_size training-row indices (np.random.choice samples with
    # replacement by default).
    # NOTE(review): batch_size is not defined anywhere in this snippet.
    rand_index = np.random.choice(len(x_val_train), size = batch_size)
    # Features of the sampled rows.
    rand_x = x_val_train[rand_index]
    #print(rand_index,rand_x)
    # Wrap the 1-D targets in a list and transpose to get one column per
    # sample row, matching the [None, 1] y_target placeholder.
    rand_y = np.transpose([y_val_train[rand_index]])
    # One optimiser step on the sampled batch.
    sess.run(model, feed_dict = {x_data: rand_x, y_target: rand_y})

    # Cost on the same batch, evaluated after the update step above.
    temp_loss = sess.run(cost, feed_dict = {x_data: rand_x, y_target : rand_y})
    loss_vec.append(np.sqrt(temp_loss))

    # Cost on the whole held-out test set.
    test_temp_loss = sess.run(cost, feed_dict = {x_data : x_val_test, y_target : np.transpose([y_val_test])})
    test_loss.append(np.sqrt(test_temp_loss))

    # NOTE(review): with != this prints on every generation EXCEPT multiples
    # of 50; == was probably intended.
    if (i+1)%50!=0:
        print('Generation: ' + str(i+1) + '.loss = ' + str(temp_loss))

    # NOTE(review): this adds new ops to the graph on every iteration. The
    # output layer has a single column, so argmax along axis 1 is always 0.
    predict = tf.argmax(tf.add(tf.matmul(hidden_output,A2),b2), 1)

    # A single hand-written sample with three feature values.
    test = np.matrix('2  3  4')
    pred = predict.eval(session = sess, feed_dict = {x_data : test})


# Only the prediction from the last loop iteration survives to this point.
print("pred: ", pred)

# Plot the training (solid black) and test (dashed red) loss curves.
plt.plot(loss_vec, 'k-', label='Train Loss')
plt.plot(test_loss, 'r--', label='Test Loss')
plt.show()

Also, in this code: hidden_output = tf.nn.relu(tf.add(tf.matmul(x_data,A1),b1))

I have successfully trained my model after normalizing my data, but I need to predict the output for data entered by the user.

Here,

# A single hand-written sample: one row holding the three input features.
test = np.matrix('2  3  4')
# Evaluate the argmax-based `predict` op for that sample.
pred = predict.eval(session = sess, feed_dict = {x_data : test})

print("pred: ", pred)

I have written this code to predict the result, but pred always returns 0. I also tried more than 100 samples and it still returns 0. Can you please tell me where I am going wrong?

回答1:

Summary

Let's take a look at

predict = tf.argmax(tf.add(tf.matmul(hidden_output,A2),b2), 1)

This is (almost) equal to

predict = tf.argmax(final_output)

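The argmax is the main issue. If final_output were a one-hot encoding then argmax would make sense, but final_output is just an array of scalars: each row has a single column, so argmax along axis 1 can only ever return index 0, which is why pred is always 0.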

Full Working Code

Here is the full working code (it downloads the Iris CSV files if you do not already have them locally):

import numpy as np
import tensorflow as tf

import os
import urllib

# urlopen lives in `urllib` on Python 2 and in `urllib.request` on Python 3.
try:
    from urllib.request import urlopen
except ImportError:
    from urllib import urlopen

# Data sets
IRIS_TRAINING = "iris_training.csv"
IRIS_TRAINING_URL = "http://download.tensorflow.org/data/iris_training.csv"

IRIS_TEST = "iris_test.csv"
IRIS_TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"


def _download_if_missing(filename, url):
    """Fetch *url* into *filename* unless the file already exists locally."""
    if os.path.exists(filename):
        return
    response = urlopen(url)
    try:
        raw = response.read()
    finally:
        # Close the connection even if the read fails.
        response.close()
    # Binary mode: read() returns bytes on Python 3, and the payload is
    # stored exactly as received on every platform.
    with open(filename, "wb") as f:
        f.write(raw)


# If the training and test sets aren't stored locally, download them.
_download_if_missing(IRIS_TRAINING, IRIS_TRAINING_URL)
_download_if_missing(IRIS_TEST, IRIS_TEST_URL)

# `int` replaces the deprecated alias `np.int`, which was the same type.
training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=IRIS_TRAINING, target_dtype=int, features_dtype=np.float32)
test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=IRIS_TEST, target_dtype=int, features_dtype=np.float32)

# Inputs: the first three feature columns. Target: the fourth feature column,
# reshaped to a single column so it matches a [None, 1] placeholder.
x_val_train = training_set.data[:, :3]
x_val_test = test_set.data[:, :3]
y_val_train = training_set.data[:, 3].reshape([-1, 1])
y_val_test = test_set.data[:, 3].reshape([-1, 1])

# Graph inputs. The leading None leaves the number of rows open, so the same
# placeholders accept the training set, the test set and small ad-hoc batches.
x_data = tf.placeholder(tf.float32, shape=[None, 3])
y_target = tf.placeholder(tf.float32, shape=[None, 1])

# Network size
hidden_layer_size = 20

# Input -> hidden weights and the hidden-layer bias
A1 = tf.Variable(tf.random_normal([3, hidden_layer_size]))
b1 = tf.Variable(tf.random_normal([hidden_layer_size]))

# Hidden -> output weights and the output bias
A2 = tf.Variable(tf.random_normal([hidden_layer_size, 1]))
b2 = tf.Variable(tf.random_normal([1]))

# Forward pass: ReLU hidden layer, then a purely linear output layer
hidden_output = tf.nn.relu(tf.matmul(x_data, A1) + b1)
final_output = tf.matmul(hidden_output, A2) + b2

# Mean squared error between targets and predictions
squared_error = tf.square(y_target - final_output)
loss = tf.reduce_mean(squared_error)

learning_rate = 0.01
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)

# Create the session and initialise every variable defined above
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# Training bookkeeping and hyperparameters
loss_vec = []
test_loss = []
epoch = 2000
batch_size = 100


def oneTrainingSession(epoch, loss_vec, test_loss, batch_size, step=None):
    """Run one training step and record the train and test RMSE.

    The whole training set is fed on every call (full-batch training), as in
    the original code where the mini-batch indexing was commented out.

    Args:
        epoch: Total number of iterations. Unused; kept so existing calls
            keep working.
        loss_vec: List that receives the square root of the training loss.
        test_loss: List that receives the square root of the test loss.
        batch_size: Unused; kept so existing calls keep working.
        step: Optional zero-based iteration index used for progress output.
            When omitted, the generation number is taken from the length of
            ``loss_vec`` after this step's entry has been appended.
    """
    # Fetch the loss in the same run that applies the Adam update.
    temp_loss, _ = sess.run(
        [loss, train],
        feed_dict={x_data: x_val_train, y_target: y_val_train})
    loss_vec.append(np.sqrt(temp_loss))

    test_temp_loss = sess.run(
        loss, feed_dict={x_data: x_val_test, y_target: y_val_test})
    test_loss.append(np.sqrt(test_temp_loss))

    # Derive the generation locally rather than reading a module-level loop
    # variable, so the function also works when called outside that loop.
    generation = len(loss_vec) if step is None else step + 1
    if generation % 500 == 0:
        print('Generation: ' + str(generation) + '.loss = ' + str(temp_loss))

# Train for `epoch` iterations; each call performs one optimiser step and
# appends to the loss histories.
for i in range(epoch):
    oneTrainingSession(epoch, loss_vec, test_loss, batch_size)

# Predict for the first three test rows using the linear output layer.
test = x_val_test[:3, :]
# Parenthesised single-argument prints behave the same on Python 2 and 3,
# whereas the bare print statement is a syntax error on Python 3.
print("The test values are")
print(test)
print("")
pred = sess.run(final_output, feed_dict={x_data: test})
print("pred: ", pred)

Output

Generation: 500.loss = 0.12768
Generation: 1000.loss = 0.0389756
Generation: 1500.loss = 0.0370268
Generation: 2000.loss = 0.0361797
The test values are
[[ 5.9000001   3.          4.19999981]
 [ 6.9000001   3.0999999   5.4000001 ]
 [ 5.0999999   3.29999995  1.70000005]]

('pred: ', array([[ 1.45187187],
       [ 1.92516518],
       [ 0.36887735]], dtype=float32))