I would like to do some sequence prediction in tensorflow using GRU. so I have created the same model in 2 different ways as follows:
In model 1 I have a 2 GRUs, one after the other, that is, the new_state1
, the final hidden state of the first GRU, acts as the initial state to the second GRU. Therefore, the model outputs new_state1
and new_state2
consequentially. Note that this is not a 2 layer model, but only 1 layer. From the code below, I divided the input and the output into 2 parts where GRU1 takes the first part, and the second GRU takes the second part.
Also the random_seed
is set and fixed for both model so that results can be comparable.
Model 1
import tensorflow as tf
import numpy as np
cell_size = 32
seq_length = 1000
time_steps1 = 500
time_steps2 = seq_length - time_steps1
x_t = np.arange(1, seq_length + 1)
x_t_plus_1 = np.arange(2, seq_length + 2)
tf.set_random_seed(123)
m_dtype = tf.float32
input_1 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps1, 1], name="input_1")
input_2 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps2, 1], name="input_2")
labels1 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps1, 1], name="labels_1")
labels2 = tf.placeholder(dtype=m_dtype, shape=[None, time_steps2, 1], name="labels_2")
labels = tf.concat([labels1, labels2], axis=1, name="labels")
initial_state = tf.placeholder(shape=[None, cell_size], dtype=m_dtype, name="initial_state")
def model(input_feat1, input_feat2):
with tf.variable_scope("GRU"):
cell1 = tf.nn.rnn_cell.GRUCell(cell_size)
cell2 = tf.nn.rnn_cell.GRUCell(cell_size)
with tf.variable_scope("First50"):
# output1: shape=[1, time_steps1, 32]
output1, new_state1 = tf.nn.dynamic_rnn(cell1, input_feat1, dtype=m_dtype, initial_state=initial_state)
with tf.variable_scope("Second50"):
# output2: shape=[1, time_steps2, 32]
output2, new_state2 = tf.nn.dynamic_rnn(cell2, input_feat2, dtype=m_dtype, initial_state=new_state1)
with tf.variable_scope("output"):
# output shape: [1, time_steps1 + time_steps2, 32] => [1, 100, 32]
output = tf.concat([output1, output2], axis=1)
output = tf.reshape(output, shape=[-1, cell_size])
output = tf.layers.dense(output, units=1)
output = tf.reshape(output, shape=[1, time_steps1 + time_steps2, 1])
with tf.variable_scope("outputs_1_2_reshaped"):
output1 = tf.slice(input_=output, begin=[0, 0, 0], size=[-1, time_steps1, -1])
output2 = tf.slice(input_=output, begin=[0, time_steps1, 0], size=[-1, time_steps2, 1])
print(output.get_shape().as_list(), "1")
print(output1.get_shape().as_list(), "2")
print(output2.get_shape().as_list(), "3")
return output, output1, output2, initial_state, new_state1, new_state2
output, output1, output2, initial_state, new_state1, new_state2 = model(input_1, input_2)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
to_run_list = [new_state1, new_state2]
in1 = np.reshape(x_t[:time_steps1], newshape=(1, time_steps1, 1))
in2 = np.reshape(x_t[time_steps1:], newshape=(1, time_steps2, 1))
l1 = np.reshape(x_t_plus_1[:time_steps1], newshape=(1, time_steps1, 1))
l2 = np.reshape(x_t_plus_1[time_steps1:], newshape=(1, time_steps2, 1))
i_s = np.zeros([1, cell_size])
new_s1, new_s2 = sess.run(to_run_list, feed_dict={input_1: in1,
input_2: in2,
labels1: l1,
labels2: l2,
initial_state: i_s})
print(np.shape(new_s1), np.shape(new_s2))
print(np.mean(new_s1), np.mean(new_s2))
print(np.sum(new_s1), np.sum(new_s2))
In this model, Instead of having 2 different GRU, I created one, and I divided the input and labels into 2 different parts as well, and I used a for loop to iterate over my input dataset. Then the final state is taken and fed back into the same model as initial state.
Note that both model1 and model2 have the very first initial state of zeros.
Model 2
import tensorflow as tf
import numpy as np
cell_size = 32
seq_length = 1000
time_steps = 500
x_t = np.arange(1, seq_length + 1)
x_t_plus_1 = np.arange(2, seq_length + 2)
tf.set_random_seed(123)
m_dtype = tf.float32
inputs = tf.placeholder(dtype=m_dtype, shape=[None, time_steps, 1], name="inputs")
labels = tf.placeholder(dtype=m_dtype, shape=[None, time_steps, 1], name="labels")
initial_state = tf.placeholder(shape=[None, cell_size], dtype=m_dtype, name="initial_state")
grads_initial_state = tf.placeholder(dtype=m_dtype, shape=[None, cell_size], name="prev_grads")
this_is_last_batch = tf.placeholder(dtype=tf.bool, name="this_is_last_batch")
def model(input_feat):
with tf.variable_scope("GRU"):
cell = tf.nn.rnn_cell.GRUCell(cell_size)
with tf.variable_scope("cell"):
# output1: shape=[1, time_steps, 32]
output, new_state = tf.nn.dynamic_rnn(cell, input_feat, dtype=m_dtype, initial_state=initial_state)
with tf.variable_scope("output"):
output = tf.reshape(output, shape=[-1, cell_size])
output = tf.layers.dense(output, units=1)
output = tf.reshape(output, shape=[1, time_steps, 1])
print(output.get_shape().as_list(), "1")
return output, new_state
output, new_state = model(inputs)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
# 1000 // 500 = 2
num_iterations = seq_length // time_steps
print("num_iterations:", num_iterations)
final_states = []
to_run_list = [grads_wrt_initial_state, new_state]
for i in range(num_iterations):
current_xt = x_t[i * time_steps: (i + 1)*time_steps]
current_xt_plus_1 = x_t_plus_1[i*time_steps: (i + 1)*time_steps]
in1 = np.reshape(current_xt, newshape=(1, time_steps, 1))
l1 = np.reshape(current_xt_plus_1, newshape=(1, time_steps, 1))
i_s = np.zeros([1, cell_size])
if i == 0:
new_s = sess.run(new_state, feed_dict={inputs: in1,
labels: l1,
initial_state: i_s})
final_states.append(new_s)
print("---->", np.mean(final_states[-1]), np.sum(final_states[-1]), i)
else:
new_s = sess.run(new_state, feed_dict={inputs: in1,
labels: l1,
initial_state: final_states[-1]})
final_states.append(new_s)
print("---->", np.mean(final_states[-1]), np.sum(final_states[-1]), i)
Finally, after printing out the statistics of new_state1
and new_state2
in model1, they were different from the new_state
, after each iteration, in model2.
I would like to know how to fix this problem and why is that happening.
Edit:
I have figured out that the weights values of the gru in both files are different
Now how can I reproduce the same results in 2 the different files even after setting the random seed?
Any help is much appreciated!!!