My Keras model is designed to take in two input time series, concatenate them, feed them through an LSTM, and do multilabel prediction on the next time step.
There are 50 training samples, with 24 time steps each, and 5625 labels each.
There are 12 validation samples, with 24 time steps each, and 5625 labels each.
When I try to validate the model, average_precision_score returns nan. Why?
I've prepared an MWE below that illustrates the problem:
import numpy as np
from sklearn.metrics import average_precision_score
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Concatenate, multiply
from keras import optimizers
import tensorflow as tf
def model_definition():
    # NOTE: reads the module-level a_tr / a_val arrays,
    # not the sliced copies inside train_and_validate.
    # Build an hour-index feature for every (sample, time step) pair.
    tr_hours, val_hours = [], []
    for i in np.arange(a_tr.shape[0]):
        for j in np.arange(a_tr.shape[1]):
            tr_hours.append(i + j)
    for i in np.arange(a_val.shape[0]):
        for j in np.arange(a_val.shape[1]):
            val_hours.append(i + j)
    tr_hours = np.asarray(tr_hours).reshape(a_tr.shape[0], a_tr.shape[1], 1)
    val_hours = np.asarray(val_hours).reshape(a_val.shape[0], a_val.shape[1], 1)
    num_time = a_tr.shape[2] + tr_hours.shape[2]
    # Two inputs, concatenated on the feature axis, feeding a stateful LSTM.
    hours_in = Input(batch_shape=(1, 1, tr_hours.shape[2]), name='hours_in')
    seq_model_in = Input(batch_shape=(1, 1, a_tr.shape[2]), name='seq_model_in')
    t_concat = Concatenate(axis=-1)([seq_model_in, hours_in])
    lstm_layer = LSTM(4, batch_input_shape=(1, 1, num_time), stateful=True)(t_concat)
    dense_merged = Dense(a_tr.shape[2], activation='sigmoid', name='dense_after_lstm')(lstm_layer)
    model = Model(inputs=[seq_model_in, hours_in], outputs=dense_merged)
    return tr_hours, val_hours, model
def train_and_validate(a_tr, a_old_tr, a_val, a_old_val):
    # Drop the last time step; the target is the last remaining step.
    a_tr = a_tr[:, :-1, :]
    y_tr = a_tr[:, -1, :]
    a_val = a_val[:, :-1, :]
    y_val = a_val[:, -1, :]
    a_old_tr = a_old_tr[:, :-1, :]
    y_old_val = a_old_val[:, -1, :]
    y_old_tr = a_old_tr[:, -1, :]
    seq_length = a_tr.shape[1]
    tr_hours, val_hours, model = model_definition()
    print(model.summary())
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # http://philipperemy.github.io/keras-stateful-lstm/
    # TRAINING
    for epoch in range(1):  # one epoch, for demo purposes
        mean_tr_loss, mean_val_ap = [], []
        for i in range(a_tr.shape[0]):
            y_true_1 = np.expand_dims(y_tr[i, :], axis=1)
            y_true = np.swapaxes(y_true_1, 0, 1)  # shape (1, 5625)
            for j in range(seq_length - 1):
                input_1 = np.expand_dims(np.expand_dims(a_tr[i][j], axis=1), axis=1)
                input_1 = np.reshape(input_1, (1, 1, a_tr.shape[2]))
                input_2 = np.expand_dims(np.expand_dims(np.array([tr_hours[i][j]]), axis=1), axis=1)
                input_2 = np.reshape(input_2, (1, 1, tr_hours.shape[2]))
                tr_loss = model.train_on_batch([input_1, input_2], y_true)
                mean_tr_loss.append(tr_loss)
            model.reset_states()
        print('loss training = {}'.format(np.mean(mean_tr_loss)))
    # VALIDATION MWE: score only the first validation sample.
    print('validating, first sample only')
    val_y_1 = np.expand_dims(y_val[0, :], axis=1)
    val_y = np.swapaxes(val_y_1, 0, 1)
    y_val_true = np.expand_dims(y_old_val[0, :], axis=1)
    y_val_true = np.swapaxes(y_val_true, 0, 1)
    val_seq = np.expand_dims(np.expand_dims(a_val[0][22], axis=1), axis=1)
    val_seq = np.reshape(val_seq, (1, 1, a_val.shape[2]))
    val_hours_use = np.expand_dims(np.array([val_hours[0][22]]), axis=1)
    val_pred = model.predict_on_batch([val_seq, val_hours_use])
    # y_val_true and val_pred are both shaped (1, 5625) at this point.
    val_ap = average_precision_score(y_val_true, val_pred)
    print('validation average precision: {}'.format(val_ap))
    model.reset_states()
    return val_ap
if __name__ == '__main__':
    a_tr = np.random.uniform(size=(50, 24, 5625))
    a_old_tr = np.random.uniform(size=(50, 24, 5625))
    a_val = np.random.uniform(size=(12, 24, 5625))
    a_old_val = np.random.uniform(size=(50, 24, 5625))
    a_test = np.random.uniform(size=(12, 24, 5625))
    a_old_test = np.random.uniform(size=(50, 24, 5625))
    # Binarize the label arrays.
    a_old_tr[a_old_tr > 0.5] = 1.
    a_old_tr[a_old_tr < 0.5] = 0.
    a_old_val[a_old_val > 0.5] = 1.
    a_old_val[a_old_val < 0.5] = 0.
    train_and_validate(a_tr, a_old_tr, a_val, a_old_val)
Running the above code should give you something like the following in under 30 seconds. Note that average precision returns nan:
user@server:~/path/to/curr/dir$ python dummy_so.py
Using TensorFlow backend.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to
==================================================================================================
seq_model_in (InputLayer)       (1, 1, 5625)         0
__________________________________________________________________________________________________
hours_in (InputLayer)           (1, 1, 1)            0
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (1, 1, 5626)         0           seq_model_in[0][0]
                                                                 hours_in[0][0]
__________________________________________________________________________________________________
lstm_1 (LSTM)                   (1, 4)               90096       concatenate_1[0][0]
__________________________________________________________________________________________________
dense_after_lstm (Dense)        (1, 5625)            28125       lstm_1[0][0]
==================================================================================================
Total params: 118,221
Trainable params: 118,221
Non-trainable params: 0
__________________________________________________________________________________________________
None
2018-01-24 13:43:24.873725: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX
loss training = 0.346308231354
validating, first sample only
validation average precision: nan
user@server:~/path/to/curr/dir$
The same error occurs even with a simpler model that takes only one input:
def train_and_validate(a_tr, a_old_tr, a_val, a_old_val):
    a_tr = a_tr[:, :-1, :]
    y_tr = a_tr[:, -1, :]
    a_val = a_val[:, :-1, :]
    y_val = a_val[:, -1, :]
    a_old_tr = a_old_tr[:, :-1, :]
    y_old_val = a_old_val[:, -1, :]
    y_old_tr = a_old_tr[:, -1, :]
    seq_length = a_tr.shape[1]
    # Define the model: a single input feeding a stateful LSTM.
    seq_model_in = Input(batch_shape=(1, 1, a_tr.shape[2]), name='seq_model_in')
    lstm_layer = LSTM(4, batch_input_shape=(1, 1, a_tr.shape[2]), stateful=True)(seq_model_in)
    dense_merged = Dense(a_tr.shape[2], activation='sigmoid', name='dense_after_lstm')(lstm_layer)
    model = Model(inputs=seq_model_in, outputs=dense_merged)
    print(model.summary())
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # http://philipperemy.github.io/keras-stateful-lstm/
    # TRAINING (one epoch, for demo purposes)
    mean_tr_loss, mean_val_ap = [], []
    for i in range(a_tr.shape[0]):
        y_true_1 = np.expand_dims(y_tr[i, :], axis=1)
        y_true = np.swapaxes(y_true_1, 0, 1)
        for j in range(seq_length - 1):
            input_1 = np.expand_dims(np.expand_dims(a_tr[i][j], axis=1), axis=1)
            input_1 = np.reshape(input_1, (1, 1, a_tr.shape[2]))
            tr_loss = model.train_on_batch(input_1, y_true)
            mean_tr_loss.append(tr_loss)
        model.reset_states()
    print('loss training = {}'.format(np.mean(mean_tr_loss)))
    # VALIDATION MWE
    print('validating, first sample only')
    val_y_1 = np.expand_dims(y_val[0, :], axis=1)
    val_y = np.swapaxes(val_y_1, 0, 1)
    y_val_true = np.expand_dims(y_old_val[0, :], axis=1)
    y_val_true = np.swapaxes(y_val_true, 0, 1)
    val_seq = np.expand_dims(np.expand_dims(a_val[0][22], axis=1), axis=1)
    val_seq = np.reshape(val_seq, (1, 1, a_val.shape[2]))
    val_pred = model.predict_on_batch(val_seq)
    val_ap = average_precision_score(y_val_true, val_pred)
    print('validation average precision: {}'.format(val_ap))
    model.reset_states()
    return val_ap
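In fact, Keras appears to be incidental here. The following is a minimal sklearn-only sketch (random data; the shapes mirror the MWE and the 0.5 threshold is just illustrative) that reproduces the nan on the 2018-era scikit-learn used above; newer releases may warn and handle the degenerate labels differently:

import numpy as np
from sklearn.metrics import average_precision_score

# Binary ground truth and scores shaped (1, 5625): one sample, 5625 labels.
y_true = (np.random.uniform(size=(1, 5625)) > 0.5).astype(float)
y_pred = np.random.uniform(size=(1, 5625))
# sklearn macro-averages a per-label AP; every label has a single sample, and
# any label whose only true value is 0 has no positives, so its AP is
# undefined and the average comes out as nan.
print(average_precision_score(y_true, y_pred))  # nan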
The problem lay in wrong (inverted) dimensions: flattening the matrices did the job.
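Presumably the fix looked something like this minimal sketch (its exact placement in the validation code is an assumption): flattening both arrays to shape (5625,) turns the score into a single binary problem with 5625 samples, which is well defined.

# Hypothetical fix: replaces the scoring line in the validation block.
val_ap = average_precision_score(y_val_true.flatten(), val_pred.flatten())

Passing average='micro' to average_precision_score with the original (1, 5625) arrays should be equivalent, since micro-averaging scores every element of the label indicator matrix globally.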