I am attempting to predict features in imagery using keras with a TensorFlow backend. Specifically, I am attempting to use a keras ImageDataGenerator
. The model is set to run for 4 epochs and runs fine until the 4th epoch where it fails with a MemoryError.
I am running this model on an AWS g2.2xlarge instance running Ubuntu Server 16.04 LTS (HVM), SSD Volume Type.
The training images are 256x256 RGB pixel tiles (8 bit unsigned) and the training mask is 256x256 single band (8 bit unsigned) tiled data where 255 == a feature of interest and 0 == everything else.
The following 3 functions are the ones pertinent to this error.
How can I resolve this MemoryError?
def train_model():
batch_size = 1
training_imgs = np.lib.format.open_memmap(filename=os.path.join(data_path, 'data.npy'),mode='r+')
training_masks = np.lib.format.open_memmap(filename=os.path.join(data_path, 'mask.npy'),mode='r+')
dl_model = create_model()
print(dl_model.summary())
model_checkpoint = ModelCheckpoint(os.path.join(data_path,'mod_weight.hdf5'), monitor='loss',verbose=1, save_best_only=True)
dl_model.fit_generator(generator(training_imgs, training_masks, batch_size), steps_per_epoch=(len(training_imgs)/batch_size), epochs=4,verbose=1,callbacks=[model_checkpoint])
def generator(train_imgs, train_masks=None, batch_size=None):
# Create empty arrays to contain batch of features and labels#
if train_masks is not None:
train_imgs_batch = np.zeros((batch_size,y_to_res,x_to_res,bands))
train_masks_batch = np.zeros((batch_size,y_to_res,x_to_res,1))
while True:
for i in range(batch_size):
# choose random index in features
index= random.choice(range(len(train_imgs)))
train_imgs_batch[i] = train_imgs[index]
train_masks_batch[i] = train_masks[index]
yield train_imgs_batch, train_masks_batch
else:
rec_imgs_batch = np.zeros((batch_size,y_to_res,x_to_res,bands))
while True:
for i in range(batch_size):
# choose random index in features
index= random.choice(range(len(train_imgs)))
rec_imgs_batch[i] = train_imgs[index]
yield rec_imgs_batch
def train_generator(train_images,train_masks,batch_size):
data_gen_args=dict(rotation_range=90.,horizontal_flip=True,vertical_flip=True,rescale=1./255)
image_datagen = ImageDataGenerator()
mask_datagen = ImageDataGenerator()
# # Provide the same seed and keyword arguments to the fit and flow methods
seed = 1
image_datagen.fit(train_images, augment=True, seed=seed)
mask_datagen.fit(train_masks, augment=True, seed=seed)
image_generator = image_datagen.flow(train_images,batch_size=batch_size)
mask_generator = mask_datagen.flow(train_masks,batch_size=batch_size)
return zip(image_generator, mask_generator)
The following os the output from the model detailing the epochs and the error message:
Epoch 00001: loss improved from inf to 0.01683, saving model to /home/ubuntu/deep_learn/client_data/mod_weight.hdf5
Epoch 2/4
7569/7569 [==============================] - 3394s 448ms/step - loss: 0.0049 - binary_crossentropy: 0.0027 - jaccard_coef_int: 0.9983
Epoch 00002: loss improved from 0.01683 to 0.00492, saving model to /home/ubuntu/deep_learn/client_data/mod_weight.hdf5
Epoch 3/4
7569/7569 [==============================] - 3394s 448ms/step - loss: 0.0049 - binary_crossentropy: 0.0026 - jaccard_coef_int: 0.9982
Epoch 00003: loss improved from 0.00492 to 0.00488, saving model to /home/ubuntu/deep_learn/client_data/mod_weight.hdf5
Epoch 4/4
7569/7569 [==============================] - 3394s 448ms/step - loss: 0.0074 - binary_crossentropy: 0.0042 - jaccard_coef_int: 0.9975
Epoch 00004: loss did not improve
Traceback (most recent call last):
File "image_rec.py", line 291, in <module>
train_model()
File "image_rec.py", line 208, in train_model
dl_model.fit_generator(train_generator(training_imgs,training_masks,batch_size),steps_per_epoch=1,epochs=1,workers=1)
File "image_rec.py", line 274, in train_generator
image_datagen.fit(train_images, augment=True, seed=seed)
File "/home/ubuntu/pyvirt_test/local/lib/python2.7/site-packages/keras/preprocessing/image.py", line 753, in fit
x = np.copy(x)
File "/home/ubuntu/pyvirt_test/local/lib/python2.7/site-packages/numpy/lib/function_base.py", line 1505, in copy
return array(a, order=order, copy=True)
MemoryError