I'm using the Keras TensorBoard callback.
I would like to run a grid search and visualize the results of each single model in the tensor board.
The problem is that all results of the different runs are merged together and the loss plot is a mess like this:
How can I rename each run to have something similar to this:
Here the code of the grid search:
df = pd.read_csv('data/prepared_example.csv')
df = time_series.create_index(df, datetime_index='DATE', other_index_list=['ITEM', 'AREA'])
target = ['D']
attributes = ['S', 'C', 'D-10','D-9', 'D-8', 'D-7', 'D-6', 'D-5', 'D-4',
'D-3', 'D-2', 'D-1']
input_dim = len(attributes)
output_dim = len(target)
x = df[attributes]
y = df[target]
param_grid = {'epochs': [10, 20, 50],
'batch_size': [10],
'neurons': [[10, 10, 10]],
'dropout': [[0.0, 0.0], [0.2, 0.2]],
'lr': [0.1]}
estimator = KerasRegressor(build_fn=create_3_layers_model,
input_dim=input_dim, output_dim=output_dim)
tbCallBack = TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=False)
grid = GridSearchCV(estimator=estimator, param_grid=param_grid, n_jobs=-1, scoring=bug_fix_score,
cv=3, verbose=0, fit_params={'callbacks': [tbCallBack]})
grid_result = grid.fit(x.as_matrix(), y.as_matrix())
I don't think there is any way to pass a "per-run" parameter to GridSearchCV
. Maybe the easiest approach would be to subclass KerasRegressor
to do what you want.
class KerasRegressorTB(KerasRegressor):
def __init__(self, *args, **kwargs):
super(KerasRegressorTB, self).__init__(*args, **kwargs)
def fit(self, x, y, log_dir=None, **kwargs):
cbs = None
if log_dir is not None:
params = self.get_params()
conf = ",".join("{}={}".format(k, params[k])
for k in sorted(params))
conf_dir = os.path.join(log_dir, conf)
cbs = [TensorBoard(log_dir=conf_dir, histogram_freq=0,
write_graph=True, write_images=False)]
super(KerasRegressorTB, self).fit(x, y, callbacks=cbs, **kwargs)
You would use it like:
# ...
estimator = KerasRegressorTB(build_fn=create_3_layers_model,
input_dim=input_dim, output_dim=output_dim)
#...
grid = GridSearchCV(estimator=estimator, param_grid=param_grid,
n_jobs=1, scoring=bug_fix_score,
cv=2, verbose=0, fit_params={'log_dir': './Graph'})
grid_result = grid.fit(x.as_matrix(), y.as_matrix())
Update:
Since GridSearchCV
runs the same model (i.e. the same configuration of parameters) more than once due to cross-validation, the previous code will end up putting multiple traces in each run. Looking at the source (here and here), there doesn't seem to be a way to retrieve the "current split id". At the same time, you shouldn't just check for existing folders and add subfixes as needed, because the jobs run (potentially at least, although I'm not sure if that's the case with Keras/TF) in parallel. You can try something like this:
import itertools
import os
class KerasRegressorTB(KerasRegressor):
def __init__(self, *args, **kwargs):
super(KerasRegressorTB, self).__init__(*args, **kwargs)
def fit(self, x, y, log_dir=None, **kwargs):
cbs = None
if log_dir is not None:
# Make sure the base log directory exists
try:
os.makedirs(log_dir)
except OSError:
pass
params = self.get_params()
conf = ",".join("{}={}".format(k, params[k])
for k in sorted(params))
conf_dir_base = os.path.join(log_dir, conf)
# Find a new directory to place the logs
for i in itertools.count():
try:
conf_dir = "{}_split-{}".format(conf_dir_base, i)
os.makedirs(conf_dir)
break
except OSError:
pass
cbs = [TensorBoard(log_dir=conf_dir, histogram_freq=0,
write_graph=True, write_images=False)]
super(KerasRegressorTB, self).fit(x, y, callbacks=cbs, **kwargs)
I'm using os
calls for Python 2 compatibility, but if you are using Python 3 you may consider the nicer pathlib
module for path and directory handling.
Note: I forgot to mention it earlier, but just in case, note that passing write_graph=True
will log a graph per run, which, depending on your model, could mean a lot (relatively speaking) of this space. The same would apply to write_images
, although I don't know the space that feature requires.
It's easy, just save logs to separate dirs with concatenated parameters string as dir name:
Here is example using date as name of run:
from datetime import datetime
datetime_str = ('{date:%Y-%m-%d-%H:%M:%S}'.format(date=datetime.now()))
callbacks = [
ModelCheckpoint(model_filepath, monitor='val_loss', save_best_only=True, verbose=0),
TensorBoard(log_dir='./logs/'+datetime_str, histogram_freq=0, write_graph=True, write_images=True),
]
history = model.fit_generator(
generator=generator.batch_generator(is_train=True),
epochs=config.N_EPOCHS,
steps_per_epoch=100,
validation_data=generator.batch_generator(is_train=False),
validation_steps=10,
verbose=1,
shuffle=False,
callbacks=callbacks)