I would like to experiment with activation functions that do not have a classical derivative (i.e. the derivative contains Dirac pulses, which is not very useful for gradient-descent-style learning). I therefore need to define new activation functions and add them to Keras and/or TFLearn, and not just the activation itself but also a derivative that I define myself. This kind of "fake" derivative has already been used in binarized neural networks. So I found this and parroted it to define my own:
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops

# Forward pass: hard sign, -1 for negative inputs, +1 otherwise.
def nsign(x):
    if x < 0.0:
        return -1.0
    else:
        return 1.0

np_nsign = np.vectorize(nsign)

# "Fake" derivative: 1 inside (-1, 1) and 0 elsewhere, instead of Dirac pulses.
def d_nsign(x):
    if x > -1.0 and x < 1.0:
        return 1.0
    else:
        return 0.0

np_d_nsign = np.vectorize(d_nsign)
np_d_nsign_32 = lambda x: np_d_nsign(x).astype(np.float32)

# Wrap the numpy derivative as a TF op.
def tf_d_nsign(x, name=None):
    with ops.op_scope([x], name, "d_nsign") as name:
        y = tf.py_func(np_d_nsign_32,
                       [x],
                       [tf.float32],
                       name=name,
                       stateful=False)
        return y[0]

# py_func variant that also registers a custom gradient for the wrapped op.
def py_func(func, inp, Tout, stateful=True, name=None, grad=None):
    # Need to generate a unique name to avoid duplicates:
    rnd_name = 'PyFuncGrad' + str(np.random.randint(0, 1E+8))
    tf.RegisterGradient(rnd_name)(grad)  # see _MySquareGrad for grad example
    g = tf.get_default_graph()
    with g.gradient_override_map({"PyFunc": rnd_name}):
        return tf.py_func(func, inp, Tout, stateful=stateful, name=name)

# Custom gradient: chain the incoming gradient with the fake derivative.
def nsigngrad(op, grad):
    x = op.inputs[0]
    n_gr = tf_d_nsign(x)
    return grad * n_gr

np_nsign_32 = lambda x: np_nsign(x).astype(np.float32)

# Wrap the numpy activation as a TF op, attaching the custom gradient.
def tf_nsign(x, name=None):
    with ops.op_scope([x], name, "nsign") as name:
        y = py_func(np_nsign_32,
                    [x],
                    [tf.float32],
                    name=name,
                    grad=nsigngrad)  # <-- here's the call to the gradient
        return y[0]

# Quick sanity check of the forward values and the gradient.
with tf.Session() as sess:
    x = tf.constant([-0.2, 0.7, 1.2, -1.7])
    y = tf_nsign(x)
    tf.initialize_all_variables().run()
    print(x.eval(), y.eval(), tf.gradients(y, [x])[0].eval())
That worked, apart from TF mumbling that tf.op_scope is deprecated in favour of name_scope.
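If I read the deprecation message correctly, the only change needed is to reorder the arguments; an untested sketch of tf_nsign rewritten with ops.name_scope (tf_d_nsign would change the same way):

# Sketch only: same wrapper as above, but with the non-deprecated scope helper.
# ops.name_scope takes (name, default_name, values) instead of
# op_scope's (values, name, default_name).
def tf_nsign(x, name=None):
    with ops.name_scope(name, "nsign", [x]) as name:
        y = py_func(np_nsign_32,
                    [x],
                    [tf.float32],
                    name=name,
                    grad=nsigngrad)
        return y[0]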
Next step was to take the code above into Keras for something simple like the cifar10 demo code and change all the relus to tf_nsign, like this:

model.add(Activation(tf_nsign))
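For context (quoting the demo from memory, so details may be slightly off), the top of the model then becomes something like the fragment below; convolution2d_2 in the traceback further down is the second Convolution2D, i.e. the first layer fed by the custom activation:

from keras.models import Sequential
from keras.layers import Activation, Convolution2D, Dropout, MaxPooling2D

model = Sequential()
# X_train comes from the demo's data-loading code.
model.add(Convolution2D(32, 3, 3, border_mode='same',
                        input_shape=X_train.shape[1:]))
model.add(Activation(tf_nsign))  # was Activation('relu')
model.add(Convolution2D(32, 3, 3))
model.add(Activation(tf_nsign))  # was Activation('relu')
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))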
Unfortunately, that resulted in the following error:
Traceback (most recent call last):
  File "cifar10_bin_tf.py", line 96, in <module>
    model.add(Convolution2D(32, 3, 3))
  File "/home/enzo/anaconda2/envs/neural/lib/python2.7/site-packages/keras/models.py", line 324, in add
    output_tensor = layer(self.outputs[0])
  File "/home/enzo/anaconda2/envs/neural/lib/python2.7/site-packages/keras/engine/topology.py", line 474, in __call__
    self.assert_input_compatibility(x)
  File "/home/enzo/anaconda2/envs/neural/lib/python2.7/site-packages/keras/engine/topology.py", line 415, in assert_input_compatibility
    str(K.ndim(x)))
Exception: Input 0 is incompatible with layer convolution2d_2: expected ndim=4, found ndim=None
ndim sounds like "number of dimensions", which is puzzling for what should be an elementwise scalar function. I also found this other trick for Theano, but once adapted it causes a very similar error (a complaint about ndim again). The only explanation I can come up with is sketched below.
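My guess (unverified) is that tf.py_func returns a tensor whose static shape is unknown, so Keras sees ndim=None when it checks compatibility with the next layer. If that is the cause, copying the input's shape onto the output inside tf_nsign might be enough; this is only a sketch of the idea:

def tf_nsign(x, name=None):
    with ops.op_scope([x], name, "nsign") as name:
        y = py_func(np_nsign_32,
                    [x],
                    [tf.float32],
                    name=name,
                    grad=nsigngrad)
        y = y[0]
        # py_func does not propagate static shape information; restore it by
        # hand, since the activation is elementwise and preserves the shape.
        y.set_shape(x.get_shape())
        return y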
Questions:

1) Can you suggest a fix for the above?
2) Can you suggest another simple way to do what I want in something high-level like Keras or TFLearn?
3) Will the code above work in TFLearn?
4) Will the code above work if I convert the cifar10 code directly to TF and use it there?
Obviously I'd like to avoid option 4), as it means more struggling and more conversion work.
Thank you very much in advance.