Keras Kernel Initialization with numpy array unabl

2020-07-23 06:25发布

问题:

I am trying to use a numpy array from a pretrained model, to initialize a kernel in my keras model. Therefore I am writing my own Initializer function. The function is embedded in a callable class to avoid issues when using load_model. It seems that passing an array as a parameter to the initializer does not work in my case.

My current solution is inspired by: keras kernel initializers are called incorrectly when using load_model Where the answer provided works fine for me in the case of passing a single number parameter to the initializer function. Upon using an array the error occurs.

I have looked at solutions using set_weights as well, but they are not applicable in my case as I want only to initialize a part of the model.

I use the following imports:

import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Dropout, Add
from keras import metrics, Input, Model, optimizers
from keras.utils.generic_utils import get_custom_objects
import keras.backend as K
from keras.initializers import Initializer

With the Initializer class:

class myInit( Initializer ):
    def __init__(self, matrix):
        self.matrix = matrix

    def __call__(self, shape, dtype=None):
    # array filled with matrix parameter'
        return K.variable(value = self.matrix, dtype=dtype )

    def get_config(self):
        return {
            'matrix' : self.matrix
        }

And the model as follows:

val = np.ones((2, 2))

input_l=Input(shape=(2,))
hidden=Dropout(rate=0.3,seed=0)(input_l)
x1 = Dense(2, kernel_initializer=myInit(val), 
                activation=None, )(hidden)
x2 = Dense(2, activation='relu')(hidden)
energy=Add()([x1,x2])
output=Activation('softmax')(energy)  
model = Model(input_l,output)

model.compile(loss='categorical_crossentropy', optimizer='adam' , metrics=['categorical_accuracy'])
model_info=model.get_config()

model.save("savedmodel_ex.h5")
model = load_model("savedmodel_ex.h5", custom_objects={'myInit':myInit})

I get the following error message when loading the model:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-20-99f620c51ed9> in <module>()
     12 
     13 model.save("savedmodel_ex.h5")
---> 14 model = load_model("savedmodel_ex.h5", custom_objects={'myInit':myInit})

/lib/python2.7/site-packages/keras/models.pyc in load_model(filepath, custom_objects, compile)
    268             raise ValueError('No model found in config file.')
    269         model_config = json.loads(model_config.decode('utf-8'))
--> 270         model = model_from_config(model_config, custom_objects=custom_objects)
    271 
    272         # set weights

/lib/python2.7/site-packages/keras/models.pyc in model_from_config(config, custom_objects)
    345                         'Maybe you meant to use '
    346                         '`Sequential.from_config(config)`?')
--> 347     return layer_module.deserialize(config, custom_objects=custom_objects)
    348 
    349 

/lib/python2.7/site-packages/keras/layers/__init__.pyc in deserialize(config, custom_objects)
     53                                     module_objects=globs,
     54                                     custom_objects=custom_objects,
---> 55                                     printable_module_name='layer')

/lib/python2.7/site-packages/keras/utils/generic_utils.pyc in deserialize_keras_object(identifier, module_objects, custom_objects, printable_module_name)
    142                 return cls.from_config(config['config'],
    143                                        custom_objects=dict(list(_GLOBAL_CUSTOM_OBJECTS.items()) +
--> 144                                                            list(custom_objects.items())))
    145             with CustomObjectScope(custom_objects):
    146                 return cls.from_config(config['config'])

/lib/python2.7/site-packages/keras/engine/topology.pyc in from_config(cls, config, custom_objects)
   2533                 if layer in unprocessed_nodes:
   2534                     for node_data in unprocessed_nodes.pop(layer):
-> 2535                         process_node(layer, node_data)
   2536 
   2537         name = config.get('name')

/lib/python2.7/site-packages/keras/engine/topology.pyc in process_node(layer, node_data)
   2490             if input_tensors:
   2491                 if len(input_tensors) == 1:
-> 2492                     layer(input_tensors[0], **kwargs)
   2493                 else:
   2494                     layer(input_tensors, **kwargs)

/lib/python2.7/site-packages/keras/engine/topology.pyc in __call__(self, inputs, **kwargs)
    590                                          '`layer.build(batch_input_shape)`')
    591                 if len(input_shapes) == 1:
--> 592                     self.build(input_shapes[0])
    593                 else:
    594                     self.build(input_shapes)

/lib/python2.7/site-packages/keras/layers/core.pyc in build(self, input_shape)
    862                                       name='kernel',
    863                                       regularizer=self.kernel_regularizer,
--> 864                                       constraint=self.kernel_constraint)
    865         if self.use_bias:
    866             self.bias = self.add_weight(shape=(self.units,),

/lib/python2.7/site-packages/keras/legacy/interfaces.pyc in wrapper(*args, **kwargs)
     89                 warnings.warn('Update your `' + object_name +
     90                               '` call to the Keras 2 API: ' + signature, stacklevel=2)
---> 91             return func(*args, **kwargs)
     92         wrapper._original_function = func
     93         return wrapper

/lib/python2.7/site-packages/keras/engine/topology.pyc in add_weight(self, name, shape, dtype, initializer, regularizer, trainable, constraint)
    411         if dtype is None:
    412             dtype = K.floatx()
--> 413         weight = K.variable(initializer(shape),
    414                             dtype=dtype,
    415                             name=name,

<ipython-input-17-463931c2b557> in __call__(self, shape, dtype)
      8     def __call__(self, shape, dtype=None):
      9     # array filled with matrix parameter'
---> 10         return K.variable(value = self.matrix, dtype=dtype )
     11 
     12     def get_config(self):

/lib/python2.7/site-packages/keras/backend/tensorflow_backend.pyc in variable(value, dtype, name, constraint)
    394         v._uses_learning_phase = False
    395         return v
--> 396     v = tf.Variable(value, dtype=tf.as_dtype(dtype), name=name)
    397     if isinstance(value, np.ndarray):
    398         v._keras_shape = value.shape

/lib/python2.7/site-packages/tensorflow/python/ops/variables.pyc in __call__(cls, *args, **kwargs)
    211   def __call__(cls, *args, **kwargs):
    212     if cls is VariableV1:
--> 213       return cls._variable_v1_call(*args, **kwargs)
    214     elif cls is Variable:
    215       return cls._variable_v2_call(*args, **kwargs)

/lib/python2.7/site-packages/tensorflow/python/ops/variables.pyc in _variable_v1_call(cls, initial_value, trainable, collections, validate_shape, caching_device, name, variable_def, dtype, expected_shape, import_scope, constraint, use_resource, synchronization, aggregation)
    174         use_resource=use_resource,
    175         synchronization=synchronization,
--> 176         aggregation=aggregation)
    177 
    178   def _variable_v2_call(cls,

/lib/python2.7/site-packages/tensorflow/python/ops/variables.pyc in <lambda>(**kwargs)
    153                         aggregation=VariableAggregation.NONE):
    154     """Call on Variable class. Useful to force the signature."""
--> 155     previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
    156     for getter in ops.get_default_graph()._variable_creator_stack:  # pylint: disable=protected-access
    157       previous_getter = _make_getter(getter, previous_getter)

/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.pyc in default_variable_creator(next_creator, **kwargs)
   2493         caching_device=caching_device, name=name, dtype=dtype,
   2494         constraint=constraint, variable_def=variable_def,
-> 2495         expected_shape=expected_shape, import_scope=import_scope)
   2496 
   2497 

/lib/python2.7/site-packages/tensorflow/python/ops/variables.pyc in __call__(cls, *args, **kwargs)
    215       return cls._variable_v2_call(*args, **kwargs)
    216     else:
--> 217       return super(VariableMetaclass, cls).__call__(*args, **kwargs)
    218 
    219 

/lib/python2.7/site-packages/tensorflow/python/ops/variables.pyc in __init__(self, initial_value, trainable, collections, validate_shape, caching_device, name, variable_def, dtype, expected_shape, import_scope, constraint)
   1393           dtype=dtype,
   1394           expected_shape=expected_shape,
-> 1395           constraint=constraint)
   1396 
   1397   def __repr__(self):

/lib/python2.7/site-packages/tensorflow/python/ops/variables.pyc in _init_from_args(self, initial_value, trainable, collections, validate_shape, caching_device, name, dtype, expected_shape, constraint)
   1513         else:
   1514           self._initial_value = ops.convert_to_tensor(
-> 1515               initial_value, name="initial_value", dtype=dtype)
   1516           # pylint: disable=protected-access
   1517           if self._initial_value.op._get_control_flow_context() is not None:

/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in convert_to_tensor(value, dtype, name, preferred_dtype)
   1037     ValueError: If the `value` is a tensor not of given `dtype` in graph mode.
   1038   """
-> 1039   return convert_to_tensor_v2(value, dtype, preferred_dtype, name)
   1040 
   1041 

/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in convert_to_tensor_v2(value, dtype, dtype_hint, name)
   1095       name=name,
   1096       preferred_dtype=dtype_hint,
-> 1097       as_ref=False)
   1098 
   1099 

/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx, accept_symbolic_tensors)
   1173 
   1174     if ret is None:
-> 1175       ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
   1176 
   1177     if ret is NotImplemented:

/lib/python2.7/site-packages/tensorflow/python/framework/constant_op.pyc in _constant_tensor_conversion_function(v, dtype, name, as_ref)
    302                                          as_ref=False):
    303   _ = as_ref
--> 304   return constant(v, dtype=dtype, name=name)
    305 
    306 

/lib/python2.7/site-packages/tensorflow/python/framework/constant_op.pyc in constant(value, dtype, shape, name)
    243   """
    244   return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 245                         allow_broadcast=True)
    246 
    247 

/lib/python2.7/site-packages/tensorflow/python/framework/constant_op.pyc in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
    281       tensor_util.make_tensor_proto(
    282           value, dtype=dtype, shape=shape, verify_shape=verify_shape,
--> 283           allow_broadcast=allow_broadcast))
    284   dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
    285   const_tensor = g.create_op(

/lib/python2.7/site-packages/tensorflow/python/framework/tensor_util.pyc in make_tensor_proto(values, dtype, shape, verify_shape, allow_broadcast)
    464       nparray = np.empty(shape, dtype=np_dt)
    465     else:
--> 466       _AssertCompatible(values, dtype)
    467       nparray = np.array(values, dtype=np_dt)
    468       # check to them.

/lib/python2.7/site-packages/tensorflow/python/framework/tensor_util.pyc in _AssertCompatible(values, dtype)
    369     else:
    370       raise TypeError("Expected %s, got %s of type '%s' instead." %
--> 371                       (dtype.name, repr(mismatch), type(mismatch).__name__))
    372 
    373 

TypeError: Expected float32, got {u'type': u'ndarray', u'value': [[1.0, 1.0], [1.0, 1.0]]} of type 'dict' instead.

How can I change my Initializer Class such that I can successfully initialize with an array and load the model afterwards?

回答1:

you have to use model.layers.set_weights([kernels,bias_vector])

compile the model with random weights, then set the weights afterward.

you can't set specific weights, you have to craft the entire weight vector and set the layer as a whole.