Computing Hessian in tensorflow is quite easy:
x = tf.Variable([1., 1., 1.], dtype=tf.float32, name="x")
f = (x[0] + x[1] ** 2 + x[0] * x[1] + x[2]) ** 2
hessian = tf.hessians(f, x)
This correctly returns
[[ 8., 20., 4.],
[20., 34., 6.],
[ 4., 6., 2.]]
In my real case instead of using one single variable x
holding three values, I need to split it in two variables: x
(holding the first two) and y
(holding the last one).
x = tf.Variable([1., 1.], dtype=tf.float32, name="x")
y = tf.Variable([1.], dtype=tf.float32, name="y")
f = (x[0] + x[1] ** 2 + x[0] * x[1] + y) ** 2
I tried a naive
hessian = tf.hessians(f, [x, y])
but I get: [[ 8., 20.], [20., 34.]], [[2.]]
I also tried:
xy = tf.concat([x, y], axis=-1)
but when defining the hessian
hessian = tf.hessians(f, xy)
I get a very bad error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
510 as_ref=input_arg.is_ref,
--> 511 preferred_dtype=default_dtype)
512 except TypeError as err:
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx, accept_symbolic_tensors)
1174 if ret is None:
-> 1175 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
1176
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
303 _ = as_ref
--> 304 return constant(v, dtype=dtype, name=name)
305
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
244 return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 245 allow_broadcast=True)
246
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
282 value, dtype=dtype, shape=shape, verify_shape=verify_shape,
--> 283 allow_broadcast=allow_broadcast))
284 dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape, allow_broadcast)
453 if values is None:
--> 454 raise ValueError("None values not supported.")
455 # if dtype is provided, forces numpy array to be the type
ValueError: None values not supported.
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
524 observed = ops.internal_convert_to_tensor(
--> 525 values, as_ref=input_arg.is_ref).dtype.name
526 except ValueError as err:
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx, accept_symbolic_tensors)
1174 if ret is None:
-> 1175 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
1176
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
303 _ = as_ref
--> 304 return constant(v, dtype=dtype, name=name)
305
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
244 return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 245 allow_broadcast=True)
246
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
282 value, dtype=dtype, shape=shape, verify_shape=verify_shape,
--> 283 allow_broadcast=allow_broadcast))
284 dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape, allow_broadcast)
453 if values is None:
--> 454 raise ValueError("None values not supported.")
455 # if dtype is provided, forces numpy array to be the type
ValueError: None values not supported.
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-358-70bce7e5d400> in <module>
3 f = (x[0] + x[1] ** 2 + x[0] * x[1] + y) ** 2
4 xy = tf.concat([x, y], axis=-1)
----> 5 hessian = tf.hessians(f, xy)
~/venv3/lib/python3.7/site-packages/tensorflow/python/ops/gradients_impl.py in hessians(ys, xs, name, colocate_gradients_with_ops, gate_gradients, aggregation_method)
1405 for gradient, x in zip(_gradients, xs):
1406 # change shape to one-dimension without graph branching
-> 1407 gradient = array_ops.reshape(gradient, [-1])
1408
1409 # Declare an iterator and tensor array loop variables for the gradients.
~/venv3/lib/python3.7/site-packages/tensorflow/python/ops/gen_array_ops.py in reshape(tensor, shape, name)
7178 try:
7179 _, _, _op = _op_def_lib._apply_op_helper(
-> 7180 "Reshape", tensor=tensor, shape=shape, name=name)
7181 except (TypeError, ValueError):
7182 result = _dispatch.dispatch(
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
527 raise ValueError(
528 "Tried to convert '%s' to a tensor and failed. Error: %s" %
--> 529 (input_name, err))
530 prefix = ("Input '%s' of '%s' Op has type %s that does not match" %
531 (input_name, op_type_name, observed))
ValueError: Tried to convert 'tensor' to a tensor and failed. Error: None values not supported.
1
EDIT: Here is a more fleshed out solution, essentially the same but for an arbitrary number of variables. Also I have added the option of using Python or TensorFlow loops for the Jacobian. Note the code assumes all variables are 1D tensors.
Output:
I'm not sure if there can be a "good" way of doing that with the current API. Obviously, you can compute the Hessian matrix elements by yourself... It is not very elegant and probably not the fastest solution either, but here is how it might be done in your example:
Output: