TensorFlow: using pandas_input_fn with TensorForest

2020-07-29 06:11发布

I am working on a simple TensorFlow program and building the input pipeline with pandas. My code is below:

def train_rf(X, Y):
  """Train a TensorForest estimator from pandas features and labels.

  Prints the shapes of both inputs, removes any existing model directory,
  builds a two-class TensorForestEstimator and fits it for 100 steps using
  `tf.estimator.inputs.pandas_input_fn`.

  Relies on names defined outside this function: `config` (reads
  config['rf']['model_dir'], ['num_trees'], ['train_batch_size']) and
  `features` (the list of feature names).

  Args:
    X: feature data passed to `pandas_input_fn` as `x`; must expose `.shape`.
    Y: labels passed to `pandas_input_fn` as `y`; must expose `.shape`.
  """
  print(X.shape)
  print(Y.shape)

  # Start from a clean model directory. rmtree only works on directories,
  # so guard with isdir: an isfile guard never matches an existing model
  # directory and would hand rmtree a regular file, which raises.
  model_dir = config['rf']['model_dir']
  if os.path.isdir(model_dir):
    shutil.rmtree(model_dir)

  # features is the list of names of features
  params = tensor_forest.ForestHParams(
      num_classes=2,
      num_features=len(features),
      num_trees=config['rf']['num_trees'])

  est = random_forest.TensorForestEstimator(params, model_dir=model_dir)

  # NOTE(review): per the reported ValueError, the feature tensors reaching
  # TensorForest's concat are rank 1 ([?]) while it requires rank >= 2;
  # this input function is left as posted so the error stays reproducible.
  train_input_fn = tf.estimator.inputs.pandas_input_fn(
      X,
      y=Y,
      batch_size=config['rf']['train_batch_size'],
      num_epochs=1,
      shuffle=True)

  est.fit(input_fn=train_input_fn, steps=100)

However, when I run this function, I get an error like this:

Traceback (most recent call last):
  File "random_forest.py", line 118, in <module>
    train_rf(train_ohd[features].iloc[X_1],l(y_1),train_ohd[features].iloc[X_2])
  File "random_forest.py", line 44, in train_rf
    est.fit(input_fn=train_input_fn, steps=100)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/util/deprecation.py", line 316, in new_func
    return func(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 480, in fit
    loss = self._train_model(input_fn=input_fn, hooks=hooks)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 986, in _train_model
    model_fn_ops = self._get_train_ops(features, labels)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1202, in _get_train_ops
    return self._call_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1166, in _call_model_fn
    model_fn_results = self._model_fn(features, labels, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tensor_forest/client/random_forest.py", line 171, in _model_fn
    features)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tensor_forest/python/tensor_forest.py", line 489, in inference_graph
    data_ops.ParseDataTensorOrDict(input_data))
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tensor_forest/python/ops/data_ops.py", line 159, in ParseDataTensorOrDict
    processed_dense_features = array_ops.concat(dense_features, 1)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/array_ops.py", line 1099, in concat
    return gen_array_ops._concat_v2(values=values, axis=axis, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 706, in _concat_v2
    "ConcatV2", values=values, axis=axis, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2958, in create_op
    set_shapes_for_outputs(ret)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2209, in set_shapes_for_outputs
    shapes = shape_func(op)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2159, in call_with_requiring
    return call_cpp_shape_fn(op, require_shape_fn=True)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/common_shapes.py", line 627, in call_cpp_shape_fn
    require_shape_fn)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/common_shapes.py", line 691, in _call_cpp_shape_fn_impl
    raise ValueError(err.message)
ValueError: Shape must be at least rank 2 but is rank 1 for 'concat' (op: 'ConcatV2') with input shapes: [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [?], [] and with computed input tensors: input[131] = <1>.

The output of print(X.shape) and print(Y.shape) is (53443, 131) and (53443,), respectively. So I am confused: why are there 132 elements in the input, and why do I get this ValueError?

One more piece of information: when I use tf.contrib.learn.LinearRegressor in place of the TensorForestEstimator, I can train and evaluate the model with no error. So can I assume this is a bug in TensorFlow?

1条回答
冷血范
2楼-- · 2020-07-29 06:24

The reply to the GitHub issue can be found here:

https://github.com/tensorflow/tensorflow/issues/16692

Basically, each feature tensor produced from the input DataFrame has to have its dimensions expanded (a second axis added):

# The wrapper.
def new_input_fn(X, Y, batch_size, num_epochs, shuffle):
    """Return an input function whose feature tensors gain an extra axis.

    The returned callable invokes `tf.estimator.inputs.pandas_input_fn` with
    the given arguments (and `target_column='y'`), then applies
    `tf.expand_dims(..., axis=1)` to every feature tensor. Labels are
    returned unchanged.

    Args:
        X: feature data forwarded to `pandas_input_fn` as `x`.
        Y: labels forwarded to `pandas_input_fn` as `y`.
        batch_size: forwarded to `pandas_input_fn`.
        num_epochs: forwarded to `pandas_input_fn`.
        shuffle: forwarded to `pandas_input_fn`.

    Returns:
        A zero-argument callable returning a `(features, labels)` pair.
    """
    def internal_input_fn():
        base_input_fn = tf.estimator.inputs.pandas_input_fn(
            X,
            y=Y,
            batch_size=batch_size,
            num_epochs=num_epochs,
            shuffle=shuffle,
            target_column='y')
        feature_tensors, labels = base_input_fn()
        # Add an axis at position 1 to every feature; update in place so the
        # same dict object produced by the base input function is returned.
        feature_tensors.update({
            column: tf.expand_dims(column_tensor, axis=1)
            for column, column_tensor in feature_tensors.items()
        })
        return feature_tensors, labels
    return internal_input_fn
# Bind the built input function to its own name so the `new_input_fn`
# factory is not shadowed and can still be called again (e.g. for eval).
# NOTE(review): `self.batch_size` suggests this snippet was lifted from
# inside a method; confirm what provides `self` here.
train_input_fn = new_input_fn(X, Y, self.batch_size, 10, True)

est.fit(input_fn=train_input_fn, steps=1000)
查看更多
登录 后发表回答