Tensorflow summery merge error : Shape [-1,784] ha

I am trying to get summary of a training process of the neural net below.

import tensorflow as tf 
import numpy as np 

from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets(".\MNIST",one_hot=True)

# Create the model
def train_and_test(hidden1,hidden2, learning_rate, epochs, batch_size):

    with tf.name_scope("first_layer"):
        input_data = tf.placeholder(tf.float32, [batch_size, 784], name = "input")
        weights1  = tf.Variable(
        tf.random_normal(shape =[784, hidden1],stddev=0.1),name = "weights")
        bias = tf.Variable(tf.constant(0.0,shape =[hidden1]), name = "bias")
        activation = tf.nn.relu(
        tf.matmul(input_data, weights1) + bias, name = "relu_act")
        tf.summary.histogram("first_activation", activation)

    with tf.name_scope("second_layer"):
        weights2  = tf.Variable(
        tf.random_normal(shape =[hidden1, hidden2],stddev=0.1),
        name = "weights")
        bias2 = tf.Variable(tf.constant(0.0,shape =[hidden2]), name = "bias")
        activation2 = tf.nn.relu(
        tf.matmul(activation, weights2) + bias2, name = "relu_act")
        tf.summary.histogram("second_activation", activation2)

    with tf.name_scope("output_layer"):
        weights3 = tf.Variable(
            tf.random_normal(shape=[hidden2, 10],stddev=0.5), name = "weights")
        bias3 = tf.Variable(tf.constant(1.0, shape =[10]), name = "bias")
        output = tf.add(
        tf.matmul(activation2, weights3, name = "mul"), bias3, name = "output")
        tf.summary.histogram("output_activation", output)
    y_ = tf.placeholder(tf.float32, [batch_size, 10])

    with tf.name_scope("loss"):
        cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=output))
        tf.summary.scalar("cross_entropy", cross_entropy)
    with tf.name_scope("train"):
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)

    with tf.name_scope("tests"):
        correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    summary_op = tf.summary.merge_all()

    sess = tf.InteractiveSession()
    writer = tf.summary.FileWriter("./data", sess.graph)

    # Train
    for i in range(epochs):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
         _, summary = sess.run([train_step,summary_op], feed_dict={input_data: batch_xs, y_: batch_ys})

     if i % 10 ==0:
          test_xs, test_ys = mnist.train.next_batch(batch_size)
          test_accuracy = sess.run(accuracy, feed_dict = {input_data : test_xs, y_ : test_ys})
    return test_accuracy

if __name__ =="__main__":
print(train_and_test(500, 200, 0.001, 10000, 100))

I am testing the model every 10 step with a random batch of test data. The problem is in the summery writer. The sess.run() inside the for loop throws following error.

InvalidArgumentError (see above for traceback): Shape [-1,784] has negative dimensions
     [[Node: first_layer_5/input = Placeholder[dtype=DT_FLOAT, shape=[?,784], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

If I deleted all the summary writers and summary, the model runs fine. Can you help me spot the problem here? I tried manipulating the shapes of tensors but got nowhere.


From one comment of the deleted answer, from the original poster:

I actually build a neural net under with tf.Graph() as g. I removed the interactive session and started session as with tf.Session(g) as sess. It fixed the problem.

The graph g was not marked as the default graph that way, thus the session (tf.InteractiveSession in the original code) would use another graph instead.

Note that I stumbled upon here because of the same error message. In my case, I had accidentally something like this:

input_data = tf.placeholder(tf.float32, shape=(None, 50))
input_data = tf.tanh(input_data)
session.run(..., feed_dict={input_data: ...})

I.e. I didn't feed the placeholder. It seems that some other tensor operations can then result in this confusing error as internally an undefined dimension is represented as -1.


I was also having this problem. Searching around the basic consensus is to check for problems somewhere else in your code.

What fixed it for me was I was doing a sess.run(summary_op) without feeding in data for my placeholders.

Tensorflow seems to be a bit strange with placeholders, often they won't mind you not feeding them if you're trying to evaluate part of the graph that is independent of them. Here though, it did.


This has may have to do with the InteractiveSession initialization.

I initialized it at the beginning and then it worked - then initialized the global variables within the session.

I am unable to reproduce the error with the old code, which makes it unpredictable or caching settings somewhere.

import tensorflow as tf
sess = tf.InteractiveSession()

from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

x = tf.placeholder(tf.float32, [None, 784])

W = tf.Variable(tf.zeros([784,10]))

b = tf.Variable(tf.zeros([10]))

y = tf.nn.softmax(tf.matmul(x, W)+b)

y_ = tf.placeholder(tf.float32, [None,10])

cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.05).minimize(cross_entropy)

for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    #print batch_xs.shape, batch_ys.shape
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})