Keras ImageDataGenerator: why are the outputs of m

I'm trying to code a CNN which distinguishes between cats and dogs. I have set my labels such that dog:0 and cat:1, so I'm expecting my CNN to output a 0 if it's a dog and 1 if it's a cat. However, it is doing the opposite instead (giving a 0 when its a cat and a 1 for a dog). Please review my code and look where I went wrong. Thanks

I'm currently on python 3.6.8, using jupyter notebook (all the code inside is me copy-pasting different parts of the code from the jupyter notebook)

import os
import cv2
from random import shuffle
import numpy as np
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
%matplotlib inline

train_dir = r'C:\Users\tohho\Desktop\Python pypipapp\Machine Learning\data\PetImages\train'
test_dir = r'C:\Users\tohho\Desktop\Python pypipapp\Machine Learning\data\PetImages\test1'
IMG_WIDTH = 100
IMG_HEIGHT = 100
batch_size = 32



######## THIS IS WHERE I LABELLED 0 FOR DOG AND 1 FOR CAT ##########
filenames = os.listdir(train_dir)
categories = [] 
for filename in filenames:
    category = filename.split('.')[0]
    if category == 'cat':
        categories.append(1)
    elif category == 'dog':
        categories.append(0)

df = pd.DataFrame({'filename':filenames, 'class':categories}) # making the dataframe

#### I SPLIT THE DATA INTO TRAIN AND VALIDATION DATASETS ####
df_train, df_validate = train_test_split(df, test_size=0.25) # splitting data for train/test
 # need to reset index for both dataframs so imagedatagenerator works properly
df_train = df_train.reset_index(drop=True)
df_validate = df_validate.reset_index(drop=True)

print(df_train['class'].value_counts())
print(df_validate['class'].value_counts())

len_training = df_train.shape[0]
len_validate = df_validate.shape[0]
print('{} training eg, {} test eg'.format(len_training, len_validate))



#### CREATE IMAGE DATA GENERATORS ####
train_datagen = ImageDataGenerator(rescale=1./255,
                               shear_range = 0.2,
                               zoom_range = 0.2,
                               horizontal_flip = True)
# our train_datagen generator will use the following transformations on the images
validation_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(df_train, 
                                                    train_dir,
                                                    target_size=(IMG_WIDTH, IMG_HEIGHT),
                                                    batch_size=batch_size,
                                                    x_col='filename',
                                                    y_col='class',
                                                    class_mode = 'binary')

# generator = ImageDataGenerator(*args).flow_from_dataframe(dataframe, directory, target_size,
# batch_size, x_col, y_col, class_mode)
# your dataframe shoudl be in the format such that x_col = features, y_col = class/label
# binary class mode since output is either 0(dog) or 1(cat)

validation_generator = validation_datagen.flow_from_dataframe(df_validate, 
                                                   train_dir,
                                                    target_size=(IMG_WIDTH, IMG_HEIGHT),
                                                    x_col='filename',
                                                    y_col='class',
                                                    class_mode='binary', 
                                                  batch_size=batch_size)

########## BUILDING MODEL ############
model = Sequential()
model.add(Conv2D(32, (3,3), input_shape=(IMG_WIDTH, IMG_HEIGHT, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(64, (3,3), input_shape=(IMG_WIDTH, IMG_HEIGHT, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(128, (3,3), input_shape=(IMG_WIDTH, IMG_HEIGHT, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Flatten()) # remember to flatten conv2d to dense layer
model.add(Dense(256))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.4))

model.add(Dense(1))
model.add(Activation('sigmoid')) 
# since we have only 1 output with range [0,1], we use sigmoid
# if there were n categories, use softmax

# binary_crossentropy since output is either 0,1
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

earlystop = EarlyStopping(monitor='val_loss', patience=3) # stops learning if val_loss doesnt improve
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', 
                                            patience=2, 
                                            verbose=1, 
                                            factor=0.5, 
                                            min_lr=0.000001) 
# reduces learning rate if val_acc doesnt improve
callbacks = [earlystop, learning_rate_reduction]

##### FIT THE MODEL #####
epochs = 50
model.fit_generator(train_generator,
                   steps_per_epoch=len_training//batch_size,
                   verbose=1,
                   epochs=epochs,
                   validation_data=validation_generator,
                   validation_steps=len_validate//batch_size,
                   callbacks=callbacks) # fitting model


######### PREDICTING #############
output_generator = validation_datagen.flow_from_dataframe(df_output,
                                                   outputdir,
                                                   x_col='filename',
                                                   y_col=None,
                                                   class_mode=None,
                                                   target_size=(IMG_WIDTH, IMG_HEIGHT),
                                                   shuffle=False,
                                                   batch_size=batch_size)
predictions = model.predict_generator(output_generator, 
                                      steps=np.ceil(len_output/batch_size))
df_output['probability'] = predictions
df_output['label'] = np.where(df_output['probability'] > 0.5, 'cat','dog')
df_output.head()

The CNN gives the opposite of the correct answer, and when reversing the outputs, I get the expected results (correct identification and accuracy). I know that just changing the line df_output['label'] = np.where(df_output['probability'] > 0.5, 'cat','dog') to df_output['label'] = np.where(df_output['probability'] < 0.5, 'cat','dog') settles the problem, but that's not helping me figure out why the output of the CNN is reversed.

The cause of your issue is subtle. I'll illustrate what is going on with a toy example. Suppose we instantiate a datagenerator with the following code:

# List of image paths, doesn't matter here
image_paths = ['./img_{}.png'.format(i) for i in range(5)] 
labels = ...  # List of labels

df = pd.DataFrame()
df['filename'] = image_paths
df['class'] = labels

generator = ImageDataGenerator().flow_from_dataframe(dataframe=df, 
                                                    directory='./',
                                                    x_col='filename',
                                                    y_col='class')

The ImageDataGenerator expects the class column in the dataframe to contain string labels associated with the images. Internally, it will map these labels to class integers. You can inspect this mapping by calling the class_indices attribute. After instantiating our generator with the following list of labels:

labels = ['cat', 'cat', 'cat', 'dog', 'dog']

the class_indices mapping will look as follows:

generator.class_indices
> {'cat': 0, 'dog': 1}

Let's instantiate the generator again, but change the label of the first image:

labels = ['dog', 'cat', 'cat', 'dog', 'dog']
# After re-instantiating the generator
generator.class_indices
> {'dog': 0, 'cat': 1}

The integer encoding of our classes is swapped, which suggests that the internal mapping of labels to class integers depends on the order in which the different classes are encountered.

You are mapping cat to 1 and dog to 0, but the ImageDataGenerator interprets these as label strings and maps them to integers internally.

Now what happens if the first image in your directory is a cat?

labels = [1, 0, 1, 0, 0] # ['cat', 'dog', 'cat', 'dog', 'dog']
# After re-instantiating the generator
generator.class_indices
> {1: 0, 0: 1}  # !

And this is the source of your confusion. :) To avoid this, either:

use 'cat' and 'dog' in the label column of your dataframe and let the ImageDataGenerator handle the mapping for you
pass a list of classes to the classes argument in the call to flow_from_dataframe to specify the mapping explicitly.