XOR neural network does not learn

2019-06-03 06:22发布

问题:

I am trying to solve the very simple non-linear problem. It is XOR gate. I my school knowledge. XOR can be solve by using 2 input nodes, 2 hidden layer nodes. And 1 output. It is binary classification problem.

I generate the 1000 of random integer number it is 0 or 1 and then do backpropagation. But for some unknown reason my network has not learned anything. The training accuracy is constant at 50.

# coding: utf-8
import matplotlib
import torch
import torch.nn as nn
from torch.autograd import Variable

matplotlib.use('TkAgg')  # My buggy OSX 10.13.6 requires this
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
from tqdm import tqdm
import random

N = 1000
batch_size = 10
epochs = 40
hidden_size = 2
output_size = 1
lr = 0.1


def return_xor(N):
    tmp_x = []
    tmp_y = []
    for i in range(N):
        a = (random.randint(0, 1) == 1)
        b = (random.randint(0, 1) == 1)
        if (a and not b) or (not a and b):
            q = True
        else:
            q = False
        input_features = (a, b)
        output_class = q
        tmp_x.append(input_features)
        tmp_y.append(output_class)
    return tmp_x, tmp_y


# In[495]:


# Training set
x, y = return_xor(N)
x = torch.tensor(x, dtype=torch.float, requires_grad=True)
y = torch.tensor(y, dtype=torch.float, requires_grad=True)
# Test dataset
x_test, y_test = return_xor(100)
x_test = torch.tensor(x_test)
y_test = torch.tensor(y_test)


class MyDataset(Dataset):
    """Define my own `Dataset` in order to use `Variable` with `autograd`"""

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return len(self.x)


dataset = MyDataset(x, y)
test_dataset = MyDataset(x_test, y_test)

print(dataset.x.shape)
print(dataset.y.shape)

# Make data iterable by loading to a loader. Shuffle, batch_size kwargs put them here in order to remind I myself
train_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

print(f"They are {len(train_loader)} batches in the dataset")
shown = 0
for (x, y) in train_loader:
    if shown == 1:
        break
    print(f"{x.shape} {x.dtype}")
    print(f"{y.shape} {y.dtype}")
    shown += 1


class MyModel(nn.Module):
    """
    Binary classification
    2 input nodes
    2 hidden nodes
    1 output node
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = torch.nn.Linear(input_size, hidden_size)
        self.fc2 = torch.nn.Linear(hidden_size, output_size)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, out):
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out


# Create my network
net = MyModel(dataset.x.shape[1], hidden_size, output_size)
CUDA = torch.cuda.is_available()
if CUDA:
    net = net.cuda()
criterion = torch.nn.BCELoss(reduction='elementwise_mean')
optimizer = torch.optim.SGD(net.parameters(), lr=lr)

# Train the network
correct_train = 0
total_train = 0
for epoch in range(epochs):
    for i, (batches, labels) in enumerate(train_loader):
        batcesh = Variable(batches.float())
        labels = Variable(labels.float())
        output = net(batches)  # Forward pass
        optimizer.zero_grad()

        loss = criterion(output, labels.view(10, 1))
        loss.backward()
        optimizer.step()
        total_train += labels.size(0)
        correct_train += (predicted == labels.long()).sum()
        if (i + 1) % 10 == 0:
            print(f"""
                Epoch {epoch+1}/{epochs}, 
                Iteration {i+1}/{len(dataset)//batch_size}, 
                Training Loss: {loss.item()},
                Training Accuracy: {100*correct_train/total_train}
              """)

Solution:
I did initialized weight, Adaptive learning rate https://github.com/elcolie/nnbootcamp/blob/master/Study-XOR.ipynb

回答1:

I am not sure what results you are getting, as the code you have posted in the question doesn't work (It gives errors with pytorch 0.4.1 like predicted not defined etc). But syntax issues apart, there are other problems.

Your model is not actually two layer as it does not use non-linearity after the first output. Effectively this is one layer network and to fix that you can modify your model's forward as follows:

def forward(self, out):
    out = torch.nn.functional.relu(self.fc1(out))
    out = self.fc2(out)
    out = self.sigmoid(out)
    return out

You can try sigmoid or tanh non-linearity as well... but the non-linearity is a must. This should fix the problem.

I also see that you are using only 2 hidden units. This might be restrictive and you might want to increase that to something like 5 or 10.