When the `forward()` function of my neural network is executed (after the training phase has completed), I get `RuntimeError: Expected object of backend CUDA but got backend CPU for argument #4 'mat1'`.
The stack trace shows that the error is raised by the call `output = self.layer1(x)`. I have tried to move all of the tensor data to my GPU, but it seems I have missed something that also needs to be moved.
Here is the code I have tried:
# Run on the first CUDA device when one is available; otherwise fall back
# to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
class NeuralNet(nn.Module):
    """Fully connected classifier: Linear -> ReLU -> Linear.

    The module moves itself to the module-level ``device`` when it is
    constructed, so callers must pass it tensors that live on that same
    device.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers and place the whole module on ``device``.

        Args:
            input_size: Number of features in the last dimension of the input.
            hidden_size: Width of the hidden layer.
            output_size: Number of output scores produced per input row.
        """
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        # Move every registered parameter in one call instead of per layer.
        # ``.to(device)`` accepts both CPU and CUDA devices, whereas
        # ``.cuda(device)`` rejects a CPU device and therefore broke the
        # CPU fallback selected when CUDA is unavailable.
        self.to(device)

    def forward(self, x):
        """Return the raw (un-normalised) output scores for the batch ``x``.

        ``x`` must already be on the same device as the model's parameters;
        passing a CPU tensor to a CUDA-resident model raises a RuntimeError
        inside ``layer1``.
        """
        hidden = self.relu(self.layer1(x))
        return self.layer2(hidden)
# Train NeuralNet on MNIST and print the accuracy on the test split.
# NOTE(review): this excerpt has lost its indentation and appears to be
# missing some lines; the comments below describe only what is visible.
def main():
# NOTE(review): the list opened by Compose([ is never closed in this excerpt;
# the transform steps themselves are not visible.
transform = transforms.Compose([
# Both splits are read from a local directory (download=False).
mnist_trainset = datasets.MNIST(root='D:\\MNIST', train=True, download=False, transform=transform)
mnist_testset = datasets.MNIST(root='D:\\MNIST', train=False, download=False, transform=transform)
# Batches of 100; only the training data is shuffled.
train_loader = DataLoader(dataset=mnist_trainset, batch_size=100, shuffle=True)
test_loader = DataLoader(dataset=mnist_testset, batch_size=100, shuffle=False)
# 784 == 28 * 28, matching the reshape applied to each image batch below.
input_size = 784
hidden_size = 500
output_size = 10
num_epochs = 5
learning_rate = 0.001
model = NeuralNet(input_size, hidden_size, output_size)
lossFunction = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# NOTE(review): losses_in_epochs is never written to in the visible code.
losses_in_epochs = []
total_step = len(train_loader)
# Training loop: each batch is moved to `device`, flattened to rows of
# 28 * 28 values, passed through the model and scored with the loss.
# NOTE(review): no optimizer.zero_grad() / loss.backward() / optimizer.step()
# is visible here -- presumably lost from this excerpt; confirm.
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader):
images = images.to(device)
labels = labels.to(device)
images = images.reshape(-1, 28 * 28)
out = model(images)
loss = lossFunction(out, labels)
# Progress report every 100 steps.
# NOTE(review): the .format(...) call is truncated in this excerpt.
if (i + 1) % 100 == 0:
print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1, total_step,
# NOTE(review): the body of this condition is not visible in this excerpt.
if (i % 600) == 0:
# Evaluation: count correct predictions over the test set with autograd off.
# NOTE(review): unlike the training loop, `images` and `labels` are not moved
# to `device` here, so a model on the GPU receives CPU tensors -- this matches
# the reported "Expected object of backend CUDA but got backend CPU" error.
with torch.no_grad():
correct = 0
total = 0
for images, labels in test_loader:
images = images.reshape(-1, 28 * 28)
out = model(images)
# The index of the largest score along dim 1 is taken as the predicted class.
_, predicted = torch.max(out.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))
# Script entry guard.
# NOTE(review): the guarded body (presumably a call to main()) is missing
# from this excerpt.
if __name__ == '__main__':
The software stack:
Python 3.7.1
torch 1.0.1 (with Cuda 9.0)
Windows 10 64-bit
The error happens only at the testing step, when you calculate the accuracy; this might already give you a hint. The training loop runs without a problem.
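The error is simply that you don't send the images and labels to the GPU at this step, so the model (which lives on the GPU) receives CPU tensors. This is your corrected evaluation loop:

    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)  # was missing
            labels = labels.to(device)  # was missing
            images = images.reshape(-1, 28 * 28)
            out = model(images)
            _, predicted = torch.max(out.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))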
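By the way, you don't need to send each of your layers to the GPU separately (in your class's `__init__()`). It's better to just send the whole instantiated model to the GPU at once, e.g. `model = NeuralNet(input_size, hidden_size, output_size).to(device)`.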