I'm facing issues fitting a simple y = 4*x1 line with 2 data points using PyTorch. When running the inference code, the model outputs the same value for any input, which is strange. Please find the code attached, along with the data files I used. I'd appreciate any help here.
import torch
import numpy as np
import pandas as pd
# Load the training table and the held-out inputs from CSV files located in
# the current working directory.
df = pd.read_csv('data.csv')
test_data = pd.read_csv('test_data.csv')

# Select BOTH the feature and the label with a list of column names so each
# stays 2-D, i.e. shape (N, 1). The network emits predictions of shape (N, 1);
# a 1-D target of shape (N,) would be broadcast against them by nn.MSELoss
# into an (N, N) difference matrix, which drives every prediction toward the
# mean of the targets instead of fitting the line.
inputs = df[['x1']]
target = df[['y']]

# Convert to float32 tensors, the dtype nn.Linear parameters use by default.
inputs = torch.tensor(inputs.values).float()
target = torch.tensor(target.values).float()
# NOTE(review): every column of test_data.csv is passed to the model as-is;
# the network accepts exactly one feature, so confirm the file holds only x1.
test_data = torch.tensor(test_data.values).float()
#Defining Network Architecture
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Tiny fully connected regressor: 1 input -> hidden layer (ReLU) -> 1 output.

    Args:
        hidden1: Number of units in the hidden layer. Defaults to 3, so
            ``Net()`` builds the same architecture as before.
    """

    def __init__(self, hidden1: int = 3):
        super().__init__()
        # Input layer: one scalar feature in, ``hidden1`` activations out.
        self.fc1 = nn.Linear(1, hidden1)
        # Output layer: collapse the hidden activations to a single value.
        self.fc3 = nn.Linear(hidden1, 1)

    def forward(self, x):
        """Return the prediction for ``x``.

        ``x`` must have a trailing dimension of size 1 (required by ``fc1``);
        the result has the same leading dimensions and a trailing size of 1.
        """
        x = F.relu(self.fc1(x))
        # No activation on the output: this is a regression head.
        return self.fc3(x)
# Instantiate the model and show its layer layout.
model = Net()
print(model)

# Mean-squared-error objective optimised with plain SGD.
criterion = nn.MSELoss()
# NOTE(review): lr=0.01 with unscaled inputs/targets may be too large for
# plain SGD and can make the network collapse to a constant output; consider
# a much smaller rate (e.g. 1e-5) or an adaptive optimiser -- confirm on data.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Full-batch training: every epoch is a single gradient step on all samples.
model.train()
epochs = 100
for epoch in range(epochs):
    # Gradients accumulate across backward() calls, so reset them first.
    optimizer.zero_grad()
    # Forward pass and loss.
    output = model(inputs)
    loss = criterion(output, target)
    # Backpropagate and update the parameters.
    loss.backward()
    optimizer.step()
    if epoch % 5 == 0:
        # .item() extracts the Python float so it can be formatted directly.
        print('Training Loss after epoch {:2d} is {:2.6f}'.format(epoch, loss.item()))

# Switch to evaluation mode and predict on unseen data. Autograd tracking is
# disabled because no gradients are needed at inference time.
model.eval()
with torch.no_grad():
    test_output = model(test_data)
print(test_output)
Below is the model output:
#model output
tensor([[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579]], grad_fn=<AddmmBackward>)
Your model is collapsing. You can probably see that from the training loss that your `print` statements report. You may want to use a lower learning rate (1e-5, 1e-6, etc.). Switching from `SGD(...)` to `Adam(...)` may be easier if you do not have much experience and want less trouble tuning these hyperparameters. Also, maybe 100 epochs is not enough. As you did not share an MCVE, I cannot tell you for sure what the problem is. Here is an MCVE of line fitting using the same `Net` you used:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
# Settings for the demo: number of optimisation steps, plus the exclusive
# upper bound and the spacing of the x grid.
epochs = 1000
max_range = 40
interval = 4

# DATA
# Training inputs sit on the grid 0, interval, 2*interval, ... (as a column
# vector), each shifted by uniform noise drawn from [-0.5, 0.5).
x_train = torch.arange(0, max_range, interval).float().reshape(-1, 1)
x_jitter = torch.rand(x_train.size(0), 1) - 0.5
x_train = x_train + x_jitter

# Training targets follow y = 4x on the noisy inputs, with an independent
# draw of the same uniform noise added on top.
y_train = 4 * x_train
y_jitter = torch.rand(x_train.size(0), 1) - 0.5
y_train = y_train + y_jitter

# Test inputs are the noise-free midpoints between consecutive grid values;
# their targets are the exact line.
x_test = torch.arange(interval // 2, max_range, interval).float().reshape(-1, 1)
y_test = x_test * 4
class Net(nn.Module):
    """Tiny fully connected regressor: 1 input -> hidden layer (ReLU) -> 1 output.

    Args:
        hidden1: Number of units in the hidden layer. Defaults to 3, so
            ``Net()`` builds the same architecture as before.
    """

    def __init__(self, hidden1: int = 3):
        super().__init__()
        # Input layer: one scalar feature in, ``hidden1`` activations out.
        self.fc1 = nn.Linear(1, hidden1)
        # Output layer: collapse the hidden activations to a single value.
        self.fc3 = nn.Linear(hidden1, 1)

    def forward(self, x):
        """Return the prediction for ``x``.

        ``x`` must have a trailing dimension of size 1 (required by ``fc1``);
        the result has the same leading dimensions and a trailing size of 1.
        """
        x = F.relu(self.fc1(x))
        # No activation on the output: this is a regression head.
        return self.fc3(x)
# Build the network and show its layer layout.
model = Net()
print(model)

# Mean-squared-error objective optimised with plain SGD at a small step size.
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)

# TRAIN
# Full-batch training: every epoch is a single gradient step on all samples.
model.train()
for epoch in range(epochs):
    # Gradients accumulate across backward() calls, so reset them first.
    optimizer.zero_grad()
    y_pred = model(x_train)
    loss = criterion(y_pred, y_train)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        # .item() extracts the Python float so it can be formatted directly.
        print('Training Loss after epoch {:2d} is {:2.6f}'.format(epoch, loss.item()))

# TEST
# Autograd tracking is disabled because no gradients are needed at inference.
model.eval()
with torch.no_grad():
    y_pred = model(x_test)
# Print one row per test sample with columns: x_test | y_pred | y_test.
print(torch.cat((x_test, y_pred, y_test), dim=-1))
This is what the data looks like:
And this is what the training looks like:
Training Loss after epoch 0 is 7416.805664
Training Loss after epoch 10 is 6645.655273
Training Loss after epoch 20 is 5792.936523
Training Loss after epoch 30 is 4700.106445
Training Loss after epoch 40 is 3245.384277
Training Loss after epoch 50 is 1779.370728
Training Loss after epoch 60 is 747.418579
Training Loss after epoch 70 is 246.781311
Training Loss after epoch 80 is 68.635155
Training Loss after epoch 90 is 17.332235
Training Loss after epoch 100 is 4.280161
Training Loss after epoch 110 is 1.170808
Training Loss after epoch 120 is 0.453974
...
Training Loss after epoch 970 is 0.232296
Training Loss after epoch 980 is 0.232090
Training Loss after epoch 990 is 0.231888
And this is what the output looks like:
| x_test | y_pred | y_test |
|:-------:|:--------:|:--------:|
| 2.0000 | 8.6135 | 8.0000 |
| 6.0000 | 24.5276 | 24.0000 |
| 10.0000 | 40.4418 | 40.0000 |
| 14.0000 | 56.3303 | 56.0000 |
| 18.0000 | 72.1884 | 72.0000 |
| 22.0000 | 88.0465 | 88.0000 |
| 26.0000 | 103.9047 | 104.0000 |
| 30.0000 | 119.7628 | 120.0000 |
| 34.0000 | 135.6210 | 136.0000 |
| 38.0000 | 151.4791 | 152.0000 |