When I call optimizer.step() in my code, I get this error:
RuntimeError: sqrt not implemented for 'torch.LongTensor'
C:\Program Files\Anaconda3\lib\site-packages\IPython\core\magic.py in <lambda>(f, *a, **k)
186 # but it's overkill for just that one bit of state.
187 def magic_deco(arg):
--> 188 call = lambda f, *a, **k: f(*a, **k)
189
190 if callable(arg):
C:\Program Files\Anaconda3\lib\site-packages\IPython\core\magics\execution.py in time(self, line, cell, local_ns)
1178 else:
1179 st = clock2()
--> 1180 exec(code, glob, local_ns)
1181 end = clock2()
1182 out = None
<timed exec> in <module>()
C:\Program Files\Anaconda3\lib\site-packages\torch\optim\adam.py in step(self, closure)
98 denom = max_exp_avg_sq.sqrt().add_(group['eps'])
99 else:
--> 100 denom = exp_avg_sq.sqrt().add_(group['eps'])
101
102 bias_correction1 = 1 - beta1 ** state['step']
RuntimeError: sqrt not implemented for 'torch.LongTensor'
I am using my own loss function. How do I debug this error? Is there a quick way to check the types of all my variables? I went through them manually and they are all float (including the output of my custom loss), so I can't figure out why I'm getting an error about a LongTensor at all. How does optimizer.step() actually work in PyTorch?
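In case it helps: the quickest blanket check I found is to loop over the model's parameters and print each one's tensor type (a minimal sketch; model is the LSTM instance defined below):

for name, param in model.named_parameters():
    print(name, param.type())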
Just in case, below is most of the code. This is the model:
class LSTM(nn.Module):
    def __init__(self, mel_channels=40, frames=81, hidden_dim=768, proj_dim=256):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.mel_channels = mel_channels
        self.frames = frames
        self.proj_dims = proj_dim

        weight = torch.tensor([10])
        bias = torch.tensor([-5])
        self.w = nn.Parameter(weight)
        self.b = nn.Parameter(bias)

        # Three stacked LSTMs, each followed by a linear projection;
        # the first takes mel features, the rest take the projected output.
        self.lstm1 = nn.LSTM(mel_channels, hidden_dim, batch_first=False)
        self.lstm2 = nn.LSTM(proj_dim, hidden_dim, batch_first=False)
        self.lstm3 = nn.LSTM(proj_dim, hidden_dim, batch_first=False)
        self.lstms = [self.lstm1, self.lstm2, self.lstm3]

        self.proj1 = nn.Linear(hidden_dim, proj_dim)
        self.proj2 = nn.Linear(hidden_dim, proj_dim)
        self.proj3 = nn.Linear(hidden_dim, proj_dim)
        self.projs = [self.proj1, self.proj2, self.proj3]
    def init_states(self, batchsize):
        # Before we've done anything, we don't have any hidden state.
        # The axes semantics are (num_layers, minibatch_size, hidden_dim);
        # see the PyTorch LSTM docs for why.
        return [(torch.zeros(1, batchsize, self.hidden_dim),
                 torch.zeros(1, batchsize, self.hidden_dim)),
                (torch.zeros(1, batchsize, self.hidden_dim),
                 torch.zeros(1, batchsize, self.hidden_dim)),
                (torch.zeros(1, batchsize, self.hidden_dim),
                 torch.zeros(1, batchsize, self.hidden_dim))]
    def forward(self, inputs, states=None):
        time, batchsize, inputdim = list(inputs.shape)
        if states is None:
            states = self.init_states(batchsize)

        output = inputs
        print(output.type())
        for i in range(3):
            print(output.type())
            output, state = self.lstms[i](output, states[i])
            output = self.projs[i](output)
            # perform normalization on this output here

        output = output[-1]
        print(output.type())
        output = F.normalize(output, p=2, dim=-1)
        print(output.type())
        self.state = state
        print(output.type())
        return output
    def get_w(self):
        print(self.w.type())
        return self.w

    def get_b(self):
        print(self.b.type())
        return self.b

    def get_state(self):
        print(self.state)
        return self.state
This is the custom loss:
class CustomLoss(_Loss):
    def __init__(self, size_average=True, reduce=True):
        super(CustomLoss, self).__init__(size_average, reduce)

    def forward(self, S, N, M, type='softmax'):
        return self.loss_cal(S, N, M, type)

    def loss_cal(self, S, N, M, type="softmax"):
        self.A = torch.cat([S[i * M:(i + 1) * M, i:(i + 1)]
                            for i in range(N)], dim=0)
        if type == "softmax":
            self.B = torch.log(torch.sum(torch.exp(S.float()), dim=1, keepdim=True) + 1e-8)
            total = torch.abs(torch.sum(self.A - self.B))
        else:
            raise AssertionError("loss type should be softmax or contrast!")
        return total
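To verify my claim that the loss output is float, I ran the loss on random data (the shapes here are made up purely for illustration; S is an (N*M, N) similarity matrix, and CustomLoss is the class above):

N, M = 4, 5  # hypothetical: N speakers, M utterances each
S = torch.rand(N * M, N)
print(CustomLoss().loss_cal(S=S, N=N, M=M).type())  # torch.FloatTensor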
Finally, this is the main file:
model = LSTM()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
model = model.to(device)

best_loss = 100.
generator = SpeakerVerificationDataset()
dataloader = DataLoader(generator, batch_size=4,
                        shuffle=True, num_workers=0)

loss_history = []
update_counter = 1
for epoch in range(NUM_EPOCHS):
    print("Epoch # :", epoch + 1)
    for step in range(STEPS_PER_EPOCH):
        # get a batch from the dataloader
        for i_batch, sample_batched in enumerate(dataloader):
            print(sample_batched['MelData'].size())
            inputs = sample_batched['MelData'].view(180, M * N, 40).float()
            print(inputs.size())

            # remove previous gradients
            optimizer.zero_grad()

            # forward pass and loss at this iteration
            predictions = model(inputs)
            w = model.w
            b = model.b
            S = similarity(output=predictions, w=w, b=b)

            loss_func = CustomLoss()
            loss = loss_func.loss_cal(S=S, N=N, M=M)
            loss.backward()

            # update the weights
            print("start optimizing")
            optimizer.step()

            loss_history.append(loss.item())
            print(update_counter, ":", loss_history[-1])
            update_counter += 1
    print()

    # save the weights
    torch.save(model.state_dict(), CHECKPOINT_PATH)
    print("Saving weights")
    print()
    print()
The error comes from these two lines in the model's __init__:

weight = torch.tensor([10])
bias = torch.tensor([-5])

Had to change it to

weight = torch.tensor([10.0])
bias = torch.tensor([-5.0])
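For anyone hitting the same thing: as far as I can tell from torch/optim/adam.py, Adam creates its per-parameter state buffers with torch.zeros_like(p.data), so they inherit each parameter's dtype. Since w and b were built from integer literals, their exp_avg_sq state was a LongTensor, and that is the tensor the traceback shows .sqrt() failing on. A minimal standalone sketch of the dtype inference (not from my project; newer PyTorch versions may promote integers to float for sqrt):

import torch

print(torch.tensor([10]).dtype)    # torch.int64  -> a LongTensor
print(torch.tensor([10.0]).dtype)  # torch.float32

# Adam's state inherits the parameter's dtype via zeros_like:
state = torch.zeros_like(torch.tensor([10]))
# state.sqrt()  # raised "sqrt not implemented for 'torch.LongTensor'" for me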