Debug PyTorch Optimizer

Asked 2019-08-14 07:08

When I call optimizer.step() in my code, I get this error:

RuntimeError: sqrt not implemented for 'torch.LongTensor'

C:\Program Files\Anaconda3\lib\site-packages\IPython\core\magic.py in <lambda>(f, *a, **k)
    186     # but it's overkill for just that one bit of state.
    187     def magic_deco(arg):
--> 188         call = lambda f, *a, **k: f(*a, **k)
    189 
    190         if callable(arg):

C:\Program Files\Anaconda3\lib\site-packages\IPython\core\magics\execution.py in time(self, line, cell, local_ns)
   1178         else:
   1179             st = clock2()
-> 1180             exec(code, glob, local_ns)
   1181             end = clock2()
   1182             out = None

<timed exec> in <module>()

C:\Program Files\Anaconda3\lib\site-packages\torch\optim\adam.py in step(self, closure)
     98                     denom = max_exp_avg_sq.sqrt().add_(group['eps'])
     99                 else:
--> 100                     denom = exp_avg_sq.sqrt().add_(group['eps'])
    101 
    102                 bias_correction1 = 1 - beta1 ** state['step']

RuntimeError: sqrt not implemented for 'torch.LongTensor'

I am using my own loss function. My question is: how do I go about debugging this error? Is there a quick way to see the type of all my variables? I have been checking them manually and all of them are floats (including the output of my custom loss), so I can't figure out why I am getting an error about a LongTensor at all. How does optimizer.step() actually work in PyTorch?
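For example, is there something along the lines of the sketch below that dumps the dtype of everything the optimizer will update? (This is just the kind of check I mean, written from memory, not something I found in the docs.)

# Sketch of the kind of check I'm after: print the dtype of every parameter the
# optimizer updates, plus the dtype of its gradient if one has been computed.
for name, p in model.named_parameters():
    grad_dtype = p.grad.dtype if p.grad is not None else None
    print(name, p.dtype, grad_dtype)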

Just in case, below is most of the code. This is the model:

import torch
import torch.nn as nn
import torch.nn.functional as F


class LSTM(nn.Module):

    def __init__(self, mel_channels=40, frames=81, hidden_dim=768, proj_dim=256):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.mel_channels = mel_channels
        self.frames = frames
        self.proj_dims = proj_dim

        weight = torch.tensor([10])
        bias = torch.tensor([-5])
        self.w = nn.Parameter(weight)
        self.b = nn.Parameter(bias)


        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm1 = nn.LSTM(mel_channels, hidden_dim, batch_first=False)
        print("here1")
        self.lstm2 = nn.LSTM(proj_dim, hidden_dim, batch_first=False)
        self.lstm3 = nn.LSTM(proj_dim, hidden_dim, batch_first=False)
        self.lstms = [self.lstm1, self.lstm2, self.lstm3]

        self.proj1 = nn.Linear(hidden_dim, proj_dim)
        self.proj2 = nn.Linear(hidden_dim, proj_dim)
        self.proj3 = nn.Linear(hidden_dim, proj_dim)
        self.projs = [self.proj1, self.proj2, self.proj3]

    def init_states(self, batchsize):
        # Before we've done anything, we dont have any hidden state.
        # Refer to the Pytorch documentation to see exactly
        # why they have this dimensionality.
        # The axes semantics are (num_layers, minibatch_size, hidden_dim)
        return [(torch.zeros(1, batchsize, self.hidden_dim),
                 torch.zeros(1, batchsize, self.hidden_dim)),
                (torch.zeros(1, batchsize, self.hidden_dim),
                 torch.zeros(1, batchsize, self.hidden_dim)),
                (torch.zeros(1, batchsize, self.hidden_dim),
                 torch.zeros(1, batchsize, self.hidden_dim)),
                ]

    def forward(self, inputs, states=None):
        time, batchsize, inputdim = list(inputs.shape)

        if states is None:
            states = self.init_states(batchsize)
        output = inputs

        print(output.type())

        for i in range(3):
            print(output.type())
            output, state = self.lstms[i](output, states[i])
            output = self.projs[i](output)
        # perform normalization on this output here
        output = output[-1]
        print(output.type())
        output = F.normalize(output, p=2, dim=-1)
        print(output.type())

        self.state = state

        print(output.type())

        return output

    def get_w(self):
        print(self.w.type())
        return self.w

    def get_b(self):
        print(self.b.type())
        return self.b

    def get_state(self):
        print(self.state)
        return self.state

This is the custom loss:

from torch.nn.modules.loss import _Loss


class CustomLoss(_Loss):

    def __init__(self, size_average=True, reduce=True):
        super(CustomLoss, self).__init__(size_average, reduce)

    def forward(self, S, N, M, type='softmax'):
        return self.loss_cal(S, N, M, type)

    def loss_cal(self, S, N, M, type="softmax"):
        self.A = torch.cat([S[i * M:(i + 1) * M, i:(i + 1)]
                           for i in range(N)], dim=0)

        if type == "softmax":
            self.B = torch.log(torch.sum(torch.exp(S.float()), dim=1, keepdim=True) + 1e-8)
            total = torch.abs(torch.sum(self.A - self.B))
        else:
            raise AssertionError("loss type should be softmax or contrast !")
        return total
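
For shape reference, this is roughly how the loss can be exercised on its own with a toy similarity matrix (the sizes below are made up; in the real run S comes out of similarity()):

# Toy check of the expected shapes: N speakers with M utterances each, so S is
# an (N * M, N) similarity matrix; the values here are just random placeholders.
N, M = 4, 3
S_demo = torch.rand(N * M, N)
loss_demo = CustomLoss()(S_demo, N, M)
print(loss_demo.type())  # torch.FloatTensor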

Finally, this is the main training script:

model = LSTM()
optimizer = optim.Adam(list(model.parameters()), lr=LEARNING_RATE)
model = model.to(device)

best_loss = 100.
generator = SpeakerVerificationDataset()
dataloader = DataLoader(generator, batch_size=4,
                        shuffle=True, num_workers=0)

loss_history = []
update_counter = 1

for epoch in range(NUM_EPOCHS):
    print("Epoch # : ", epoch + 1)

    for step in range(STEPS_PER_EPOCH):
        # get batch dataset
        for i_batch, sample_batched in enumerate(dataloader):
            print(sample_batched['MelData'].size())
            inputs = sample_batched['MelData'].view(180, M * N, 40).float()
            print(inputs.size())
            # remove previous gradients
            optimizer.zero_grad()

            # get gradients and loss at this iteration
            #predictions,state,w,b = model(inputs)
            predictions = model(inputs)
            w = model.w
            b = model.b
            predictions = similarity(output=predictions, w=w, b=b)
            S = predictions

            loss_func = CustomLoss()
            loss = loss_func.loss_cal(S=S, N=N, M=M)
            loss.backward()


            # update the weights
            print("start optimizing")
            optimizer.step()

            loss_history.append(loss.item())
            print(update_counter, ":", loss_history[-1])

            update_counter += 1
    print()

    # save the weights
    torch.save(model.state_dict(), CHECKPOINT_PATH)

    print("Saving weights")
    print()
print()

Tags: pytorch
1 Answer
贪生不怕死
Answered 2019-08-14 07:24

The error comes from here:

weight = torch.tensor([10])
bias = torch.tensor([-5])
self.w = nn.Parameter(weight)
self.b = nn.Parameter(bias)

I had to change it to:

weight = torch.tensor([10.0])
bias = torch.tensor([-5.0])
self.w = nn.Parameter(weight)
self.b = nn.Parameter(bias)
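
The reason: torch.tensor([10]) infers an integer dtype from the Python int, so w and b become LongTensor parameters. Adam allocates its per-parameter state buffers (exp_avg, exp_avg_sq) in torch/optim/adam.py with the same dtype as the parameter, so the exp_avg_sq.sqrt() call in the traceback runs on a LongTensor and fails. You can see the dtype inference directly:

# torch.tensor infers the dtype from the Python values it is given.
print(torch.tensor([10]).dtype)    # torch.int64  -> a LongTensor parameter
print(torch.tensor([10.0]).dtype)  # torch.float32 -> a FloatTensor parameter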