Does anyone know how to convert a PyTorch model (with ModuleDict member variables) into C++ through LibTorch tracing?
The main problem is that its forward function accepts only ONE channel_tensor. To make the whole C++ model work, we need to trace the model with ALL possible channel_tensor values.
I can trace one channel with id = 1 as shown below, but how should we combine the traced models for channel_id = 2, 3, 4, 5?
# Trace the model for a single channel (id = 1) and serialize the result.
# NOTE: forward() converts channel_tensor to a Python value with .item() and
# uses it as a ModuleDict key, so the recorded trace follows only the
# sub-modules selected by this particular channel value.
channel = torch.ones(1, dtype=torch.int64)

# Bundle the example inputs once so tracing and the check call share them.
example_inputs = (
    premise,
    premise_length,
    hypotheses,
    hypotheses_length,
    channel,
)
traced_script_module = torch.jit.trace(model, example_inputs)

# Run the traced module once on the same inputs that were used for tracing.
output = traced_script_module(*example_inputs)

traced_script_module.save('deploy-trace-multitask.pt')
Snippet of model definition:
self._word_embedding = nn.Embedding(self.vocab_size,
self.embedding_dim,
padding_idx=padding_idx,
_weight=embeddings)
if self.dropout:
self._rnn_dropout = RNNDropout(p=self.dropout) #shared by all tasks
# self._rnn_dropout = nn.Dropout(p=self.dropout)
self._encoding = Seq2SeqEncoder(nn.LSTM,
self.embedding_dim,
self.hidden_size,
bidirectional=True)
#multi-task
self._attention = nn.ModuleDict({})
self._projection = nn.ModuleDict({})
self._classification = nn.ModuleDict({})
for channel in channels_list:
self.update(channel)
# Initialize all weights and biases in the model.
self.apply(_init_esim_weights)
def update(self, channel):
    """Register the per-channel attention, projection and classification heads.

    Args:
        channel: Identifier of the task/channel. It is converted with
            ``str()`` and that string is used as the key in each of the
            three ``nn.ModuleDict`` containers.
    """
    key = str(channel)
    hidden = self.hidden_size
    drop_p = self.dropout

    # Modules are created in the same order as before: attention,
    # projection, then classification.
    self._attention[key] = SoftmaxAttention()

    self._projection[key] = nn.Sequential(
        nn.Linear(4 * 2 * hidden, hidden),
        nn.ReLU(),
    )

    self._classification[key] = nn.Sequential(
        nn.Dropout(p=drop_p),
        nn.Linear(4 * hidden, hidden),
        nn.Tanh(),
        nn.Dropout(p=drop_p),
        nn.Linear(hidden, self.num_classes),
    )
def forward(self,
premises,
premises_lengths,
hypotheses,
hypotheses_lengths,
channel_tensor): #must be a tensor
"""
Args:
premises: A batch of varaible length sequences of word indices
representing premises. The batch is assumed to be of size
(batch, premises_length).
premises_lengths: A 1D tensor containing the lengths of the
premises in 'premises'.
hypothesis: A batch of varaible length sequences of word indices
representing hypotheses. The batch is assumed to be of size
(batch, hypotheses_length).
hypotheses_lengths: A 1D tensor containing the lengths of the
hypotheses in 'hypotheses'.
Returns:
logits: A tensor of size (batch, num_classes) containing the
logits for each output class of the model.
probabilities: A tensor of size (batch, num_classes) containing
the probabilities of each output class in the model.
"""
channel_id = channel_tensor.item()
channel = str(channel_id)
premises_mask = get_mask(premises, premises_lengths).to(self.device)
hypotheses_mask = get_mask(hypotheses, hypotheses_lengths)\
.to(self.device)
embedded_premises = self._word_embedding(premises)
embedded_hypotheses = self._word_embedding(hypotheses)
if self.dropout:
embedded_premises = self._rnn_dropout(embedded_premises)
embedded_hypotheses = self._rnn_dropout(embedded_hypotheses)
encoded_premises = self._encoding(embedded_premises,
premises_lengths)
encoded_hypotheses = self._encoding(embedded_hypotheses,
hypotheses_lengths)
attended_premises, attended_hypotheses =\
self._attention[channel](encoded_premises, premises_mask,
encoded_hypotheses, hypotheses_mask)
""" rest of the code are omitted """