libtorch convert ModuleDict into C++

Does anyone know how to convert a PyTorch model (with ModuleDict member variables) into C++ through libtorch tracing?

The main problem is that its forward function accepts only ONE channel_tensor per call. For the whole C++ model to work, we need to trace the model with ALL possible channel_tensor values.

I can trace one channel with id = 1 as shown below, but how can we combine the traced models for channel_id = 2, 3, 4, 5?

# Example channel input: a one-element int64 tensor holding the value 1.
channel = torch.ones(1, dtype=torch.int64)
# Trace the model with this single example channel.
# NOTE(review): forward() turns channel_tensor into a Python value via .item()
# and uses it as a ModuleDict key, so the trace presumably records only the
# channel-1 sub-modules — confirm against the torch.jit.trace documentation.
traced_script_module = torch.jit.trace(model, (premise, premise_length, hypotheses, hypotheses_length, channel))

# Run the traced module once on the same example inputs.
output = traced_script_module(premise, premise_length, hypotheses, hypotheses_length, channel)
# Serialize the traced module to disk.
traced_script_module.save('deploy-trace-multitask.pt')

Snippet of model definition:

    self._word_embedding = nn.Embedding(self.vocab_size,
                                        self.embedding_dim,
                                        padding_idx=padding_idx,
                                        _weight=embeddings)

    if self.dropout:
        self._rnn_dropout = RNNDropout(p=self.dropout) #shared by all tasks
        # self._rnn_dropout = nn.Dropout(p=self.dropout)

    self._encoding = Seq2SeqEncoder(nn.LSTM,
                                    self.embedding_dim,
                                    self.hidden_size,
                                    bidirectional=True)

    #multi-task
    self._attention = nn.ModuleDict({})
    self._projection = nn.ModuleDict({})
    self._classification = nn.ModuleDict({})
    for channel in channels_list:
        self.update(channel)

    # Initialize all weights and biases in the model.
    self.apply(_init_esim_weights)

def update(self, channel):
    """Register the task-specific layers for one channel.

    The channel identifier is converted to ``str`` and used as the key
    under which a new attention module, projection stack and
    classification stack are stored in ``self._attention``,
    ``self._projection`` and ``self._classification`` respectively.
    An existing entry under the same key is replaced.

    Args:
        channel: Identifier of the channel; any value accepted by ``str()``.
    """
    key = str(channel)
    hidden = self.hidden_size

    # Modules are built in the same order as before (attention, projection,
    # classification) so parameter initialisation consumes the RNG identically.
    self._attention[key] = SoftmaxAttention()

    projection_in = 4 * 2 * hidden
    self._projection[key] = nn.Sequential(
        nn.Linear(projection_in, hidden),
        nn.ReLU(),
    )

    classifier_layers = [
        nn.Dropout(p=self.dropout),
        nn.Linear(4 * hidden, hidden),
        nn.Tanh(),
        nn.Dropout(p=self.dropout),
        nn.Linear(hidden, self.num_classes),
    ]
    self._classification[key] = nn.Sequential(*classifier_layers)

def forward(self,
            premises,
            premises_lengths,
            hypotheses,
            hypotheses_lengths,
            channel_tensor): #must be a tensor
    """
    Args:
        premises: A batch of varaible length sequences of word indices
            representing premises. The batch is assumed to be of size
            (batch, premises_length).
        premises_lengths: A 1D tensor containing the lengths of the
            premises in 'premises'.
        hypothesis: A batch of varaible length sequences of word indices
            representing hypotheses. The batch is assumed to be of size
            (batch, hypotheses_length).
        hypotheses_lengths: A 1D tensor containing the lengths of the
            hypotheses in 'hypotheses'.

    Returns:
        logits: A tensor of size (batch, num_classes) containing the
            logits for each output class of the model.
        probabilities: A tensor of size (batch, num_classes) containing
            the probabilities of each output class in the model.
    """
    channel_id = channel_tensor.item()
    channel = str(channel_id)
    premises_mask = get_mask(premises, premises_lengths).to(self.device)
    hypotheses_mask = get_mask(hypotheses, hypotheses_lengths)\
        .to(self.device)

    embedded_premises = self._word_embedding(premises)
    embedded_hypotheses = self._word_embedding(hypotheses)

    if self.dropout:
        embedded_premises = self._rnn_dropout(embedded_premises)
        embedded_hypotheses = self._rnn_dropout(embedded_hypotheses)

    encoded_premises = self._encoding(embedded_premises,
                                      premises_lengths)
    encoded_hypotheses = self._encoding(embedded_hypotheses,
                                        hypotheses_lengths)

    attended_premises, attended_hypotheses =\
        self._attention[channel](encoded_premises, premises_mask,
                        encoded_hypotheses, hypotheses_mask)
    """ rest of the code are omitted """