Trouble compiling BERT model

I have successfully trained the following PyTorch model using Hugging Face’s bert-base-uncased:

class BERTGRUSentiment(nn.Module):
    """Sentiment classifier: a BERT encoder followed by a GRU and a linear head.

    BERT is used purely as a feature extractor: ``forward`` runs it under
    ``torch.no_grad()``, so no gradients flow into it from this module.
    The GRU consumes the per-token BERT embeddings and its final hidden
    state(s) are projected to ``output_dim`` logits.
    """

    def __init__(self,
                 bert,
                 hidden_dim,
                 output_dim,
                 n_layers,
                 bidirectional,
                 dropout):
        """Build the GRU and output layers on top of ``bert``.

        Args:
            bert: Encoder module exposing ``config.to_dict()['hidden_size']``
                and returning a tuple whose first item is the token
                embeddings.
            hidden_dim: Size of the GRU hidden state (per direction).
            output_dim: Number of output logits.
            n_layers: Number of stacked GRU layers.
            bidirectional: Whether the GRU runs in both directions.
            dropout: Dropout probability, used between GRU layers and on
                the final hidden state.
        """
        super().__init__()

        self.bert = bert

        embedding_dim = bert.config.to_dict()['hidden_size']

        # nn.GRU only applies dropout *between* layers, so it is disabled
        # for a single-layer GRU.
        self.rnn = nn.GRU(embedding_dim,
                          hidden_dim,
                          num_layers=n_layers,
                          bidirectional=bidirectional,
                          batch_first=True,
                          dropout=0 if n_layers < 2 else dropout)

        # A bidirectional GRU yields one final hidden state per direction,
        # which forward() concatenates before the linear layer.
        self.out = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        """Return logits for a batch of token-id sequences.

        Args:
            text: Tensor of shape [batch size, sent len].

        Returns:
            Tensor of shape [batch size, out dim].
        """
        # text = [batch size, sent len]

        # BERT acts as a fixed feature extractor here: no gradients are tracked.
        with torch.no_grad():
            embedded = self.bert(text)[0]

        # embedded = [batch size, sent len, emb dim]

        _, hidden = self.rnn(embedded)

        # hidden = [n layers * n directions, batch size, hid dim]

        if self.rnn.bidirectional:
            # The last two entries are the top layer's forward and backward
            # final states; join them along the feature dimension.
            hidden = self.dropout(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1))
        else:
            hidden = self.dropout(hidden[-1, :, :])

        # hidden = [batch size, hid dim * n directions]

        output = self.out(hidden)

        # output = [batch size, out dim]

        return output

I am now trying to use TVM to compile the model to Relay as follows:

# Wrap the BERT encoder in the GRU sentiment model and move it to the target device.
bert = model.BERTGRUSentiment(bert, HIDDEN_DIM, OUTPUT_DIM, N_LAYERS, BIDIRECTIONAL, DROPOUT)
bert.to(device)
# Switch to inference mode *before* tracing so dropout is recorded as a no-op.
bert.eval()

print(f"cuda: {next(bert.parameters()).is_cuda}")

# Example input used for tracing; unsqueeze(0) adds the batch dimension,
# giving [1, sent len] (presumably token ids -- confirm against model.preprocess).
example = model.preprocess(model.tokenizer, "it was ok").unsqueeze(0)

# The Relay input must have the shape of the traced tensor ([1, sent len]),
# not BERT's hidden size.
input_name = "text"
input_shape = list(example.shape)

shape_list = [(input_name, input_shape)]

scripted_model = torch.jit.trace(bert, example).eval()

print(scripted_model)

print("starting")

# Convert the traced TorchScript module into a Relay module plus its parameters.
mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)

print(mod)

target = tvm.target.cuda()

with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)

However, whenever I try this, I get the following error when calling relay.frontend.from_pytorch:

Traceback (most recent call last):
  File "compile.py", line 40, in <module>
    mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
  File "/home/moe/tvm/python/tvm/relay/frontend/pytorch.py", line 5002, in from_pytorch
    outputs = converter.convert_operators(operator_nodes, outputs, ret_name)
  File "/home/moe/tvm/python/tvm/relay/frontend/pytorch.py", line 4267, in convert_operators
    _get_input_types(op_node, outputs, default_dtype=self.default_dtype),
  File "/home/moe/tvm/python/tvm/relay/frontend/pytorch.py", line 1782, in linear
    [inputs[0], _op.transpose(inputs[1], axes=(1, 0))], input_types[:2]
  File "/home/moe/tvm/python/tvm/relay/frontend/pytorch.py", line 1973, in matmul
    raise AssertionError(msg)
AssertionError: Tensors being multiplied do not have compatible shapes.

This happens even though the model runs and trains fine. Is there a reason for this?

It seems that this error is occurring in the GRU layer. The mismatched dimensions are (2, 2, 256) and (1, 512). Could the issue be related to the concatenation?