I have successfully trained the following PyTorch model using Hugging Face’s bert-base-uncased:
class BERTGRUSentiment(nn.Module):
def __init__(self,
bert,
hidden_dim,
output_dim,
n_layers,
bidirectional,
dropout):
super().__init__()
self.bert = bert
embedding_dim = bert.config.to_dict()['hidden_size']
self.rnn = nn.GRU(embedding_dim,
hidden_dim,
num_layers = n_layers,
bidirectional = bidirectional,
batch_first = True,
dropout = 0 if n_layers < 2 else dropout)
self.out = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)
self.dropout = nn.Dropout(dropout)
def forward(self, text):
# text = [batch size, sent len]
with torch.no_grad():
embedded = self.bert(text)[0]
#embedded = [batch size, sent len, emb dim]
_, hidden = self.rnn(embedded)
#hidden = [n layers * n directions, batch size, emb dim]
if self.rnn.bidirectional:
hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1))
else:
hidden = self.dropout(hidden[-1,:,:])
#hidden = [batch size, hid dim]
output = self.out(hidden)
#output = [batch size, out dim]
return output
I am now trying to use tvm to compile the model to relay as follows:
bert = model.BERTGRUSentiment(bert, HIDDEN_DIM, OUTPUT_DIM, N_LAYERS, BIDIRECTIONAL, DROPOUT)
bert.to(device)
print(f"cuda: {next(bert.parameters()).is_cuda}")
input_name = "text"
input_shape = [1, embedding_dim]
shape_list = [(input_name, input_shape)]
example = model.preprocess(model.tokenizer, "it was ok").unsqueeze(0)
scripted_model = torch.jit.trace(bert, example).eval()
print(scripted_model)
print("starting")
mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
print(mod)
target = tvm.target.cuda()
with tvm.transform.PassContext(opt_level=3):
lib = relay.build(mod, target=target, params=params)
However, whenever I try this, I get the following error calling relay.frontend.from_pytorch
Traceback (most recent call last):
File “compile.py”, line 40, in mod, params = relay.frontend.from_pytorch(scripted_model, shape_list) File “/home/moe/tvm/python/tvm/relay/frontend/pytorch.py”, line 5002, in from_pytorch outputs = converter.convert_operators(operator_nodes, outputs, ret_name) File “/home/moe/tvm/python/tvm/relay/frontend/pytorch.py”, line 4267, in convert_operators _get_input_types(op_node, outputs, default_dtype=self.default_dtype), File “/home/moe/tvm/python/tvm/relay/frontend/pytorch.py”, line 1782, in linear [inputs[0], _op.transpose(inputs[1], axes=(1, 0))], input_types[:2] File “/home/moe/tvm/python/tvm/relay/frontend/pytorch.py”, line 1973, in matmul raise AssertionError(msg) AssertionError: Tensors being multiplied do not have compatible shapes.
This even though the model runs and trains fine. Is there a reason for this?