I am trying to convert a Hugging Face BERT model into TIR to run some analysis on the operators and the fused operators. However, I am running into an issue when generating a mod from the model.
```
import numpy as np
import tvm
from tvm import relax
import torch
import onnx
from torch import nn
from tvm import relay
from torch.export import export
from tvm.relax.frontend.torch import from_exported_program
from tvm.tir.analysis import calculate_workspace_bytes
from torchvision.models import resnet50
from transformers import BertModel, BertTokenizer
from tvm.contrib import graph_executor
from tvm import dlight as dl
# model = resnet50()
# model_weights_path = "/home/ubuntu/models/faas-share-test/MLPerf-based-workloads/resnet/models/resnet/resnet50.pth"
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
# Load the model
model = BertModel.from_pretrained('bert-large-uncased')
if "embeddings.position_ids" not in model.state_dict():
# Add position_ids to the state_dict
position_ids = torch.arange(512).unsqueeze(0) # Adjust size as needed
model.embeddings.register_buffer("position_ids", position_ids)
# checkpoint = torch.load(model_weights_path)
# print(checkpoint['model'].keys())
# model.load_state_dict(torch.load(model_weights_path))
model.eval() # Set the model to evaluation mode
# Step 3: Define a dummy input with the correct shape for ResNet-50
# input_shape = (1, 3, 224, 224) # Batch size of 1, 3 color channels, height and width of 224
# dummy_input = torch.randn(*input_shape)
# dummy_input_tuple = (dummy_input,)
# Define dummy input parameters for bert
batch_size = 4 # Number of sequences in a batch
seq_length = 16 # Length of each sequence
vocab_size = 30522 # Standard BERT vocabulary size (depends on tokenizer)
# # Create dummy tensors
input_ids = torch.randint(0, vocab_size, (batch_size, seq_length)) # Random token IDs
attention_mask = torch.ones((batch_size, seq_length), dtype=torch.long) # All tokens attended
token_type_ids = torch.zeros((batch_size, seq_length), dtype=torch.long) # Single segment
dummy_input_tuple = (input_ids, attention_mask, token_type_ids)
# Convert the model to IRModule
with torch.no_grad():
    exported_program = export(model, dummy_input_tuple)
    print(exported_program)
    mod_from_torch = from_exported_program(
        exported_program, keep_params_as_input=True, unwrap_unit_return_tuple=True
    )
mod_from_torch, params_from_torch = tvm.relax.frontend.detach_params(mod_from_torch)
mod = mod_from_torch
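# Lower Relax ops to TIR, fuse operators into composite functions, then fuse the corresponding TIR PrimFuncs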
mod = relax.transform.LegalizeOps()(mod)
mod = relax.transform.FuseOps()(mod)
mod = relax.transform.FuseTIR()(mod)
# Print the IRModule
# mod_from_torch.show()
# print(mod.get_global_vars())
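# Apply dlight's default GPU schedules (matmul rule, then fallback) for the CUDA target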
with tvm.target.Target("cuda"):
    gpu_mod = dl.ApplyDefaultSchedule(
        dl.gpu.Matmul(),
        dl.gpu.Fallback(),
    )(mod)
print(gpu_mod)
```

I get the following error:

```
Traceback (most recent call last):
  File "/home/ubuntu/TVM test/IRModule.py", line 56, in <module>
    mod_from_torch = from_exported_program(
  File "/home/ubuntu/.local/lib/python3.8/site-packages/tvm/relax/frontend/torch/exported_program_translator.py", line 454, in from_exported_program
    return ExportedProgramImporter().from_exported_program(
  File "/home/ubuntu/.local/lib/python3.8/site-packages/tvm/relax/frontend/torch/exported_program_translator.py", line 350, in from_exported_program
    assert (
AssertionError: Unsupported function type rsub.Scalar
[22:36:17] /workspace/tvm/src/relax/ir/block_builder.cc:65: Warning: BlockBuilder destroyed with remaining blocks!
```
I am trying to understand whether this is an issue with the way I am using BERT or whether there is some change I need to make in the TVM TIR generation process.
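For reference, this is a small sketch of how I have been trying to narrow it down: it walks the `torch.export` graph and prints the distinct ATen ops, so I can see which ones (like `rsub.Scalar`) show up before the Relax frontend rejects them. It assumes the `exported_program` created in the script above and only uses the standard `ExportedProgram.graph` API.

```
# Sketch: list the distinct ATen ops in the exported program to see which
# ones the Relax frontend might not handle (e.g. rsub.Scalar).
# Assumes `exported_program` is the torch.export.ExportedProgram from above.
aten_ops = set()
for node in exported_program.graph.nodes:
    if node.op == "call_function":
        aten_ops.add(str(node.target))
for op_name in sorted(aten_ops):
    print(op_name)
```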