I am trying to convert a Hugging Face BERT model into TIR to run some analysis on the operators and the fused operators. However, I am running into an issue when generating a mod from the model.
```
import numpy as np
import tvm
from tvm import relax
import torch
import onnx
from torch import nn
from tvm import relay
from torch.export import export
from tvm.relax.frontend.torch import from_exported_program
from tvm.tir.analysis import calculate_workspace_bytes
from torchvision.models import resnet50
from transformers import BertModel, BertTokenizer
from tvm.contrib import graph_executor
from tvm import dlight as dl
# model = resnet50()
# model_weights_path = "/home/ubuntu/models/faas-share-test/MLPerf-based-workloads/resnet/models/resnet/resnet50.pth"
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
# Load the model
model = BertModel.from_pretrained('bert-large-uncased')
if "embeddings.position_ids" not in model.state_dict():
# Add position_ids to the state_dict
position_ids = torch.arange(512).unsqueeze(0) # Adjust size as needed
model.embeddings.register_buffer("position_ids", position_ids)
# checkpoint = torch.load(model_weights_path)
# print(checkpoint['model'].keys())
# model.load_state_dict(torch.load(model_weights_path))
model.eval() # Set the model to evaluation mode
# Step 3: Define a dummy input with the correct shape for ResNet-50
# input_shape = (1, 3, 224, 224) # Batch size of 1, 3 color channels, height and width of 224
# dummy_input = torch.randn(*input_shape)
# dummy_input_tuple = (dummy_input,)
# Define dummy input parameters for bert
batch_size = 4 # Number of sequences in a batch
seq_length = 16 # Length of each sequence
vocab_size = 30522 # Standard BERT vocabulary size (depends on tokenizer)
# # Create dummy tensors
input_ids = torch.randint(0, vocab_size, (batch_size, seq_length)) # Random token IDs
attention_mask = torch.ones((batch_size, seq_length), dtype=torch.long) # All tokens attended
token_type_ids = torch.zeros((batch_size, seq_length), dtype=torch.long) # Single segment
dummy_input_tuple = (input_ids, attention_mask, token_type_ids)
# Convert the model to IRModule
with torch.no_grad():
    exported_program = export(model, dummy_input_tuple)
    print(exported_program)
    mod_from_torch = from_exported_program(
        exported_program, keep_params_as_input=True, unwrap_unit_return_tuple=True
    )
mod_from_torch, params_from_torch = tvm.relax.frontend.detach_params(mod_from_torch)
mod = mod_from_torch
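# Lower Relax ops to TIR, fuse operators into composite functions, then fuse the corresponding TIR PrimFuncs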
mod = relax.transform.LegalizeOps()(mod)
mod = relax.transform.FuseOps()(mod)
mod = relax.transform.FuseTIR()(mod)
# Print the IRModule
# mod_from_torch.show()
# print(mod.get_global_vars())
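# Apply dlight's default GPU schedules (matmul rule, then fallback) for the CUDA target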
with tvm.target.Target("cuda"):
    gpu_mod = dl.ApplyDefaultSchedule(
        dl.gpu.Matmul(),
        dl.gpu.Fallback(),
    )(mod)
print(gpu_mod)
```

I get the following error:

```
Traceback (most recent call last):
  File "/home/ubuntu/TVM test/IRModule.py", line 56, in <module>
    mod_from_torch = from_exported_program(
  File "/home/ubuntu/.local/lib/python3.8/site-packages/tvm/relax/frontend/torch/exported_program_translator.py", line 454, in from_exported_program
    return ExportedProgramImporter().from_exported_program(
  File "/home/ubuntu/.local/lib/python3.8/site-packages/tvm/relax/frontend/torch/exported_program_translator.py", line 350, in from_exported_program
    assert (
AssertionError: Unsupported function type rsub.Scalar
[22:36:17] /workspace/tvm/src/relax/ir/block_builder.cc:65: Warning: BlockBuilder destroyed with remaining blocks!
```
I am trying to understand whether this is an issue with the way I am using BERT or whether there is some change I need to make in the TVM TIR generation process.
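For reference, this is a small sketch of how I have been trying to narrow it down: it walks the `torch.export` graph and prints the distinct ATen ops, so I can see which ones (like `rsub.Scalar`) show up before the Relax frontend rejects them. It assumes the `exported_program` created in the script above and only uses the standard `ExportedProgram.graph` API.

```
# Sketch: list the distinct ATen ops in the exported program to see which
# ones the Relax frontend might not handle (e.g. rsub.Scalar).
# Assumes `exported_program` is the torch.export.ExportedProgram from above.
aten_ops = set()
for node in exported_program.graph.nodes:
    if node.op == "call_function":
        aten_ops.add(str(node.target))
for op_name in sorted(aten_ops):
    print(op_name)
```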