How can I use TVM + TensorRT for algorithm deployment and achieve better performance than using TensorRT alone?

import logging

import numpy as np
import onnx

import tvm
import tvm.relay as relay
from tvm.contrib import graph_executor

from tvm.relay.op.contrib.tensorrt import (
    partition_for_tensorrt,
    get_tensorrt_version,
    is_tensorrt_runtime_enabled,
    get_tensorrt_target,
)

######################################################################
# Load the pretrained ONNX model
# ---------------------------------------------

logging.basicConfig(level=logging.DEBUG)

# Sanity checks: confirm this TVM build was compiled with TensorRT support.
print(is_tensorrt_runtime_enabled())
print(get_tensorrt_target())
print(get_tensorrt_version())
print(tvm.__file__)

model_path = "models/yolov5s.v5.onnx"
onnx_model = onnx.load(model_path)

######################################################################
# Compile the model with Relay
# ---------------------------------------------

BATCH_SIZE = 1
input_shape = (BATCH_SIZE, 3, 640, 640)

input_name = "images"
dtype = "float16"

shape_dict = {input_name: input_shape}
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, dtype=dtype)
mod = relay.transform.InferType()(mod)
# Partition the graph: TensorRT-supported subgraphs are offloaded, the rest
# stays on TVM's CUDA codegen. Passing params lets the partitioner bind and
# fold constants.
mod = partition_for_tensorrt(mod, params)
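To see whether the partitioner actually offloaded anything (and how much), you can count the functions tagged for the TensorRT compiler. This is a minimal sketch relying on the BYOC convention that offloaded functions carry a `Compiler` attribute:

```python
# List the subgraphs that were handed to TensorRT during partitioning.
trt_funcs = [
    gv.name_hint
    for gv in mod.get_global_vars()
    if mod[gv].attrs
    and "Compiler" in mod[gv].attrs
    and mod[gv].attrs["Compiler"] == "tensorrt"
]
print(f"{len(trt_funcs)} subgraph(s) offloaded to TensorRT: {trt_funcs}")
```

If this prints zero subgraphs, the build below silently falls back to plain CUDA codegen for everything, which would explain TensorRT-level performance not materializing.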

with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target="cuda", params=params)
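Since the goal is deployment, the compiled artifact can also be exported and reloaded later without going through the ONNX frontend again. A minimal sketch; the file name is hypothetical:

```python
# Export the compiled module. The TensorRT engines themselves are built
# lazily at first inference on the deployment machine, so this library is
# not tied to the GPU it was compiled on (a compatible TensorRT install is
# still required there).
lib.export_library("yolov5s_trt.so")  # hypothetical path

# On the deployment side:
loaded_lib = tvm.runtime.load_module("yolov5s_trt.so")
loaded_exec = graph_executor.GraphModule(loaded_lib["default"](tvm.cuda(0)))
```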

dev = tvm.cuda(0)

module_exec = graph_executor.GraphModule(lib["default"](dev))

x_data = np.random.uniform(-1, 1, input_shape).astype(dtype)
module_exec.set_input(input_name, x_data)
# Note: the first inference builds the TensorRT engines, so a single-shot
# timing (number=1, repeat=1) can be skewed by engine construction.
print(module_exec.benchmark(dev, number=1, repeat=1))
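To put a number on what the TensorRT offload contributes, one option is to benchmark the partitioned build against the same model compiled by TVM's plain CUDA codegen. A minimal sketch, assuming the un-partitioned YOLOv5 graph also compiles for the bare cuda target:

```python
# Hypothetical baseline: same model, no TensorRT partitioning.
mod_plain, params_plain = relay.frontend.from_onnx(onnx_model, shape_dict, dtype=dtype)
with tvm.transform.PassContext(opt_level=3):
    lib_plain = relay.build(mod_plain, target="cuda", params=params_plain)

plain_exec = graph_executor.GraphModule(lib_plain["default"](dev))
plain_exec.set_input(input_name, x_data)

# Warm up both modules (this lets the TensorRT path build its engines),
# then time with more repeats for stable numbers.
module_exec.run()
plain_exec.run()
print("TVM + TensorRT :", module_exec.benchmark(dev, number=10, repeat=3))
print("TVM (cuda only):", plain_exec.benchmark(dev, number=10, repeat=3))
```

An untuned CUDA baseline will usually be slow, so auto-tuning it would make the comparison fairer, but even untuned numbers show whether the offload is paying off.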