I'm using TVM built from the master branch, with TensorRT 8.2 enabled.
To reproduce the problem, run the following script:
import tvm
from tvm import relay, auto_scheduler
import os
import torch
import numpy as np

# Reproduction script: trace a ResNet-50 on the GPU with TorchScript, import it
# into Relay, partition the graph for TensorRT, build for the "cuda" target,
# then run the compiled module once and benchmark it.
os.environ["TVM_LOG_DEBUG"] = "0"

# Seed NumPy before the first random draw so that both `img` and `x` are
# reproducible from run to run.
np.random.seed(0)

input_name = "input0"
img = np.random.randn(1, 3, 224, 224)

# scripted_model = torch.jit.load("torch_script_module.pt")
# NOTE(review): `models` is never imported in this snippet; presumably
# `from torchvision import models` is intended -- confirm before running.
model = models.resnet50(pretrained=False).eval().cuda()
x = torch.from_numpy(np.random.randn(1, 3, 224, 224).astype(np.float32)).cuda()
scripted_model = torch.jit.trace(model, x)

# Import the traced model into Relay, declaring one input named `input_name`
# with the same shape as `img`.
shape_list = [(input_name, img.shape)]
mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)

from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt

# Partition the module for TensorRT; the returned `config` is passed to the
# build through the pass context below.
mod, config = partition_for_tensorrt(mod, params)

target = tvm.target.Target("cuda")
dev = tvm.device(str(target), 0)
with tvm.transform.PassContext(
    opt_level=3, config={"relay.ext.tensorrt.options": config}
):
    # The build must happen inside the pass context so the TensorRT options
    # configured above are in effect.
    lib = relay.build(mod, target=target, params=params)

from tvm.contrib import graph_executor

dtype = "float32"
m = graph_executor.GraphModule(lib["default"](dev))

# Set inputs
m.set_input(input_name, tvm.nd.array(img.astype(dtype)))

# Execute
m.run()

# Get outputs
tvm_output = m.get_output(0)
print(tvm_output.shape)

# Evaluate
print("Evaluate raw tvm inference time cost...")
# The result of this first call is discarded; presumably it serves as a
# warm-up pass before the measured benchmark below.
m.benchmark(dev, repeat=100, min_repeat_ms=500)
print(m.benchmark(dev, repeat=1000, min_repeat_ms=500))
Running it produces the following output:
[14:51:33] /root/tvm/src/relay/transforms/convert_layout.cc:99: Warning: Desired layout(s) not specified for op: nn.max_pool2d
[14:51:35] /root/tvm/src/runtime/contrib/tensorrt/tensorrt_logger.h:54: Warning: WARNING: Tensor DataType is determined at build time for tensors not marked as input or output.
[14:51:35] /root/tvm/src/runtime/contrib/tensorrt/tensorrt_logger.h:54: Warning: WARNING: Tensor DataType is determined at build time for tensors not marked as input or output.
Traceback (most recent call last):
File "./torch_tvm_trt.py", line 38, in <module>
m.run()
File "/root/tvm/python/tvm/contrib/graph_executor.py", line 208, in run
self._run()
File "/root/tvm/python/tvm/_ffi/_ctypes/packed_func.py", line 237, in __call__
raise get_last_ffi_error()
tvm._ffi.base.TVMError: vector::_M_range_check: __n (which is 1) >= this->size() (which is 1)
Does this mean that the input batch size is too large? The input batch size is only 1, though.
How can I solve this problem? Could anyone help me? Thanks!