I am trying to load one of my “whisper” models this way:
model = model.jit(spec=mod_spec, device="cuda", out_format="torch", debug=True)
When I run this, I get the following traceback:
Traceback (most recent call last):
File "/algo/users/ppp/GoodsDetector/yolov8_train/jin/tests/python/test_model_whisper.py", line 174, in <module>
main()
File "/algo/users/ppp/GoodsDetector/yolov8_train/jin/tests/python/test_model_whisper.py", line 152, in main
model = model.jit(spec=mod_spec, device="cuda", out_format="torch", debug=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/algo/users/pedro/softwares/MiniConda/envs/mlc/lib/python3.12/site-packages/tvm/relax/frontend/nn/core.py", line 533, in jit
spec, vm, params = _compile(spec, device, pipeline, debug) # pylint: disable=invalid-name
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/algo/users/pedro/softwares/MiniConda/envs/mlc/lib/python3.12/site-packages/tvm/relax/frontend/nn/core.py", line 522, in _compile
relax_build(
File "/algo/users/pedro/softwares/MiniConda/envs/mlc/lib/python3.12/site-packages/tvm/relax/vm_build.py", line 341, in build
return _vmlink(
^^^^^^^^
File "/algo/users/pedro/softwares/MiniConda/envs/mlc/lib/python3.12/site-packages/tvm/relax/vm_build.py", line 247, in _vmlink
lib = tvm.build(
^^^^^^^^^^
File "/algo/users/pedro/softwares/MiniConda/envs/mlc/lib/python3.12/site-packages/tvm/driver/build_module.py", line 297, in build
rt_mod_host = _driver_ffi.tir_to_runtime(annotated_mods, target_host)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "tvm/_ffi/_cython/./packed_func.pxi", line 332, in tvm._ffi._cy3.core.PackedFuncBase.__call__
File "tvm/_ffi/_cython/./packed_func.pxi", line 263, in tvm._ffi._cy3.core.FuncCall
File "tvm/_ffi/_cython/./packed_func.pxi", line 252, in tvm._ffi._cy3.core.FuncCall3
File "tvm/_ffi/_cython/./base.pxi", line 182, in tvm._ffi._cy3.core.CHECK_CALL
File "/algo/users/pedro/softwares/MiniConda/envs/mlc/lib/python3.12/site-packages/tvm/_ffi/base.py", line 481, in raise_last_ffi_error
raise py_err
tvm._ffi.base.TVMError: Traceback (most recent call last):
11: tvm::runtime::PackedFuncObj::Extractor<tvm::runtime::PackedFuncSubObj<tvm::runtime::TypedPackedFunc<tvm::runtime::Module (tvm::runtime::Map<tvm::Target, tvm::IRModule, void, void> const&, tvm::Target)>::AssignTypedLambda<tvm::__mk_TVM24::{lambda(tvm::runtime::Map<tvm::Target, tvm::IRModule, void, void> const&, tvm::Target)#1}>(tvm::__mk_TVM24::{lambda(tvm::runtime::Map<tvm::Target, tvm::IRModule, void, void> const&, tvm::Target)#1}, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}> >::Call(tvm::runtime::PackedFuncObj const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tvm::runtime::TVMRetValue)
10: tvm::TIRToRuntime(tvm::runtime::Map<tvm::Target, tvm::IRModule, void, void> const&, tvm::Target const&)
9: tvm::SplitMixedModule(tvm::IRModule, tvm::Target const&, tvm::Target const&)
8: tvm::ApplyPasses(tvm::IRModule, tvm::transform::Sequential)
7: tvm::transform::Pass::operator()(tvm::IRModule) const
6: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
5: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
4: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
3: tvm::transform::ModulePassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
2: _ZN3tvm7runtime13PackedFun
1: tvm::runtime::TypedPackedFunc<tvm::IRModule (tvm::IRModule, tvm::transform::PassContext)>::AssignTypedLambda<tvm::tir::transform::VerifyMemory()::{lambda(tvm::IRModule, tvm::transform::PassContext)#1}>(tvm::tir::transform::VerifyMemory()::{lambda(tvm::IRModule, tvm::transform::PassContext)#1})::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}::operator()(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*) const
0: _ZN3tvm7runtime6deta
Did you forget to bind?
Variable `model_decoder_layers_0_fc1_bias` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
Variable `matmul204` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
Variable `T_add` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
File "/workspace/tvm/src/tir/analysis/verify_memory.cc", line 205
RuntimeError: Memory verification failed with the following errors:
# from tvm.script import tir as T
@T.prim_func
def add3(var_matmul204: T.handle, model_decoder_layers_0_fc1_bias: T.Buffer((T.int64(4096),), "float32"), var_T_add: T.handle):
T.func_attr({"target": T.target({"arch": "sm_75", "host": {"keys": ["cpu"], "kind": "llvm", "mtriple": "x86_64-redhat-linux-gnu", "tag": ""}, "keys": ["cuda", "gpu"], "kind": "cuda", "max_num_threads": 1024, "max_shared_memory_per_block": 49152, "max_threads_per_block": 1024, "tag": "", "thread_warp_size": 32}), "tir.noalias": T.bool(True)})
seq_len = T.int64()
matmul204 = T.match_buffer(var_matmul204, (T.int64(1), seq_len, T.int64(4096)))
T_add = T.match_buffer(var_T_add, (T.int64(1), seq_len, T.int64(4096)))
for ax1, ax2 in T.grid(seq_len, T.int64(4096)):
cse_var_1: T.int64 = ax1 * T.int64(4096) + ax2
T_add_1 = T.Buffer((seq_len * T.int64(4096),), data=T_add.data)
matmul204_1 = T.Buffer((seq_len * T.int64(4096),), data=matmul204.data)
model_decoder_layers_0_fc1_bias_1 = T.Buffer((T.int64(4096),), data=model_decoder_layers_0_fc1_bias.data)
T_add_1[cse_var_1] = matmul204_1[cse_var_1] + model_decoder_layers_0_fc1_bias_1[ax2]
I have seen similar “Did you forget to bind?” errors reported on this forum, but none of the proposed fixes relate to (or resolve) this case.