Memory verification failure for dynamic shape model

I’m new to TVM and am working on tuning a dynamic-shape model with it.
TVM now gives me the error message shown below, which seems to be related to memory verification.
Can anybody help me out? This is really uncharted territory for me…

[11:48:32] /export/Data/codeware/tvm-0.20/src/tir/analysis/verify_memory.cc:178: verifying memory for target 'cuda -keys=cuda,gpu -arch=sm_89 -max_num_threads=1024 -thread_warp_size=32' for primitive:
# from tvm.script import tir as T

@T.prim_func
def add(var_dcn_mix_dcn_input_ffn_0_MatMul_0: T.handle, B: T.Buffer((T.int64(4096),), "float32"), var_T_add: T.handle):
    T.func_attr({"target": T.target({"arch": "sm_89", "host": {"keys": ["cpu"], "kind": "llvm", "mtriple": "x86_64-unknown-linux-gnu", "tag": ""}, "keys": ["cuda", "gpu"], "kind": "cuda", "max_num_threads": 1024, "tag": "", "thread_warp_size": 32}), "tir.noalias": T.bool(True)})
    N = T.int64()
    dcn_mix_dcn_input_ffn_0_MatMul_0 = T.match_buffer(var_dcn_mix_dcn_input_ffn_0_MatMul_0, (N, T.int64(4096)))
    T_add = T.match_buffer(var_T_add, (N, T.int64(4096)))
    for ax0, ax1 in T.grid(N, T.int64(4096)):
        cse_var_1: T.int64 = ax0 * T.int64(4096) + ax1
        T_add_1 = T.Buffer((N * T.int64(4096),), data=T_add.data)
        dcn_mix_dcn_input_ffn_0_MatMul_0_1 = T.Buffer((N * T.int64(4096),), data=dcn_mix_dcn_input_ffn_0_MatMul_0.data)
        B_1 = T.Buffer((T.int64(4096),), data=B.data)
        T_add_1[cse_var_1] = dcn_mix_dcn_input_ffn_0_MatMul_0_1[cse_var_1] + B_1[ax1]
Traceback (most recent call last):
  File "/export/Data/codeware/tvm-from_scratch/tune_dynamic_model.py", line 103, in <module>
    ex = relax.build(mod, target=target,)
  File "/export/Data/codeware/tvm-0.20/python/tvm/relax/vm_build.py", line 259, in build
    return _vmlink(
  File "/export/Data/codeware/tvm-0.20/python/tvm/relax/vm_build.py", line 154, in _vmlink
    lib = tvm.tir.build(tir_mod, target=target, pipeline=tir_pipeline)
  File "/export/Data/codeware/tvm-0.20/python/tvm/tir/build.py", line 176, in build
    mod = pipeline(mod)
  File "/export/Data/codeware/tvm-0.20/python/tvm/ir/transform.py", line 238, in __call__
    return _ffi_transform_api.RunPass(self, mod)
  File "tvm/_ffi/_cython/packed_func.pxi", line 339, in tvm._ffi._cy3.core.PackedFuncBase.__call__
  File "tvm/_ffi/_cython/packed_func.pxi", line 270, in tvm._ffi._cy3.core.FuncCall
  File "tvm/_ffi/_cython/packed_func.pxi", line 259, in tvm._ffi._cy3.core.FuncCall3
  File "tvm/_ffi/_cython/base.pxi", line 185, in tvm._ffi._cy3.core.CHECK_CALL
  File "/export/Data/codeware/tvm-0.20/python/tvm/_ffi/base.py", line 468, in raise_last_ffi_error
    raise py_err
  File "tvm/_ffi/_cython/packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
  File "/export/Data/codeware/tvm-0.20/python/tvm/tir/pipeline.py", line 122, in _pipeline
    mod = tvm.ir.transform.Sequential(passes)(mod)
  File "/export/Data/codeware/tvm-0.20/python/tvm/ir/transform.py", line 238, in __call__
    return _ffi_transform_api.RunPass(self, mod)
  File "tvm/_ffi/_cython/packed_func.pxi", line 339, in tvm._ffi._cy3.core.PackedFuncBase.__call__
  File "tvm/_ffi/_cython/packed_func.pxi", line 270, in tvm._ffi._cy3.core.FuncCall
  File "tvm/_ffi/_cython/packed_func.pxi", line 259, in tvm._ffi._cy3.core.FuncCall3
  File "tvm/_ffi/_cython/base.pxi", line 185, in tvm._ffi._cy3.core.CHECK_CALL
  File "/export/Data/codeware/tvm-0.20/src/tir/analysis/verify_memory.cc", line 208, in tvm::tir::transform::VerifyMemory()::$_0::operator()(tvm::IRModule, tvm::transform::PassContext) const
    LOG(FATAL) << "RuntimeError: Memory verification failed with the following errors:\n"
tvm._ffi.base.TVMError: Traceback (most recent call last):
  0: tvm::tir::transform::VerifyMemory()::$_0::operator()(tvm::IRModule, tvm::transform::PassContext) const
        at /export/Data/codeware/tvm-0.20/src/tir/analysis/verify_memory.cc:208
  Did you forget to bind?
    Variable `B` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
    Variable `dcn_mix_dcn_input_ffn_0_MatMul_0` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
    Variable `T_add` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
  File "/export/Data/codeware/tvm-0.20/src/tir/analysis/verify_memory.cc", line 208
RuntimeError: Memory verification failed with the following errors:
# from tvm.script import tir as T

@T.prim_func
def add(var_dcn_mix_dcn_input_ffn_0_MatMul_0: T.handle, B: T.Buffer((T.int64(4096),), "float32"), var_T_add: T.handle):
    T.func_attr({"target": T.target({"arch": "sm_89", "host": {"keys": ["cpu"], "kind": "llvm", "mtriple": "x86_64-unknown-linux-gnu", "tag": ""}, "keys": ["cuda", "gpu"], "kind": "cuda", "max_num_threads": 1024, "tag": "", "thread_warp_size": 32}), "tir.noalias": T.bool(True)})
    N = T.int64()
    dcn_mix_dcn_input_ffn_0_MatMul_0 = T.match_buffer(var_dcn_mix_dcn_input_ffn_0_MatMul_0, (N, T.int64(4096)))
    T_add = T.match_buffer(var_T_add, (N, T.int64(4096)))
    for ax0, ax1 in T.grid(N, T.int64(4096)):
        cse_var_1: T.int64 = ax0 * T.int64(4096) + ax1
        T_add_1 = T.Buffer((N * T.int64(4096),), data=T_add.data)
        dcn_mix_dcn_input_ffn_0_MatMul_0_1 = T.Buffer((N * T.int64(4096),), data=dcn_mix_dcn_input_ffn_0_MatMul_0.data)
        B_1 = T.Buffer((T.int64(4096),), data=B.data)
        T_add_1[cse_var_1] = dcn_mix_dcn_input_ffn_0_MatMul_0_1[cse_var_1] + B_1[ax1]

These error messages no longer appear after I made two changes to my code:

  1. Pass the default pipeline explicitly: relax.build(mod, target) ==> relax.build(mod, target, relax_pipeline=relax.get_default_pipeline(target))
  2. Detach the params from the module: mod, params = relax.frontend.detach_params(mod)

I’m still not sure whether the default pipeline includes a tuning process, since relax.build only takes a few seconds.