Hi,
When I use TVM to tune a Conv-based model on an NVIDIA GPU, every trial fails with the "Skipped because of invalid gpu kernel" error message; the generated GPU kernels are always invalid. I wonder if my usage is wrong. Here are the error message and the code. Could someone help take a look?
Error message:
[tt] autotvm i = 1920 . k = 37
[tt] autotvm tuner tune, flops = 0 . best_flops = 0
DEBUG:autotvm:No: 1958 GFLOPS: 0.00/0.00 result: Traceback (most recent call last):
File "/home/dimitrov/tvm/python/tvm/autotvm/measure/measure_methods.py", line 567, in __call__
func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
File "/home/dimitrov/tvm/python/tvm/autotvm/measure/measure_methods.py", line 519, in _build_func_common
func = build(s, args, target=target, runtime=runtime)
File "/home/dimitrov/tvm/python/tvm/driver/build_module.py", line 235, in build
input_mod = lower(inputs, args, name=name, binds=binds)
File "/home/pt-gpu/dimitrov/tvm/python/tvm/driver/build_module.py", line 142, in lower
return ffi.lower_schedule(inp, args, name, binds, simple_mode)
File "/home/dimitrov/tvm/python/tvm/_ffi/_ctypes/packed_func.py", line 239, in __call__
raise get_last_ffi_error()
tvm._ffi.base.TVMError: Traceback (most recent call last):
10: TVMFuncCall
9: _ZN3tvm7runtime13PackedFun
8: tvm::runtime::TypedPackedFunc<tvm::IRModule (tvm::te::Schedule, tvm::runtime::Array<tvm::runtime::ObjectRef, void> const&, tvm::runtime::String const&, tvm::runtime::Map<tvm::te::Tensor, tvm::tir::Buffer, void, void> const&, bool)>::AssignTypedLambda<tvm::{lambda(tvm::te::Schedule, tvm::runtime::Array<tvm::runtime::ObjectRef, void> const&, tvm::runtime::String const&, tvm::runtime::Map<tvm::te::Tensor, tvm::tir::Buffer, void, void> const&, bool)#5}>(tvm::{lambda(tvm::te::Schedule, tvm::runtime::Array<tvm::runtime::ObjectRef, void> const&, tvm::runtime::String const&, tvm::runtime::Map<tvm::te::Tensor, tvm::tir::Buffer, void, void> const&, bool)#5}, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}::operator()(tvm::runtime::TVMArgs const, tvm::runtime::TVMRetValue) const
7: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array<tvm::runtime::ObjectRef, void> const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unordered_map<tvm::te::Tensor, tvm::tir::Buffer, std::hash<tvm::te::Tensor>, std::equal_to<tvm::te::Tensor>, std::allocator<std::pair<tvm::te::Tensor const, tvm::tir::Buffer> > > const&, tvm::GlobalVarSupply, bool)
6: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array<tvm::transform::Pass, void>)
5: tvm::transform::Pass::operator()(tvm::IRModule) const
4: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
3: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
2: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
1: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
0: tvm::runtime::PackedFuncObj::Extractor<tvm::runtime::PackedFuncSubObj<TVMFuncCreateFromCFunc::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#2}> >::Call(tvm::runtime::PackedFuncObj const*, tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)
File "/home/dimitrov/tvm/python/tvm/_ffi/_ctypes/packed_func.py", line 83, in cfun
rv = local_pyfunc(*pyargs)
File "/home/dimitrov/tvm/python/tvm/autotvm/measure/measure_methods.py", line 845, in verify_pass
raise InstantiationError("Skipped because of invalid gpu kernel")
tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel
tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel
Here is the code:
import tvm
from tvm import relay, autotvm
from tvm.autotvm.tuner import XGBTuner
from tvm.contrib import graph_executor

# Build and measure each candidate config locally.
builder = autotvm.LocalBuilder(build_func="default")
runner = autotvm.LocalRunner(
    number=number,
    repeat=repeat,
    timeout=10,
    min_repeat_ms=100,
    enable_cpu_cache_flush=True,
)

tuning_option = {
    "tuner": "xgb",
    "trials": 2000,
    "early_stopping": None,
    "measure_option": autotvm.measure_option(builder=builder, runner=runner),
    "tuning_records": tune_record_file,
}

# Extract the tunable tasks (conv workloads) from the Relay module.
tasks = autotvm.task.extract_from_program(mod["main"], target=target, params=params)

for i, task in enumerate(tasks):
    prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
    tuner_obj = XGBTuner(task, loss_type="rank")
    tuner_obj.tune(
        n_trial=min(tuning_option["trials"], len(task.config_space)),
        early_stopping=tuning_option["early_stopping"],
        measure_option=tuning_option["measure_option"],
        callbacks=[
            autotvm.callback.progress_bar(tuning_option["trials"], prefix=prefix),
            autotvm.callback.log_to_file(tuning_option["tuning_records"]),
        ],
    )

# Compile with the best configs found during tuning and build the runtime module.
with autotvm.apply_history_best(tuning_option["tuning_records"]):
    with tvm.transform.PassContext(opt_level=3, config={}):
        lib = relay.build(mod, target="cuda", params=params)

dev = tvm.device(str(target), 0)
module = graph_executor.GraphModule(lib["default"](dev))
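For reference, this is roughly how I check the tuning log afterwards. It is a minimal sketch that assumes the standard autotvm record format written by log_to_file (tune_record_file is the same file as above), and just counts how many measured configs built successfully versus were skipped with an error:

from tvm import autotvm

# Count records that were measured successfully (error_no == 0)
# versus skipped/errored, to see whether every single config failed.
valid, invalid = 0, 0
for inp, res in autotvm.record.load_from_file(tune_record_file):
    if res.error_no == 0:
        valid += 1
    else:
        invalid += 1
print("valid configs:", valid, "| skipped/errored configs:", invalid)

In my runs the valid count stays at 0, which matches the flops = 0 / best_flops = 0 output above.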
Thank you.