Hi All,
I am trying to tune relay.nn.conv2d
for a Tesla V100 GPU, but I am running into errors. I call the tuner as follows:
data = relay.var('data', shape=(N, H, W, C), dtype='float32')
weight = relay.var('weight', shape=(R, S, C, K), dtype='float32')
padding_int = (0, 0)
net = relay.nn.conv2d(data,
weight,
strides=(stride, stride),
padding=padding_int,
channels=K,
kernel_size=(R, S),
data_layout='NHWC',
kernel_layout='HWIO',
out_layout='NHWC',
out_dtype='float32')
module = tvm.IRModule.from_expr(net)
tasks = autotvm.task.extract_from_program(module, {}, target=target)
measure_option = autotvm.measure_option(
builder=autotvm.LocalBuilder(timeout=10),
runner=autotvm.LocalRunner(number=5, repeat=1, min_repeat_ms=300))
for i, task in enumerate(tasks):
tuner_obj = tuner_cls(task)
if os.path.isfile(logfile.format(i)):
print('Loading previous tuning results from logfile {}'.format(logfile.format(i)))
tuner_obj.load_history(autotvm.record.load_from_file(logfile.format(i)))
tuner_obj.tune(n_trial=trials[i],
early_stopping=trials[i],
measure_option=measure_option,
callbacks=[
autotvm.callback.progress_bar(trials[i]),
autotvm.callback.log_to_file(logfile.format(i))])
# save best config
dispatch_context = autotvm.apply_history_best(logfile.format(i))
best_config = dispatch_context.query(task.target, task.workload)
with open(configfile.format(i), 'w') as file:
file.write(str(best_config))
# build kernel with best config
with autotvm.apply_history_best(logfile.format(i)):
with tvm.transform.PassContext(opt_level=3):
with relay.build_config(opt_level=3):
graph, lib, params = relay.build_module.build(module, target=target, params=None)
This yields 2 tasks. The tuning itself runs fine for both tasks, but building the kernel with the best found configuration fails for the second task with the following errors:
Traceback (most recent call last):
File "tune_conv.py", line 348, in <module>
graph, lib, params = relay.build_module.build(module, target=target, params=None)
File "/tvm/tvm/python/tvm/relay/build_module.py", line 255, in build
graph_json, mod, params = bld_mod.build(mod, target, target_host, params)
File "/tvm/tvm/python/tvm/relay/build_module.py", line 121, in build
self._build(mod, target, target_host)
File "/tvm/tvm/python/tvm/_ffi/_ctypes/packed_func.py", line 225, in __call__
raise get_last_ffi_error()
tvm._ffi.base.TVMError: Traceback (most recent call last):
[bt] (8) /tvm/tvm/build/libtvm.so(tvm::relay::backend::RelayBuildModule::BuildRelay(tvm::IRModule, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, tvm::runtime::NDArray, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, tvm::runtime::NDArray> > > const&)+0xdc1) [0x2b8094ad1221]
[bt] (7) /tvm/tvm/build/libtvm.so(tvm::build(tvm::Map<tvm::runtime::String, tvm::IRModule, void, void> const&, tvm::Target const&)+0xbe) [0x2b809426d22e]
[bt] (6) /tvm/tvm/build/libtvm.so(tvm::build(tvm::Map<tvm::Target, tvm::IRModule, void, void> const&, tvm::Target const&)+0x3fa) [0x2b809426ca1a]
[bt] (5) /tvm/tvm/build/libtvm.so(tvm::SplitDevHostFuncs(tvm::IRModule, tvm::Target const&, tvm::Target const&, tvm::transform::PassContext const&)+0x349) [0x2b809426b849]
[bt] (4) /tvm/tvm/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x52) [0x2b80941dc112]
[bt] (3) /tvm/tvm/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x2dc) [0x2b80942bb14c]
[bt] (2) /tvm/tvm/build/libtvm.so(tvm::transform::ModulePassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x125) [0x2b80942bbd45]
[bt] (1) /tvm/tvm/build/libtvm.so(+0x8cba1d) [0x2b8094494a1d]
[bt] (0) /tvm/tvm/build/libtvm.so(+0x8c869d) [0x2b809449169d]
Did you forget to bind?
Variable `placeholder` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
Variable `transform_weight` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
...
File "/tvm/tvm/src/tir/analysis/verify_memory.cc", line 202
RuntimeError: Memory verification failed with the following errors:
PrimFunc([placeholder, transform_weight]) attrs={"global_symbol": "fused_nn_contrib_conv2d_winograd_weight_transform", "tir.noalias": (bool)1, "target": opencl -keys=opencl,gpu -max_num_threads=256} {
parallel (co, 0, 64) {
for (ci, 0, 3) {
transform_weight[((co*3) + ci)] = 0f
transform_weight[(((co*3) + ci) + 192)] = 0f
...
}
}
}
Can you help me find out what causes this? Many thanks in advance!