When I use TVM to run inference on a ViT network, the generated dense_tensorcore.cuda operator fails while building the CUDA code with `error: incomplete type is not allowed`. This is my error report:
How can I solve this problem?
It’s better to provide the steps that reproduce the error.
# Quantize the Relay module under an explicit quantization config.
# Dense layers are not skipped; conv layer index 0 is listed in skip_conv_layers.
with tvm.relay.quantize.qconfig(
    calibrate_mode="global_scale",
    global_scale=8.0,
    weight_scale="power2",
    skip_dense_layer=False,
    skip_conv_layers=[0],
):
    # `mod` is rebound to the quantized module returned by quantize().
    mod = tvm.relay.quantize.quantize(mod, params, dataset=dataset)
# Measurement settings passed to the AutoTVM local runner below.
number = 20
repeat = 3
min_repeat_ms = 4
timeout = 150

# NOTE(review): enable_cpu_cache_flush concerns CPU cache flushing between
# measurements; confirm it is intended here, since the reported failure is in
# CUDA code generation.
runner = autotvm.LocalRunner(
    number=number,
    repeat=repeat,
    timeout=timeout,
    min_repeat_ms=min_repeat_ms,
    enable_cpu_cache_flush=True,
)

# Options consumed by the tuning loop: trial budget, early-stopping threshold,
# how candidates are built/measured, and the file that receives tuning records.
tuning_option = {
    "tuner": "xgb",
    "trials": 2000,
    "early_stopping": 600,
    "measure_option": autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"),
        runner=runner,
    ),
    "tuning_records": "vit_B32_224-autotuning_n.json",
}
# Extract the tunable tasks from the Relay program for the chosen target.
tasks = autotvm.task.extract_from_program(mod["main"], target=target, params=params)

# Tune every extracted task, walking the task list in reverse order.
for i, task in enumerate(reversed(tasks)):
    prefix = f"[Task {i + 1:2d}/{len(tasks):2d}] "
    tuner_obj = XGBTuner(task, loss_type="rank")

    # A task cannot run more trials than its config space has entries, so cap
    # the budget once and use the same number for the tuner and the progress
    # bar; otherwise the bar reports a total that is never reached.
    task_trials = min(tuning_option["trials"], len(task.config_space))

    tuner_obj.tune(
        n_trial=task_trials,
        early_stopping=tuning_option["early_stopping"],
        measure_option=tuning_option["measure_option"],
        callbacks=[
            autotvm.callback.progress_bar(task_trials, prefix=prefix),
            autotvm.callback.log_to_file(tuning_option["tuning_records"]),
        ],
    )
I tuned a quantized ViT network and encountered an error while running it.
I use test_quantization_accuracy_for_vit.py to run it.