Recently I come across several problems when trying to autotune model in pix2pixHD, which is a image-to-image model published in CVPR2018. At the beginning, I tried to directly transfer model from pytorch to relay, but a strange bug stopped me. Then I tried to convert model from pytorch to onnx and converted model from onnx to tvm successfully. Finally I tuned the model following the tutorial(Auto-tuning a Convolutional Network for x86 CPU — tvm 0.8.dev0 documentation). But the value of GFLOPS was always zero, and from the debug mode, I saw bug like this:
DEBUG:autotvm:No: 207 GFLOPS: 0.00/0.00 result: MeasureResult(costs=(RuntimeError('Traceback (most recent call last):
[bt] (7) /home/ice/SGX/Tvm/tvm/build/libtvm.so(TVMModGetFunction+0x87) [0x7fd88e13ce07]
[bt] (6) /home/ice/SGX/Tvm/tvm/build/libtvm.so(tvm::runtime::ModuleNode::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, bool)+0x4d) [0x7fd88e1582dd]
[bt] (5) /home/ice/SGX/Tvm/tvm/build/libtvm.so(tvm::runtime::RPCModuleNode::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)+0x2ad) [0x7fd88e1ad22d]
[bt] (4) /home/ice/SGX/Tvm/tvm/build/libtvm.so(tvm::runtime::RPCClientSession::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0x8e) [0x7fd88e19b5ae]
[bt] (3) /home/ice/SGX/Tvm/tvm/build/libtvm.so(+0x15a0d8f) [0x7fd88e195d8f]
[bt] (2) /home/ice/SGX/Tvm/tvm/build/libtvm.so(tvm::runtime::RPCEndpoint::HandleUntilReturnEvent(bool, std::function<void (tvm::runtime::TVMArgs)>)+0x1a5) '),), error_no=4, all_cost=10.251556873321533, timestamp=1616078768.419262)
[('tile_ic', [-1, 3]), ('tile_oc', [-1, 4]), ('tile_ow', [-1, 16]), ('unroll_kw', True)],None,290
And source code of my auto-tuning python script is:
import os, tvm, onnx
from tvm import relay, autotvm
import numpy as np
from tvm.relay import testing
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
from tvm.autotvm.graph_tuner import DPTuner, PBQPTuner
import tvm.contrib.graph_runtime as runtime
def get_network():
input_shape = (1,36,1024,2048)
output_shape = (1, 3, 1024, 2048)
model_path = "pix2pixHD.onnx"
onnx_model = onnx.load(model_path)
input_name = "actual_input_1"
shape_dict = {input_name: input_shape}
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
return mod, params, input_shape, output_shape
target = "llvm -mcpu=skylake"
dtype = "float32"
model_name = "pix2pixHD"
log_file = "%s.log" % model_name
graph_opt_sch_file = "%s_graph_opt.log" % model_name
input_name = "actual_input_1"
num_threads = 4
os.environ["TVM_NUM_THREADS"] = str(num_threads)
tuning_option = {
"log_filename": log_file,
"tuner": "random",
"early_stopping": None,
"measure_option": autotvm.measure_option(
builder=autotvm.LocalBuilder(),
runner=autotvm.LocalRunner(
number=1, repeat=10, min_repeat_ms=0, enable_cpu_cache_flush=True
),
),
}
# You can skip the implementation of this function for this tutorial.
def tune_kernels(
tasks, measure_option, tuner="gridsearch", early_stopping=None, log_filename="tuning.log"
):
for i, task in enumerate(tasks):
prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
# create tuner
if tuner == "xgb" or tuner == "xgb-rank":
tuner_obj = XGBTuner(task, loss_type="rank")
elif tuner == "ga":
tuner_obj = GATuner(task, pop_size=50)
elif tuner == "random":
tuner_obj = RandomTuner(task)
elif tuner == "gridsearch":
tuner_obj = GridSearchTuner(task)
else:
raise ValueError("Invalid tuner: " + tuner)
# do tuning
n_trial = len(task.config_space)
tuner_obj.tune(
n_trial=n_trial,
early_stopping=early_stopping,
measure_option=measure_option,
callbacks=[
autotvm.callback.progress_bar(n_trial, prefix=prefix),
autotvm.callback.log_to_file(log_filename),
],
)
# Use graph tuner to achieve graph level optimal schedules
# Set use_DP=False if it takes too long to finish.
def tune_graph(graph, dshape, records, opt_sch_file, use_DP=True):
target_op = [
relay.op.get("nn.conv2d"),
]
Tuner = DPTuner if use_DP else PBQPTuner
executor = Tuner(graph, {input_name: dshape}, records, target_op, target)
executor.benchmark_layout_transform(min_exec_num=2000)
executor.run()
executor.write_opt_sch2record_file(opt_sch_file)
def tune_and_evaluate(tuning_opt):
# extract workloads from relay program
print("Extract tasks...")
mod, params, data_shape, out_shape = get_network()
tasks = autotvm.task.extract_from_program(
mod["main"], target=target, params=params, ops=(relay.op.get("nn.conv2d"),)
)
# run tuning tasks
tune_kernels(tasks, **tuning_opt)
tune_graph(mod["main"], data_shape, log_file, graph_opt_sch_file)
# compile kernels with graph-level best records
autotvm.record.pick_best(log_file,graph_opt_sch_file)
with autotvm.apply_history_best(graph_opt_sch_file):
print("Compile...")
with tvm.transform.PassContext(opt_level=3):
lib = relay.build_module.build(mod, target=target, params=params)
# upload parameters to device
ctx = tvm.cpu()
data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
module = runtime.GraphModule(lib["default"](ctx))
module.set_input(input_name, data_tvm)
# evaluate
print("Evaluate inference time cost...")
ftimer = module.module.time_evaluator("run", ctx, number=20, repeat=3)
prof_res = np.array(ftimer().results) * 1000 # convert to millisecond
print(
"Mean inference time (std dev): %.2f ms (%.2f ms)"
% (np.mean(prof_res), np.std(prof_res))
)
tune_and_evaluate(tuning_option)
When I changed the parameters for autotvm.LocalRunner, like changing repeat from 10 to 3, then the tuning script could run for a little while, but after serveral hours, it would reproduce the bug again. I am a newbie to tvm and now I have no idea about how to solve this issue. I would be very grateful indeed for any help you could give me.
Environment
- Ubuntu 20.04
- TVM: 0.8.dev0
- I installed TVM following the tutorials in Install from Source — tvm 0.8.dev0 documentation