Error in autotvm when auto-tuning pix2pixHD

bwhe · March 19, 2021, 8:53am

Recently I came across several problems when trying to auto-tune the pix2pixHD model, which is an image-to-image model published at CVPR 2018. At the beginning, I tried to convert the model directly from PyTorch to Relay, but a strange bug stopped me. Then I converted the model from PyTorch to ONNX, and from ONNX to TVM, successfully. Finally, I tuned the model following the tutorial (Auto-tuning a Convolutional Network for x86 CPU — tvm 0.8.dev0 documentation). But the GFLOPS value was always zero, and in debug mode I saw errors like this:

DEBUG:autotvm:No: 207   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(RuntimeError('Traceback (most recent call last):
[bt] (7) /home/ice/SGX/Tvm/tvm/build/libtvm.so(TVMModGetFunction+0x87) [0x7fd88e13ce07]
[bt] (6) /home/ice/SGX/Tvm/tvm/build/libtvm.so(tvm::runtime::ModuleNode::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, bool)+0x4d) [0x7fd88e1582dd]
[bt] (5) /home/ice/SGX/Tvm/tvm/build/libtvm.so(tvm::runtime::RPCModuleNode::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)+0x2ad) [0x7fd88e1ad22d]
[bt] (4) /home/ice/SGX/Tvm/tvm/build/libtvm.so(tvm::runtime::RPCClientSession::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0x8e) [0x7fd88e19b5ae] 
[bt] (3) /home/ice/SGX/Tvm/tvm/build/libtvm.so(+0x15a0d8f) [0x7fd88e195d8f]
[bt] (2) /home/ice/SGX/Tvm/tvm/build/libtvm.so(tvm::runtime::RPCEndpoint::HandleUntilReturnEvent(bool, std::function<void (tvm::runtime::TVMArgs)>)+0x1a5) '),), error_no=4, all_cost=10.251556873321533, timestamp=1616078768.419262)    
[('tile_ic', [-1, 3]), ('tile_oc', [-1, 4]), ('tile_ow', [-1, 16]), ('unroll_kw', True)],None,290

The source code of my auto-tuning Python script is:

import os, tvm, onnx
from tvm import relay, autotvm
import numpy as np
from tvm.relay import testing
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
from tvm.autotvm.graph_tuner import DPTuner, PBQPTuner
import tvm.contrib.graph_runtime as runtime

def get_network():
    """Load the pix2pixHD ONNX file and import it into Relay.

    Returns
    -------
    tuple
        ``(mod, params, input_shape, output_shape)``: the Relay module and
        parameters produced by ``relay.frontend.from_onnx``, followed by the
        hard-coded input and output shapes.
    """
    input_shape = (1, 36, 1024, 2048)
    output_shape = (1, 3, 1024, 2048)

    # The shape dictionary maps the ONNX graph input name to its shape.
    onnx_model = onnx.load("pix2pixHD.onnx")
    mod, params = relay.frontend.from_onnx(
        onnx_model, {"actual_input_1": input_shape}
    )
    return mod, params, input_shape, output_shape

# Compilation target and tensor dtype used for tuning, building and evaluation.
target = "llvm -mcpu=skylake"
dtype = "float32"
model_name = "pix2pixHD"
# Kernel-level tuning records, and the file the graph tuner writes its result to.
log_file = "%s.log" % model_name
graph_opt_sch_file = "%s_graph_opt.log" % model_name

# Name of the model input used when feeding data and when graph tuning.
input_name = "actual_input_1"

# Number of threads for the TVM runtime, passed via the TVM_NUM_THREADS
# environment variable.
num_threads = 4
os.environ["TVM_NUM_THREADS"] = str(num_threads)

tuning_option = {
    "log_filename": log_file,
    "tuner": "random",
    "early_stopping": None,
    "measure_option": autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(
            number=1,
            repeat=10,
            min_repeat_ms=0,
            enable_cpu_cache_flush=True,
            # Each measurement runs a convolution of a 1x36x1024x2048-input
            # model 10 times with a cache flush before every run, which can
            # exceed LocalRunner's default 10 s timeout for slow candidate
            # schedules.
            # NOTE(review): the failing trials report all_cost ~= 10.25 s,
            # which presumably means the measurement session is being killed
            # at the timeout; confirm, and adjust this value to the machine.
            timeout=60,
        ),
    ),
}


def tune_kernels(
    tasks, measure_option, tuner="gridsearch", early_stopping=None, log_filename="tuning.log"
):
    """Tune every task in ``tasks`` and log the measurements to ``log_filename``.

    Parameters
    ----------
    tasks : sequence
        Tuning tasks; each one gets its own tuner instance.
    measure_option : object
        Forwarded unchanged to the tuner's ``tune`` call.
    tuner : str
        One of "xgb", "xgb-rank", "ga", "random" or "gridsearch".
    early_stopping : object
        Forwarded unchanged to the tuner's ``tune`` call.
    log_filename : str
        File name handed to the ``log_to_file`` callback.

    Raises
    ------
    ValueError
        If ``tuner`` is not one of the supported names (raised when the
        first task is processed).
    """
    total = len(tasks)
    for position, task in enumerate(tasks, start=1):
        prefix = "[Task %2d/%2d] " % (position, total)

        # Map each tuner name to a factory; nothing is built until it is called.
        factories = {
            "xgb": lambda: XGBTuner(task, loss_type="rank"),
            "xgb-rank": lambda: XGBTuner(task, loss_type="rank"),
            "ga": lambda: GATuner(task, pop_size=50),
            "random": lambda: RandomTuner(task),
            "gridsearch": lambda: GridSearchTuner(task),
        }
        if tuner not in factories:
            raise ValueError("Invalid tuner: " + tuner)
        tuner_obj = factories[tuner]()

        # Use the size of the task's config space as the trial budget.
        n_trial = len(task.config_space)
        callbacks = [
            autotvm.callback.progress_bar(n_trial, prefix=prefix),
            autotvm.callback.log_to_file(log_filename),
        ]
        tuner_obj.tune(
            n_trial=n_trial,
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=callbacks,
        )


# Graph-level tuning on top of the kernel tuning records.
# Pass use_DP=False to use PBQPTuner if DPTuner takes too long to finish.
def tune_graph(graph, dshape, records, opt_sch_file, use_DP=True):
    """Run the graph tuner for conv2d and write its result to ``opt_sch_file``.

    Parameters
    ----------
    graph : object
        Passed to the graph tuner as the graph to optimize.
    dshape : tuple
        Shape registered for the module-level ``input_name``.
    records : object
        Kernel tuning records handed to the graph tuner.
    opt_sch_file : str
        Destination passed to ``write_opt_sch2record_file``.
    use_DP : bool
        Use ``DPTuner`` when true, ``PBQPTuner`` otherwise.
    """
    conv2d_ops = [relay.op.get("nn.conv2d")]

    if use_DP:
        tuner_cls = DPTuner
    else:
        tuner_cls = PBQPTuner

    input_shapes = {input_name: dshape}
    graph_tuner = tuner_cls(graph, input_shapes, records, conv2d_ops, target)
    graph_tuner.benchmark_layout_transform(min_exec_num=2000)
    graph_tuner.run()
    graph_tuner.write_opt_sch2record_file(opt_sch_file)

def tune_and_evaluate(tuning_opt):
    """Tune the network, build it with the tuned schedules and time inference.

    Steps: extract the conv2d tuning tasks from the Relay program, tune them
    with ``tune_kernels``, run ``tune_graph`` on the resulting records, build
    the module under the graph tuner's result, then report the mean and
    standard deviation of the measured run time in milliseconds.

    Parameters
    ----------
    tuning_opt : dict
        Keyword arguments forwarded to ``tune_kernels``.
    """
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, data_shape, _ = get_network()
    tasks = autotvm.task.extract_from_program(
        mod["main"], target=target, params=params, ops=(relay.op.get("nn.conv2d"),)
    )

    # run tuning tasks
    tune_kernels(tasks, **tuning_opt)
    tune_graph(mod["main"], data_shape, log_file, graph_opt_sch_file)

    # compile kernels with graph-level best records
    # The graph tuner's output file is applied as-is with apply_graph_best.
    # Merging the per-kernel bests into it with pick_best and dispatching with
    # apply_history_best would select schedules per workload and so override
    # the graph tuner's choices.
    with autotvm.apply_graph_best(graph_opt_sch_file):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build_module.build(mod, target=target, params=params)

        # upload parameters to device
        ctx = tvm.cpu()
        data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
        module = runtime.GraphModule(lib["default"](ctx))
        module.set_input(input_name, data_tvm)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=20, repeat=3)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print(
            "Mean inference time (std dev): %.2f ms (%.2f ms)"
            % (np.mean(prof_res), np.std(prof_res))
        )

# Script entry point: run task extraction, kernel tuning, graph tuning,
# compilation and timing with the options defined in `tuning_option`.
tune_and_evaluate(tuning_option)

When I changed the parameters of autotvm.LocalRunner, for example reducing repeat from 10 to 3, the tuning script could run for a little while, but after several hours it would reproduce the bug again. I am a newbie to TVM and now I have no idea how to solve this issue. I would be very grateful for any help you could give me.

Environment

Ubuntu 20.04
TVM: 0.8.dev0
I installed TVM following the tutorials in Install from Source — tvm 0.8.dev0 documentation