This is my code,
import onnx
import tvm
from tvm import te
import tvm.relay as relay
from tvm.contrib.download import download_testdata
from tvm.contrib import graph_executor
import numpy as np
from tvm import rpc
import os
import torch
jit_model = torch.jit.load(‘jit_trace.pt’)
quantize = True
dummy_input = torch.randn(1, 3, 224, 224).to(“cuda”)
torch.onnx.export(jit_model, dummy_input, “model.onnx”,
input_names=['input'],
output_names=['output'],
dynamic_axes={'input': {0: 'batch_size'},
'output': {0: 'batch_size'}})
onnx_model = onnx.load(“model.onnx”)
input_name = “input”
input_shape = (1, 3, 224, 224)
shape_dict = {input_name: input_shape}
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
target = tvm.target.Target(“opencl -device=mali”, host=“llvm -mtriple=aarch64-linux-gnu”)
opencl_device_host = “192.168.1.115”
opencl_device_port = 9092
remote = rpc.connect(opencl_device_host, opencl_device_port,key=“your_custom_key”)
import tvm.autotvm as autotvm
from tvm.autotvm.tuner import XGBTuner
from tvm.autotvm.graph_tuner import DPTuner, PBQPTuner
tasks = autotvm.task.extract_from_program(mod[“main”], target=target, params=params)
tuning_option = {
'log_filename': 'tuning.log',
'tuner': 'xgb',
'n_trial': 5,
'early_stopping': 1,
'measure_option': autotvm.measure_option(
builder=autotvm.LocalBuilder(timeout=10),
runner=autotvm.RPCRunner(key="your_custom_key",
host=opencl_device_host,
port=opencl_device_port,
number=20,
repeat=3,
timeout=4,
min_repeat_ms=150
),
),
}
import logging
logging.basicConfig(level=logging.DEBUG)
tvm.autotvm.GLOBAL_SCOPE.silent = False
ctx = remote.cl()
print(“Device initialized:”, ctx)
for i, task in enumerate(tasks):
prefix = "[Task %2d/%2d] " % (i+1, len(tasks))
tuner_obj = XGBTuner(task, loss_type="rank")
tuner_obj.tune(
n_trial=tuning_option['n_trial'],
early_stopping=tuning_option['early_stopping'],
measure_option=tuning_option['measure_option'],
callbacks=[
autotvm.callback.progress_bar(tuning_option['n_trial'], prefix=prefix),
autotvm.callback.log_to_file(tuning_option['log_filename']),
],
)
with autotvm.apply_history_best(tuning_option[‘log_filename’]):
with tvm.transform.PassContext(opt_level=3, config={}):
lib = relay.build(mod, target=target, params=params)
lib.export_library(“optimized_model_opencl.tar”)
remote.upload(“optimized_model_opencl.tar”)
lib = remote.load_module(“optimized_model_opencl.tar”)
dev = remote.device(str(target), 0)
module = graph_executor.GraphModule(lib"default")
input_data = np.random.uniform(size=input_shape).astype(“float32”)
module.set_input(input_name, input_data)
module.run()
output = module.get_output(0).asnumpy()
print(“Output shape:”, output.shape)
print(“Output:”, output)
autotvm.record.pick_best(tuning_option[‘log_filename’], ‘best_opencl.json’)
print(“Exported optimized model to optimized_model_opencl.so”)
print(“Saved best tuning configurations to best_opencl.json”)
when I run Autotune, it just shows:Device initialized: remote[0]:opencl(0) [Task 1/47] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/5) | 0.00 sDEBUG:autotvm:waiting for device… and then no response i could get even a day later. But everything works well if I remove Autotune,