(This code is best run in a Jupyter notebook, so you won't lose the tuning records if, for example, the process crashes midway.)
- Imports
import tvm
from tvm import relay
from tvm.driver.tvmc.transform import apply_graph_transforms
import onnx
from tvm import meta_schedule as ms
from tvm.meta_schedule.runner import (
    EvaluatorConfig,
    LocalRunner,
    RPCConfig,
    RPCRunner,
)
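# Note: the meta_schedule relay integration has moved between TVM releases;
# this walkthrough assumes a reasonably recent build. Record the version you
# tuned with so the results are reproducible.
print(tvm.__version__)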
- Config
target = tvm.target.Target("opencl -device=mali", host="llvm -mtriple=aarch64-linux-gnu") # your target; this example uses a Mali GPU with an aarch64 Linux host. Change both to match your device
onnx_model_path = "./yourmodel.onnx" # your ONNX model, freshly trained and ready to be deployed!
- Run tuning
onnx_model = onnx.load(onnx_model_path)
mod, params = relay.frontend.from_onnx(onnx_model)
# If you want mixed/fp16 precision, uncomment the following lines:
# mod = apply_graph_transforms(
#     mod,
#     {
#         "mixed_precision": True,
#         "mixed_precision_calculation_type": "float16",
#         "mixed_precision_acc_type": "float16",
#     },
# )
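# If you enable the mixed-precision pass, it is worth confirming it actually
# rewrote the graph: the printed Relay IR should contain float16 casts.
# print(mod)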
# RPC config for the remote device. This assumes the tracker is running on the
# host and the device-side RPC server has registered under the given key.
rpc_config = RPCConfig(
    tracker_host="127.0.0.1",
    tracker_port=9190,
    tracker_key="rk3588",  # must match the key your device's RPC server registered with
    session_priority=1,
    session_timeout_sec=10,
)
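# Sanity check before tuning: ask the tracker what it currently knows about.
# The "rk3588" key should appear in the summary; if it doesn't, the device-side
# RPC server is not registered and tuning will stall.
from tvm import rpc
tracker = rpc.connect_tracker("127.0.0.1", 9190)
print(tracker.text_summary())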
# Evaluator config: `number` runs per measurement, repeated `repeat` times;
# `min_repeat_ms` keeps each measurement from being too short to time reliably.
evaluator_config = EvaluatorConfig(
    number=10,
    repeat=1,
    min_repeat_ms=1,
    enable_cpu_cache_flush=True,  # mainly relevant when measuring on a CPU target
)
# Use the RPC runner to measure candidates on the remote device
runner = RPCRunner(rpc_config, evaluator_config)
# Or use a local runner if you are tuning on the host machine itself
# runner = LocalRunner(evaluator_config)
# Start tuning and search for the best schedules. If the search is interrupted,
# the records measured so far are kept on disk; see the note after this block.
database = ms.relay_integration.tune_relay(
    mod=mod,
    params=params,
    target=target,
    max_trials_global=10000,  # more trials usually means better performance, but a longer search
    runner=runner,
    work_dir="./work",
    seed=0,
)
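# Note on interrupted runs: every measured record is committed to JSON files in
# work_dir as tuning proceeds, so a crash does not lose them. A sketch (assuming
# the default file names JSONDatabase uses) that reloads those records, e.g. to
# compile from whatever was tuned before the crash:
# database = ms.database.JSONDatabase(
#     path_workload="./work/database_workload.json",
#     path_tuning_record="./work/database_tuning_record.json",
# )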
- Compile
# Compile the model using the best schedules recorded in the database
lib = ms.relay_integration.compile_relay(
    database=database,
    mod=mod,
    params=params,
    target=target,
)
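# Alternative (a sketch; skip it if you use the tvmc package below): the
# compiled factory module can also be exported directly as a shared library.
# lib.export_library("yourmodel.so", cc="aarch64-linux-gnu-gcc")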
- Export
import tvm.driver.tvmc.model as tvmc_model
# Export the compiled lib, graph, and parameters as a single package
model = tvmc_model.TVMCModel(mod, params)
# The resulting .tar bundles the compiled library, the graph JSON, and the params.
model.export_package(lib, onnx_model_path.replace(".onnx", ".tar"), "aarch64-linux-gnu-gcc") # replace with your cross compiler, or omit it to compile for the host
- Run
import tvm
from tvm.driver import tvmc
from tvm.contrib import graph_executor
import time
package = tvmc.TVMCPackage("yourmodel.tar")
# tvmc.run is extremely slow!!
# result = tvmc.run(package, device="cl", inputs={"input1": tvm.nd.array(your_input_tensor)})
lib = tvm.runtime.load_module(package.lib_path)
executor = graph_executor.create(package.graph, lib, tvm.cl()) # change to tvm.cpu() to run on the CPU
executor.load_params(package.params)
inp = {"input1": tvm.nd.array(your_input_tensor)}  # "input1" must match your model's input name
executor.set_input(**inp)
executor.run()  # warm up with the real input
start_time = time.time()
executor.run()
result = executor.get_output(0).numpy()  # copying the output back forces the device to finish
end_time = time.time()
print(result)
print("time:", end_time - start_time)