Hi @kazum - Thank you for the previous suggestions, I am also looking at how to use autotvm to tune a model on iOS.
Below is a modified version of ‘tutorials/autotvm/tune_relay_arm.py’ that is based on your previous comment suggestion of adding a build_func, but something isn’t working quite right yet.
Tuning tasks are stuck at 0 GFLOPS and the tuning trials time out.
[Task 1/12] Current/Best: 0.00/ 0.00 GFLOPS | Progress: (0/100) | 0.00 s
If I skip tuning (by commenting out ‘tune_tasks(tasks, **tuning_opt)’), the script successfully builds and runs the untuned model and reports an inference result.
Any idea what step might be missing here?
Thank you!
-
Assumption: you have a single macOS based host running the rpc proxy, tracker and xcode, with local network IP: 192.168.0.10
-
Setup environment variables:
export TVM_IOS_CODESIGN='Apple Development: <your@email.com> (<SIGNINGCODE>)'
export TVM_IOS_RPC_ROOT=${TVM_HOME}/apps/ios_rpc
export TVM_IOS_RPC_PROXY_HOST=192.168.0.10
#export TVM_IOS_RPC_DESTINATION='platform=iOS Simulator,id=<simulator id>'
export TVM_IOS_RPC_DESTINATION='platform=iOS,id=<ios device id>'
-
Start the tracker
python3 -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190 --no-fork
INFO:RPCTracker:bind to 0.0.0.0:9190
-
Start the rpc proxy and point it to the tracker
python3 -m tvm.exec.rpc_proxy --host 0.0.0.0 --tracker 0.0.0.0:9190 --no-fork
INFO:root:RPCProxy: client port bind to 0.0.0.0:9090
-
Run tuning:
cd ${TVM_HOME}/apps/ios_rpc
python3 tests/tune_relay_ios.py
Code:
"""
apps/ios_rpc/tests/tune_relay_ios.py
Auto-tuning a convolutional network for iPhone CPU
===============================================
"""
import os
import numpy as np
import tvm
from tvm import te
from tvm import autotvm
from tvm import relay
import tvm.relay.testing
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
from tvm.contrib.util import tempdir
import tvm.contrib.graph_runtime as runtime
from tvm.contrib import xcode
#################################################################
# Define network
# --------------
# First we need to define the network with the relay frontend API.
# We can load some pre-defined networks from :code:`relay.testing`.
# We can also load models from MXNet, ONNX and TensorFlow.
def get_network(name, batch_size):
    """Get the symbol definition and random weights of a network.

    The element type of the workload is taken from the module-level
    ``dtype`` setting.

    Parameters
    ----------
    name : str
        Network to build. Names containing ``resnet`` or ``vgg`` must have
        the form ``<family>-<num_layers>`` (e.g. ``resnet-18``); the other
        accepted names are ``mobilenet``, ``squeezenet_v1.1``,
        ``inception_v3`` and ``mxnet``.
    batch_size : int
        Batch dimension used for the workload and the reported shapes.

    Returns
    -------
    tuple
        ``(mod, params, input_shape, output_shape)``.

    Raises
    ------
    ValueError
        If ``name`` does not match any supported network.
    """
    input_shape = (batch_size, 3, 224, 224)
    output_shape = (batch_size, 1000)

    if "resnet" in name:
        n_layer = int(name.split('-')[1])
        mod, params = relay.testing.resnet.get_workload(
            num_layers=n_layer, batch_size=batch_size, dtype=dtype)
    elif "vgg" in name:
        n_layer = int(name.split('-')[1])
        mod, params = relay.testing.vgg.get_workload(
            num_layers=n_layer, batch_size=batch_size, dtype=dtype)
    elif name == 'mobilenet':
        # Pass dtype like every other branch so the workload matches the
        # dtype later used to create the input tensor.
        mod, params = relay.testing.mobilenet.get_workload(
            batch_size=batch_size, dtype=dtype)
    elif name == 'squeezenet_v1.1':
        mod, params = relay.testing.squeezenet.get_workload(
            batch_size=batch_size, version='1.1', dtype=dtype)
    elif name == 'inception_v3':
        # Inception v3 takes 299x299 images; keep the requested batch size
        # instead of hard-coding a batch of 1.
        input_shape = (batch_size, 3, 299, 299)
        mod, params = relay.testing.inception_v3.get_workload(
            batch_size=batch_size, dtype=dtype)
    elif name == 'mxnet':
        # an example for mxnet model
        from mxnet.gluon.model_zoo.vision import get_model
        block = get_model('resnet18_v1', pretrained=True)
        mod, params = relay.frontend.from_mxnet(
            block, shape={'data': input_shape}, dtype=dtype)
        # Append a softmax to the imported network's output.
        net = mod["main"]
        net = relay.Function(net.params, relay.nn.softmax(net.body), None,
                             net.type_params, net.attrs)
        mod = tvm.IRModule.from_expr(net)
    else:
        raise ValueError("Unsupported network: " + name)

    return mod, params, input_shape, output_shape
#################################################################
# Start RPC Tracker
# ------------------
# python3 -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190 --no-fork
#
# - Autotvm will use the tracker to orchestrate tuning test runs.
#
# Start RPC Proxy
# python3 -m tvm.exec.rpc_proxy --host 0.0.0.0 --tracker 0.0.0.0:9190 --no-fork
###########################################
# Set Tuning Options
# ------------------
#### DEVICE CONFIG ####
# Address of the TVM RPC proxy, read from the environment.
proxy_host = os.environ["TVM_IOS_RPC_PROXY_HOST"]
# Set your destination via an environment variable.
# It should be in the format "platform=iOS,id=<the test device uuid>".
destination = os.environ["TVM_IOS_RPC_DESTINATION"]
# Key used when launching the RPC app and when requesting a device.
device_key = 'iphone'
proxy_port = 9090
arch = "arm64"
sdk = "iphoneos"
target = "llvm -mtriple={}-apple-darwin".format(arch)
target_host = "llvm -mtriple={}-apple-darwin".format(arch)

#### TUNING OPTION ####
network = 'resnet-18'
log_file = "{}.{}.log".format(device_key, network)
dtype = 'float32'

# Replace autotvm's check_remote with a stub that always reports success.
# NOTE(review): presumably needed because the iOS RPC app is only launched
# from fcompile, so no device is registered when the check runs - confirm.
autotvm.measure.measure_methods.check_remote = lambda *args: True
def fcompile(*args):
    """Build function used by the autotvm builder for iOS.

    All positional arguments are forwarded to ``xcode.create_dylib`` together
    with the module-level ``arch`` and ``sdk``. The first positional argument
    is then treated as the path of the produced library: it is code-signed
    and handed to ``xcode.popen_test_rpc`` via ``libs``.
    """
    xcode.create_dylib(*args, arch=arch, sdk=sdk)
    dylib_path = args[0]
    xcode.codesign(dylib_path)
    xcode.popen_test_rpc(
        proxy_host,
        proxy_port,
        device_key,
        destination=destination,
        libs=[dylib_path],
    )


# Output format attribute attached to the build function.
fcompile.output_format = "dylib"
# Keyword arguments later expanded into tune_tasks(tasks, **tuning_option).
tuning_option = dict(
    log_filename=log_file,
    tuner='random',
    early_stopping=None,
    n_trial=100,
    measure_option=autotvm.measure_option(
        # Build locally, one candidate at a time, using the iOS build function.
        builder=autotvm.LocalBuilder(
            n_parallel=1,
            build_func=fcompile,
            timeout=60,
        ),
        # Measure on a device requested from the tracker under device_key.
        runner=autotvm.RPCRunner(
            device_key,
            host='127.0.0.1',
            port=9190,
            number=20,
            repeat=3,
            timeout=60,
            min_repeat_ms=150,
        ),
    ),
)
###################################################################
# Begin Tuning
# ------------
def tune_tasks(tasks,
               measure_option,
               tuner='random',
               n_trial=1000,
               early_stopping=None,
               log_filename='tuning.log',
               use_transfer_learning=True):
    """Tune every task and keep the best records in ``log_filename``.

    Records are first appended to ``log_filename + ".tmp"``; once all tasks
    are tuned the best ones are picked into ``log_filename`` and the
    temporary file is removed.

    Parameters
    ----------
    tasks : sequence
        Tuning tasks; they are processed in reverse order.
    measure_option : dict
        Measurement options forwarded to each tuner's ``tune`` call.
    tuner : str
        One of ``xgb``, ``xgb-rank``, ``xgb_knob``, ``ga``, ``random`` or
        ``gridsearch``.
    n_trial : int
        Upper bound on trials per task (capped by the config space size).
    early_stopping : int or None
        Forwarded to the tuner's ``tune`` call.
    log_filename : str
        Destination file for the best records.
    use_transfer_learning : bool
        When true, each tuner is seeded with the records written so far.

    Raises
    ------
    ValueError
        If ``tuner`` is not a recognised name (raised when the first task is
        reached).
    """
    # Start from a clean temporary log so stale records are never reused.
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner in ('xgb', 'xgb-rank'):
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'xgb_knob':
            tuner_obj = XGBTuner(tsk, loss_type='rank', feature_type='knob')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=50)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        # Seed the tuner with records from the tasks tuned so far.
        if use_transfer_learning and os.path.isfile(tmp_log_file):
            tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        tsk_trial = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(n_trial=tsk_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)
                       ])

    # No temporary log exists when nothing was recorded (e.g. an empty task
    # list); picking the best records from a missing file would raise.
    if not os.path.isfile(tmp_log_file):
        return

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
########################################################################
# Finally, we launch tuning jobs and evaluate the end-to-end performance.
def tune_and_evaluate(tuning_opt):
    """Tune the conv2d tasks of ``network``, then build and time it on device.

    Parameters
    ----------
    tuning_opt : dict
        Keyword arguments forwarded to ``tune_tasks``.
    """
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, input_shape, _ = get_network(network, batch_size=1)
    tasks = autotvm.task.extract_from_program(mod["main"], target=target,
                                              params=params,
                                              ops=(relay.op.get("nn.conv2d"),))

    # run tuning tasks
    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            graph, lib, params = relay.build_module.build(
                mod, target=target, params=params)

        # export library
        path_dso = "tuned_deploy.dylib"
        lib.export_library(path_dso, xcode.create_dylib, arch=arch, sdk=sdk)
        xcode.codesign(path_dso)

        # Evaluate inference cost on tuned lib
        xcode.popen_test_rpc(proxy_host, proxy_port, device_key,
                             destination=destination, libs=[path_dso])

        # Reach the tracker through the loopback address, the same address
        # the RPCRunner in tuning_option uses; 0.0.0.0 is a bind-any address
        # and is not a portable destination to connect to.
        remote = autotvm.measure.request_remote(device_key, '127.0.0.1', 9190,
                                                timeout=10000)

        # Upload not needed for ios because dylib is built into app
        rlib = remote.load_module(path_dso)

        ctx = remote.cpu(0)
        module = runtime.create(graph, rlib, ctx)
        data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
        module.set_input('data', data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=3, repeat=20)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
# Tuning takes too long to run on our webpage server, so it is only started
# when this file is executed directly as a script.
if __name__ == "__main__":
    # Delete any rpc_config.txt left in the working directory before starting.
    if os.path.exists("rpc_config.txt"):
        os.remove("rpc_config.txt")

    tune_and_evaluate(tuning_option)
######################################################################
# Sample Output
# -------------