I am converting an ONNX model to TVM and have tried several values of opt_level. When I set it as follows:
with relay.build_config(opt_level=1):
it works well; I have tested it with android_deploy, but the latency is high.
So I set opt_level=3, and the build fails with the following error:
Traceback (most recent call last):
File "cpd_mobilenetv3_deploy_on_android.py", line 63, in <module>
graph, lib, params = relay.build(func, target=target, params=params)
File "/workspace/tvm_new/tvm/python/tvm/relay/build_module.py", line 356, in build
params)
File "/workspace/tvm_new/tvm/python/tvm/relay/build_module.py", line 183, in build
self._build(func, target, target_host)
File "/workspace/tvm_new/tvm/python/tvm/_ffi/_ctypes/function.py", line 209, in __call__
raise get_last_ffi_error()
tvm._ffi.base.TVMError: Traceback (most recent call last):
[bt] (8) /workspace/tvm_new/tvm/build/libtvm.so(+0x62968d) [0x7f88fce0068d]
[bt] (7) /workspace/tvm_new/tvm/build/libtvm.so(+0x6ff3db) [0x7f88fced63db]
[bt] (6) /workspace/tvm_new/tvm/build/libtvm.so(tvm::relay::InferType(tvm::relay::Expr const&, tvm::relay::Module const&)+0x3fd) [0x7f88fced613d]
.........
.........
%298 = multiply(%37, %297)
Incompatible broadcast type TensorType([1, 3, 44, 44, 8], float32) and TensorType([1, 0, 44, 44, 8], float32);
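The 5-D tensor shapes here look like the NCHWc layout that the AlterOpLayout pass (enabled at opt_level=3) introduces, and the 0 in [1, 0, 44, 44, 8] looks like shape inference going wrong during that rewrite. A minimal sketch of what I plan to try to narrow it down (assuming my TVM version's relay.build_config accepts the disabled_pass argument):

# untested sketch: keep opt_level=3 but disable the layout pass to see
# whether it is the one producing the bad broadcast
with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]):
    graph, lib, params = relay.build(func, target=target, params=params)

If that builds, the layout pass is the culprit; otherwise I would disable the other opt_level=3 passes one at a time.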
My code is as follows:
import os
import numpy as np
from PIL import Image
import tvm
import tvm.relay as relay
from tvm import rpc
from tvm.contrib import util, ndk, graph_runtime
import onnx
import logging
# logging.getLogger().setLevel(logging.DEBUG)

def preprocess_image(image_file):
    # resize to the model's input resolution, normalize each channel with
    # its own mean/std, and convert NHWC -> NCHW along the way
    resized_image = Image.open(image_file).convert('RGB').resize((352, 352))
    image_data = np.asarray(resized_image)[np.newaxis, :, :, :].astype(np.float32)
    x_1 = image_data
    x_2 = np.concatenate(((x_1[:, :, :, 0] - 124.16) / 58.624,
                          (x_1[:, :, :, 1] - 116.736) / 57.344), axis=0)
    x_1 = np.concatenate((x_2, (x_1[:, :, :, 2] - 103.936) / 57.6), axis=0)
    x = np.expand_dims(x_1, axis=0)  # -> (1, 3, 352, 352)
    return x
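
# quick sanity check for the preprocessing above: the two axis=0 concats turn
# the (1, 352, 352, 3) NHWC array into a (3, 352, 352) CHW array, and
# expand_dims restores the batch axis:
#   assert preprocess_image('cat.png').shape == (1, 3, 352, 352)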
if __name__ == "__main__":
    image_file = 'cat.png'
    image_data = preprocess_image(image_file)
    model_file = './models/mobilenetv3_small_cpd.onnx'
    onnx_model = onnx.load(model_file)

    local_demo = False
    test_target = 'cpu'
    # cross-compile for a 64-bit Android device with NEON
    target = 'llvm -target=arm64-linux-android -mattr=+neon'
    target_host = None
    if local_demo:
        target_host = None
        target = 'llvm'
    elif test_target == 'opencl':
        target_host = target
        target = 'opencl'
    elif test_target == 'vulkan':
        target_host = target
        target = 'vulkan'

    input_name = '0'
    input_shape = (1, 3, 352, 352)
    shape_dict = {input_name: input_shape}
    func, params = relay.frontend.from_onnx(onnx_model, shape_dict)
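    # (debugging idea for the opt_level=3 error: print the imported Relay
    # program here to find which ONNX op became the multiply at %298)
    # print(func)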
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(func, target=target,
                                         target_host=target_host, params=params)
    tmp = util.tempdir()
    lib_fname = tmp.relpath('net.so')
    fcompile = ndk.create_shared if not local_demo else None
    lib.export_library(lib_fname, fcompile)

    # save the compiled model locally
    if True:
        libpath = 'cpd.so'
        lib.export_library(libpath, fcompile)
        graph_json_path = 'cpd.json'
        with open(graph_json_path, 'w') as fo:
            fo.write(graph)
        param_path = 'cpd.params'
        with open(param_path, 'wb') as fo:
            fo.write(relay.save_param_dict(params))
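
    # for reference, how I would load these artifacts back later
    # (untested sketch, old pre-0.7 module API):
    #   loaded_lib = tvm.module.load('cpd.so')
    #   loaded_graph = open('cpd.json').read()
    #   m = graph_runtime.create(loaded_graph, loaded_lib, tvm.cpu(0))
    #   m.load_params(bytearray(open('cpd.params', 'rb').read()))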
    tracker_host = os.environ.get('TVM_TRACKER_HOST', '0.0.0.0')
    tracker_port = 9195
    key = 'android'
    if local_demo:
        remote = rpc.LocalSession()
    else:
        tracker = rpc.connect_tracker(tracker_host, tracker_port)
        # when running a heavy model, increase `session_timeout`
        remote = tracker.request(key, priority=0, session_timeout=60)

    if local_demo:
        ctx = remote.cpu(0)
    elif test_target == 'opencl':
        ctx = remote.cl(0)
    elif test_target == 'vulkan':
        ctx = remote.vulkan(0)
    else:
        ctx = remote.cpu(0)

    # upload the library to the remote device and load it
    remote.upload(lib_fname)
    rlib = remote.load_module('net.so')
    module = graph_runtime.create(graph, rlib, ctx)
    module.set_input(**params)
    # set input data
    module.set_input(input_name, tvm.nd.array(image_data))
    # run
    module.run()
    # get output
    out = module.get_output(0)
    # get top-1 result
    top1 = np.argmax(out.asnumpy())
    print('TVM prediction top-1: {}'.format(top1))

    print('Evaluate inference time cost...')
    ftimer = module.module.time_evaluator('run', ctx, number=1, repeat=10)
    prof_res = np.array(ftimer().results) * 1000  # convert to milliseconds
    print('Mean inference time (std dev): %.2f ms (%.2f ms)' % (np.mean(prof_res),
                                                                np.std(prof_res)))
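
Separately, since opt_level=1 builds but is slow, a per-layer time breakdown would show where the latency goes. A minimal sketch with the debug graph runtime (assuming both host and device runtimes were built with USE_GRAPH_RUNTIME_DEBUG; it reuses graph, rlib, ctx, input_name, and image_data from above):

from tvm.contrib.debugger import debug_runtime

# same artifacts as above, but run() also dumps a per-op timing table
dbg = debug_runtime.create(graph, rlib, ctx, dump_root='/tmp/tvmdbg')
dbg.set_input(**params)
dbg.set_input(input_name, tvm.nd.array(image_data))
dbg.run()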