I am trying to inference a resnet18 model on an android device, currently working on CPU backend.
I have successfully built the tvm runtime with arm64-v8a abi and I have transormed the model from onnx to format which in my opinion TVM could recognize, ie, files consisting of .so,.json,*.params. The transforming script is something like this:
import onnx
import numpy as np
import tvm
import tvm.relay as relay
import os
from tvm.contrib import utils, ndk
onnx_model = onnx.load('assets/resnet18.onnx')
x = np.ones([1,3,224,224]) #输入的tensor shape
target = "llvm -mtriple=arm64-linux-android" #编译的目标架构
input_name = 'input.1' #网络输入节点名
shape_dict = {input_name: x.shape}
sym, params = relay.frontend.from_onnx(onnx_model, shape_dict)
with tvm.transform.PassContext(opt_level=3):
graph, lib, params = relay.build(sym, target=target, params=params)
# lib.export_library("deploy.so", cc="/opt/android-toolchain-arm64/bin/aarch64-linux-android24-clang++")
lib.export_library("deploy.so", ndk.create_shared) #需要声明环境变量 export TVM_NDK_CC
graph_json_path = "deploy.json"
with open(graph_json_path, 'w') as fo:
fo.write(graph)
param_path = "deploy.params"
with open(param_path, 'wb') as fo:
fo.write(relay.save_param_dict(params))
The inference C++ source code is as follows:
int main()
{
// tvm module for compiled functions
tvm::runtime::Module mod_syslib = tvm::runtime::Module::LoadFromFile("deploy.so");
// json graph
// std::ifstream json_in("deploy.json", std::ios::in);
std::ifstream json_in("deploy.json");
std::string json_data((std::istreambuf_iterator<char>(json_in)), std::istreambuf_iterator<char>());
json_in.close();
// parameters in binary
// std::ifstream params_in("deploy.params", std::ios::binary);
std::ifstream params_in("deploy.params");
std::string params_data((std::istreambuf_iterator<char>(params_in)), std::istreambuf_iterator<char>());
params_in.close();
// parameters need to be TVMByteArray type to indicate the binary data
TVMByteArray params_arr;
params_arr.data = params_data.c_str();
params_arr.size = params_data.length();
int dtype_code = kDLFloat;
int dtype_bits = 32;
int dtype_lanes = 1;
int device_type = kDLCPU;
int device_id = 0;
// get global function module for graph runtime
tvm::runtime::Module mod = (*tvm::runtime::Registry::Get("tvm.graph_executor.create"))(json_data, mod_syslib, device_type, device_id);
DLTensor *x;
int in_ndim = 4;
int64_t in_shape[4] = {1, 3, 224, 224};
TVMArrayAlloc(in_shape, in_ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &x);
// load image
float *buf_ = NULL;
size_t size = tvm::runtime::GetDataSize(*x);
ReadImg("imagenet_cat.png", in_shape[1], in_shape[2], in_shape[3], &buf_);
TVMArrayCopyFromBytes(x, buf_, size);
free(buf_);
printf("input size:%ld\nshape:\n", size);
for (int i = 0; i < x->ndim; i++)
{
printf("%ld ", x->shape[i]);
}
printf("\n");
// get the function from the module(set input data)
tvm::runtime::PackedFunc set_input = mod.GetFunction("set_input");
set_input("input", x);
// get the function from the module(load patameters)
tvm::runtime::PackedFunc load_params = mod.GetFunction("load_params");
load_params(params_arr);
// get the function from the module(run it)
tvm::runtime::PackedFunc run = mod.GetFunction("run");
run();
DLTensor *y;
int out_ndim = 2;
int64_t out_shape[2] = {1, 1000};
TVMArrayAlloc(out_shape, out_ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &y);
// get the function from the module(get output data)
tvm::runtime::PackedFunc get_output = mod.GetFunction("get_output");
get_output(0, y);
// get the maximum position in output vector
auto y_iter = static_cast<float *>(y->data);
auto max_iter = std::max_element(y_iter, y_iter + 1000);
auto max_index = std::distance(y_iter, max_iter);
std::cout << "The maximum position in output vector is: " << max_index << std::endl;
TVMArrayFree(x);
TVMArrayFree(y);
return 0;
}
All dependcies are been pushed to device and the binary is built and linked without wrong, but the execution process doesn’t go on well, running with exception:
/home/op/workspace/tvm/buildruntime/libtvm_runtime.so: 1 file pushed. 35.9 MB/s (47169288 bytes in 1.253s)
terminating with uncaught exception of type tvm::runtime::InternalError: [10:20:59] /home/op/workspace/tvm/3rdparty/dmlc-core/include/dmlc/json.h:732: Check failed: ch == '{' (-1 vs. {) : Error at Line 0, around ^``, Expect '{' but get '�'
Stack trace not available when DMLC_LOG_STACK_TRACE is disabled at compile time.
Is there anything wrong with my pipeline?
Any information is in need, Thanks!