I encountered the following problems when using the official two docker files (cpu and nvidia-gpu).
(1) tvmai/demo-cpu
$ cd $TVM_PATH
$ ./docker/bash.sh tvmai/demo-cpu
After about 10 minutes, the tvmai/demo-cpu Docker image was successfully built and entered.
I mount the inference code into the container and run it. The following error occurs:
$ python3 tune_network_x86_test.py
Traceback (most recent call last):
File "tune_network_x86_test.py", line 3, in <module>
import tvm
File "/usr/tvm/python/tvm/__init__.py", line 27, in <module>
from . import tensor
File "/usr/tvm/python/tvm/tensor.py", line 20, in <module>
from ._ffi.node import NodeBase, NodeGeneric, register_node, convert_to_node
File "/usr/tvm/python/tvm/_ffi/node.py", line 24, in <module>
from .node_generic import NodeGeneric, convert_to_node, const
File "/usr/tvm/python/tvm/_ffi/node_generic.py", line 23, in <module>
from .base import string_types
File "/usr/tvm/python/tvm/_ffi/base.py", line 60, in <module>
_LIB, _LIB_NAME = _load_lib()
File "/usr/tvm/python/tvm/_ffi/base.py", line 52, in _load_lib
lib = ctypes.CDLL(lib_path[0], ctypes.RTLD_GLOBAL)
File "/usr/lib/python3.6/ctypes/__init__.py", line 348, in __init__
self._handle = _dlopen(self._name, mode)
OSError: /usr/tvm/build/libtvm.so: undefined symbol: tvm_ecall_packed_func
(2) tvmai/demo-gpu
$ cd $TVM_PATH
$ ./docker/bash.sh tvmai/demo-gpu
After about 10 minutes, the tvmai/demo-gpu Docker image was successfully built and entered.
I mount the inference code into the container and run it. The following error occurs:
$ python3 tune_relay_cuda_test.py
Traceback (most recent call last):
File "tune_relay_cuda_test.py", line 10, in <module>
import tvm.contrib.graph_executor as runtime
ModuleNotFoundError: No module named 'tvm.contrib.graph_executor'
(3) Inference Code of CPU and NVIDIA-GPU
3.1 Inference Code of CPU “tune_network_x86_test.py”
Please mount the inference code in the /workspace/tvm_demo/
import numpy as np
import tvm
from tvm import relay, auto_scheduler
from tvm.relay import data_dep_optimization as ddo
import tvm.relay.testing
from tvm.contrib import graph_executor
def get_network(name, batch_size, layout="NHWC", dtype="float32", use_sparse=False):
    """Get the symbol definition and random weight of a network.

    Builds a resnet-N Relay workload with randomly initialized weights.
    Returns (mod, params, input_shape, output_shape).
    Raises ValueError for an unknown layout or network name.
    """
    # auto-scheduler prefers NHWC layout
    layout_to_image_shape = {
        "NHWC": (224, 224, 3),
        "NCHW": (3, 224, 224),
    }
    if layout not in layout_to_image_shape:
        raise ValueError("Invalid layout: " + layout)
    image_shape = layout_to_image_shape[layout]

    input_shape = (batch_size,) + image_shape
    output_shape = (batch_size, 1000)

    # Only "resnet-<N>" names are supported here; everything else is rejected.
    if not name.startswith("resnet-"):
        raise ValueError("Network not found.")
    n_layer = int(name.split("-")[1])
    mod, params = relay.testing.resnet.get_workload(
        num_layers=n_layer,
        batch_size=batch_size,
        layout=layout,
        dtype=dtype,
        image_shape=image_shape,
    )

    if use_sparse:
        # Local import: the sparse utilities are only needed on this path.
        from tvm.topi.sparse.utils import convert_model_dense_to_sparse

        mod, params = convert_model_dense_to_sparse(mod, params, bs_r=4, random_params=True)

    return mod, params, input_shape, output_shape
# --- Benchmark configuration (CPU) ---
# Network name; get_network only accepts "resnet-<N>" style names.
network = "resnet-50"
# Keep the dense model; set True to convert to block-sparse weights.
use_sparse = False
batch_size = 1
# auto-scheduler prefers the NHWC layout (see get_network above).
layout = "NHWC"
# Plain LLVM CPU target.
target = tvm.target.Target("llvm")
dtype = "float32"
# Tuning-log filename, e.g. "resnet-50-NHWC-B1-llvm.json".
log_file = "%s-%s-B%d-%s.json" % (network, layout, batch_size, target.kind.name)
print("Get model...")
relay_mod, weight_params, in_shape, out_shape = get_network(
    network,
    batch_size,
    layout,
    dtype=dtype,
    use_sparse=use_sparse,
)

# Compile the Relay module at the highest optimization level.
with tvm.transform.PassContext(opt_level=3):
    compiled_lib = relay.build(relay_mod, target=target, params=weight_params)

# Create graph executor on the target device and feed it a random input.
dev = tvm.device(str(target), 0)
gmodule = graph_executor.GraphModule(compiled_lib["default"](dev))
random_input = tvm.nd.array((np.random.uniform(size=in_shape)).astype(dtype))
gmodule.set_input("data", random_input)

# Evaluate: time repeated runs and report mean / std-dev in milliseconds.
print("Evaluate inference time cost...")
timer = gmodule.module.time_evaluator("run", dev, repeat=3, min_repeat_ms=500)
prof_res = np.array(timer().results) * 1e3  # convert to millisecond
print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)))
3.2 Inference Code of NVIDIA-GPU “tune_relay_cuda_test.py”
Please mount the inference code in the /workspace/tvm_demo/
import os
import numpy as np
import tvm
from tvm import relay, autotvm
import tvm.relay.testing
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
import tvm.contrib.graph_executor as runtime
def get_network(name, batch_size, dtype="float32"):
    """Get the symbol definition and random weight of a network.

    Builds a resnet-N Relay workload with randomly initialized weights.

    Parameters
    ----------
    name : str
        Network name; must contain "resnet" and end in "-<N>" (layer count).
    batch_size : int
        Batch dimension of the input.
    dtype : str
        Compute dtype for the workload. Previously this function read the
        module-level global ``dtype`` (defined *after* the function), which
        only worked by call order; it is now an explicit parameter with the
        same default, so existing callers are unaffected.

    Returns
    -------
    (mod, params, input_shape, output_shape)

    Raises
    ------
    ValueError
        If the network name is not supported.
    """
    input_shape = (batch_size, 3, 224, 224)
    output_shape = (batch_size, 1000)
    if "resnet" in name:
        n_layer = int(name.split("-")[1])
        mod, params = relay.testing.resnet.get_workload(
            num_layers=n_layer, batch_size=batch_size, dtype=dtype
        )
    else:
        raise ValueError("Unsupported network: " + name)
    return mod, params, input_shape, output_shape
# --- Benchmark configuration (NVIDIA GPU) ---
# Default CUDA target.
target = tvm.target.cuda()
# Network name; get_network only handles "resnet-<N>" style names.
network = "resnet-18"
# AutoTVM tuning-log filename, e.g. "resnet-18.log".
log_file = "%s.log" % network
dtype = "float32"
def tune_and_evaluate():
    """Compile the configured network for the CUDA target and time inference.

    Uses the module-level ``network``, ``target`` and ``dtype`` settings,
    runs the model once per measurement (600 repeats) and prints the mean
    and standard deviation of the inference time in milliseconds.
    """
    # extract workloads from relay program
    print("Extract tasks...")
    relay_mod, weight_params, in_shape, out_shape = get_network(network, batch_size=1)

    # Compile at the highest optimization level.
    with tvm.transform.PassContext(opt_level=3):
        compiled_lib = relay.build_module.build(relay_mod, target=target, params=weight_params)

    # load parameters
    dev = tvm.device(str(target), 0)
    gmodule = runtime.GraphModule(compiled_lib["default"](dev))
    random_input = tvm.nd.array((np.random.uniform(size=in_shape)).astype(dtype))
    gmodule.set_input("data", random_input)

    # evaluate
    print("Evaluate inference time cost...")
    timer = gmodule.module.time_evaluator("run", dev, number=1, repeat=600)
    prof_res = np.array(timer().results) * 1000  # convert to millisecond
    print(
        "Mean inference time (std dev): %.2f ms (%.2f ms)"
        % (np.mean(prof_res), np.std(prof_res))
    )


tune_and_evaluate()
Could you help me solve these problems?