I built two systemlib models with CUDA and got the following error at runtime:
terminate called after throwing an instance of 'tvm::runtime::InternalError'
what(): [11:58:15] ../../src/runtime/library_module.cc:85: InternalError: Check failed: ret == 0 (-1 vs. 0) : InternalError: Check failed: (f != nullptr) is false: Cannot find function tvmgen_model2_fused_add_rsqrt_multiply_kernel in the imported modules or global registry. If this involves ops from a contrib library like cuDNN, ensure TVM was built with the relevant library.
Stack trace:
File "../../src/runtime/module.cc", line 119
...
Stack trace:
...
[bt] (7) /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf3) [0x7f02fd5dc083]
[bt] (8) ./inference(+0x154de) [0x5629d37ad4de]
[1] 28786 abort (core dumped) ./inference
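For reference, the lookup that fails can be reproduced in isolation. This is a hypothetical diagnostic sketch (not part of the deployed code below): it fetches the system library module and asks it for the kernel named in the error, searching its imported (device) modules as well via Module::GetFunction with query_imports.

#include <tvm/runtime/logging.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>

// Hypothetical diagnostic: check whether model2's kernel is visible in the
// system library and the device modules imported under it.
void CheckKernelVisible() {
  tvm::runtime::Module syslib =
      (*tvm::runtime::Registry::Get("runtime.SystemLib"))();
  tvm::runtime::PackedFunc f = syslib.GetFunction(
      "tvmgen_model2_fused_add_rsqrt_multiply_kernel", /*query_imports=*/true);
  LOG(INFO) << "kernel resolvable: " << (f != nullptr);
}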
Building a single model with CUDA works fine, and building both systemlib models with LLVM also works fine. Please find the code snippets below.
Python
import os

import tvm
from tvm import relay
from tvm import runtime as tvm_runtime
from tvm.relay.backend import Runtime


def build_module():
    dshape = (1, 3, 224, 224)
    from mxnet.gluon.model_zoo.vision import get_model

    block = get_model("mobilenet0.25", pretrained=True)
    shape_dict = {"data": dshape}
    mod, params = relay.frontend.from_mxnet(block, shape_dict)
    func = mod["main"]
    # append a softmax to the network output
    func = relay.Function(
        func.params, relay.nn.softmax(func.body), None, func.type_params, func.attrs
    )
    models = [
        ("model1", Runtime("cpp", {"system-lib": True})),
        ("model2", Runtime("cpp", {"system-lib": True})),
    ]
    for name, runtime in models:
        with tvm.transform.PassContext(opt_level=3):
            graph, lib, params = relay.build(
                func, "cuda", runtime=runtime, params=params, mod_name=name
            )
        build_dir = os.path.abspath("build")
        if not os.path.isdir(build_dir):
            os.makedirs(build_dir)
        lib_file_name = os.path.join(build_dir, f"{name}.tar")
        lib.export_library(lib_file_name)
        with open(os.path.join(build_dir, f"{name}.json"), "w") as f_graph_json:
            f_graph_json.write(graph)
        with open(os.path.join(build_dir, f"{name}.params"), "wb") as f_params:
            f_params.write(tvm_runtime.save_param_dict(params))
C++
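The includes and the json_data helper are omitted from the snippets; for completeness, here is a minimal sketch, assuming json_data simply reads the whole graph JSON file into a std::string:

#include <dlpack/dlpack.h>
#include <tvm/runtime/logging.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/ndarray.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>

#include <fstream>
#include <sstream>
#include <string>

// Assumed helper (not shown in the original post): slurp the graph JSON
// file into a string.
std::string json_data(const std::string& path) {
  std::ifstream in(path);
  std::stringstream ss;
  ss << in.rdbuf();
  return ss.str();
}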
void RunModel1() {
  LOG(INFO) << "Running graph executor1...";
  std::string json1 = json_data("./build/model1.json");
  tvm::runtime::Module mod_syslib = (*tvm::runtime::Registry::Get("runtime.SystemLib"))();
  // create the graph executor module
  int dev_type = kDLCUDA;
  int dev_id = 0;
  tvm::runtime::Module mod = (*tvm::runtime::Registry::Get("tvm.graph_executor.create"))(
      json1, mod_syslib, dev_type, dev_id);
  tvm::runtime::PackedFunc set_input = mod.GetFunction("set_input");
  tvm::runtime::PackedFunc get_output = mod.GetFunction("get_output");
  tvm::runtime::PackedFunc run = mod.GetFunction("run");
  // Use the C++ API
  DLDevice dev{kDLCUDA, 0};
  // the graph was built for input shape (1, 3, 224, 224), so allocate
  // the batch dimension as well
  tvm::runtime::NDArray x =
      tvm::runtime::NDArray::Empty({1, 3, 224, 224}, DLDataType{kDLFloat, 32, 1}, dev);
  // set the right input
  set_input("data", x);
  // run the code
  run();
}
void RunModel2() {
  LOG(INFO) << "Running graph executor2...";
  std::string json1 = json_data("./build/model2.json");
  tvm::runtime::Module mod_syslib = (*tvm::runtime::Registry::Get("runtime.SystemLib"))();
  // create the graph executor module
  int dev_type = kDLCUDA;
  int dev_id = 0;
  tvm::runtime::Module mod = (*tvm::runtime::Registry::Get("tvm.graph_executor.create"))(
      json1, mod_syslib, dev_type, dev_id);
  tvm::runtime::PackedFunc set_input = mod.GetFunction("set_input");
  tvm::runtime::PackedFunc get_output = mod.GetFunction("get_output");
  tvm::runtime::PackedFunc run = mod.GetFunction("run");
  // Use the C++ API
  DLDevice dev{kDLCUDA, 0};
  // the graph was built for input shape (1, 3, 224, 224), so allocate
  // the batch dimension as well
  tvm::runtime::NDArray x =
      tvm::runtime::NDArray::Empty({1, 3, 224, 224}, DLDataType{kDLFloat, 32, 1}, dev);
  // set the right input
  set_input("data", x);
  // run the code
  run();
  // get the output (see the sketch after main below)
  // get_output(0, y);
}
int main(void) {
  RunModel1();
  RunModel2();
}
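For reference, the commented-out get_output in RunModel2 could be filled in roughly like this (a sketch, assuming the mobilenet0.25 classifier with the appended softmax yields a (1, 1000) float32 output):

// inside RunModel2, after run(): allocate an output buffer on the
// device and copy result 0 into it
tvm::runtime::NDArray y =
    tvm::runtime::NDArray::Empty({1, 1000}, DLDataType{kDLFloat, 32, 1}, dev);
get_output(0, y);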
Makefile
TVM_ROOT=$(shell cd ../..; pwd)
DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core
CUDA_HOME=/usr/local/cuda

PKG_CFLAGS = -std=c++17 -O2 -fPIC \
	-I${TVM_ROOT}/include \
	-I${DMLC_CORE}/include \
	-I${TVM_ROOT}/3rdparty/dlpack/include \
	-I${CUDA_HOME}/include \
	-DDMLC_USE_LOGGING_LIBRARY=\<tvm/runtime/logging.h\>

PKG_LDFLAGS = -L${TVM_ROOT}/build -ldl -pthread

tvm_runtime_pack.o: tvm_runtime_pack.cc
	$(CXX) -c $(PKG_CFLAGS) -o $@ $^

deploy: tvm_runtime_pack.o
	$(CXX) $(PKG_CFLAGS) -Wl,--allow-multiple-definition -o inference inference.cc tvm_runtime_pack.o \
		build/model1/lib0.o build/model1/devc.o \
		build/model2/lib0.o build/model2/devc.o \
		$(PKG_LDFLAGS) -L${CUDA_HOME}/lib64 -lcuda -lcudart
Note that:
- I already link the lib0.o and devc.o of both models (extracted from each exported .tar) into the binary.