Hello,
I’ve been trying to do something similar — deploying a Relax IR module from C++ — and was able to get a simple example working.
Relax IR (Python script that builds the module and exports compiled_artifact.so):
import numpy as np
import tvm
from tvm import relax
from tvm.script import ir as I
from tvm.script import relax as R
from tvm.script import tir as T
@I.ir_module
class TVMScriptModule:
    # IRModule written in TVMScript. It holds one TIR kernel (`addone`) and
    # the Relax function `main` that calls it through `R.call_tir`.

    @T.prim_func
    def addone(A_handle: T.handle, B_handle: T.handle) -> None:
        # Element-wise kernel over a 2-D int32 buffer of symbolic shape
        # (m, n): B[i, j] = A[i, j] + 1.
        m = T.int64()
        n = T.int64()
        A = T.match_buffer(A_handle, (m, n), "int32")
        B = T.match_buffer(B_handle, (m, n), "int32")
        T.func_attr({"global_symbol": "addone"})
        for i, j in T.grid(m, n):
            with T.block("addone"):
                # Both loop axes are bound as "S" axes of the block.
                vi, vj = T.axis.remap("SS", [i, j])
                B[vi, vj] = A[vi, vj] + T.int32(1)

    @R.function
    def main(x: R.Tensor(("m", "n"), "int32")):
        # m and n are used below to annotate the shape of the tensor that
        # `addone` produces.
        m, n = T.int64(), T.int64()
        gv0 = R.call_tir(TVMScriptModule.addone, (x,), R.Tensor((m, n), dtype="int32"))
        return gv0
# Print the module as written, then again after each transformation is
# applied, so every intermediate form can be inspected.
mod = TVMScriptModule
mod.show()  # pylint: disable=maybe-no-member
for transform in (
    relax.transform.LegalizeOps(),
    relax.get_pipeline("zero"),  # pylint: disable=no-value-for-parameter
):
    mod = transform(mod)
    mod.show()  # pylint: disable=maybe-no-member

# Build with the LLVM target and export the result as a shared library; the
# C++ program loads this file by name.
target = tvm.target.Target("llvm")
executable = relax.build(mod, target, exec_mode="compiled")
executable.export_library("compiled_artifact.so")

# A 2x3 int32 input placed on the CPU device.
dev = tvm.cpu()
host_input = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.int32)
data: tvm.runtime.NDArray = tvm.nd.array(host_input, device=dev)

# Run `main` on a VM created directly from the in-memory executable.
vm = relax.VirtualMachine(executable, dev)
cpu_out = vm["main"](data).numpy()
print(cpu_out)

# Run `main` again on a VM created from the exported library loaded back
# from disk.
loaded_mod: tvm.runtime.Module = tvm.runtime.load_module("compiled_artifact.so")
vm1 = relax.VirtualMachine(loaded_mod, dev)
cpu_out1 = vm1["main"](data).numpy()
print(cpu_out1)
C++ code to deploy compiled_artifact.so:
#include <cstdint>
#include <iostream>
#include <string>

#include <tvm/runtime/data_type.h>
#include <tvm/runtime/logging.h>
#include <tvm/runtime/memory/memory_manager.h>
#include <tvm/runtime/relax_vm/executable.h>
using tvm::runtime::relax_vm::Executable;
using tvm::runtime::Module;
using tvm::runtime::PackedFunc;
using tvm::runtime::memory::AllocatorType;
int main()
{
std::string path = "./compiled_artifact.so";
// Load the shared object into a Module.
Module m = Module::LoadFromFile(path);
std::cout << m << std::endl;
PackedFunc vm_load_executable = m.GetFunction("vm_load_executable");
CHECK(vm_load_executable != nullptr)
<< "Error: File `" << path
<< "` is not built by RelaxVM, because `vm_load_executable` does not exist";
// Create a VM from the Executable in the Module.
Module mod = vm_load_executable();
PackedFunc vm_initialization = mod.GetFunction("vm_initialization");
CHECK(vm_initialization != nullptr)
<< "Error: File `" << path
<< "` is not built by RelaxVM, because `vm_initialization` does not exist";
// Initialize the VM
tvm::Device device{kDLCPU, 0};
vm_initialization(static_cast<int>(device.device_type), static_cast<int>(device.device_id),
static_cast<int>(AllocatorType::kPooled), static_cast<int>(kDLCPU), 0,
static_cast<int>(AllocatorType::kPooled));
PackedFunc main = mod.GetFunction("main");
CHECK(main != nullptr)
<< "Error: File `" << path
<< "` does not contain the expected entry function, `main`";
// Create and initialize the input array
auto i32 = tvm::runtime::DataType::Int(32);
tvm::runtime::NDArray input = tvm::runtime::NDArray::Empty({3, 3}, i32, device);
int numel = input.Shape()->Product();
for (int i = 0; i < numel; ++i)
static_cast<int*>(input->data)[i] = 42;
// Run the main function
tvm::runtime::NDArray output = main(input);
for (int i = 0; i < numel; ++i)
std::cout << static_cast<int*>(output->data)[i] << std::endl;
}