Hi, my friends.
First, I generate assembly code with Module.get_source("asm").
Then I compile the asm code into a *.o file and link it with a main function like this (the same .o file can be loaded by tvm.runtime.load_module and run):
#include "tvm/runtime/c_runtime_api.h"
#include "tvm/runtime/c_backend_api.h"
#include "dlpack/dlpack.h"
// Entry point of the generated kernel; its definition lives in the object
// file assembled from the generated asm, not in this translation unit.
// extern "C" keeps the symbol name unmangled so the linker can match it.
// NOTE(review): every pointer parameter is declared as an untyped void*;
// presumably these map onto TVM's packed-function signature
// (argument values, argument type codes, return value, return type code,
// resource handle) — confirm against tvm/runtime/c_backend_api.h.
extern "C" TVM_DLL int32_t default_function(void* args, void* arg_type_ids, int32_t num_args, void* out_ret_value, void* out_ret_tcode, void* resource_handle);
int main()
{
float *a = new float[1024*1024];
for(uint32_t i=0; i<1024*1024; i++) {
a[i] = i;
}
float *b = new float[1024*1024];
int64_t shapeCommon[] = {1024,1024};
int64_t strideCommon[] = {1024, 1};
DLTensor input1;
input1.data = (void*)a;
input1.ctx = {kDLCPU, 0};
input1.ndim = 2;
input1.dtype = {2, 32, 1};
input1.shape = shapeCommon;
input1.strides = strideCommon;
input1.byte_offset = 0;
DLTensor input2;
input2.data = (void*)a;
input2.ctx = {kDLCPU, 0};
input2.ndim = 2;
input2.dtype = {2, 32, 1};
input2.shape = shapeCommon;
input2.strides = strideCommon;
input2.byte_offset = 0;
DLTensor output;
output.data = (void*)b;
output.ctx = {kDLCPU, 0};
output.ndim = 2;
output.dtype = {2, 32, 1};
output.shape = shapeCommon;
output.strides = strideCommon;
output.byte_offset = 0;
void* args[] = {(void*)(&input1), (void*)(&input2), (void*)(&output)};
int32_t arg_type_ids[] = {kTVMDLTensorHandle,kTVMDLTensorHandle,kTVMDLTensorHandle};
default_function(args, arg_type_ids, 3, nullptr, nullptr, nullptr);
}