Hello, I’m curious about how to invoke a TVM-generated kernel (from topi, or generated by the auto-scheduler) in C++. (As in the Tiramisu compiler, where codegen produces a .o file to link against, so I can invoke the Tiramisu-generated kernel through a C++ API.) For example, I use the following code to find an optimal implementation of Conv2D(512, 512, 7x7, 3x3).
import os
import numpy as np
import tvm
from tvm import te, auto_scheduler, topi
from tvm.topi.testing import conv2d_nchw_python
@auto_scheduler.register_workload
def conv2d_layer(N, H, W, CO, CI, KH, KW, stride, padding):
    """Build the compute graph for conv2d (NCHW) + bias-add + ReLU.

    Registered as an auto-scheduler workload so it can be tuned by name.

    Parameters
    ----------
    N, H, W : int
        Batch size and spatial height/width of the input.
    CO, CI : int
        Output and input channel counts.
    KH, KW : int
        Kernel height and width.
    stride, padding : int or tuple
        Convolution stride and padding, forwarded to topi.

    Returns
    -------
    list
        [data, kernel, bias, out] — the placeholder inputs followed by the
        output tensor, as required by auto_scheduler.register_workload.
    """
    data = te.placeholder((N, CI, H, W), name="data")
    kernel = te.placeholder((CO, CI, KH, KW), name="kernel")
    # Bias is broadcast over N, H, W via the (1, CO, 1, 1) shape.
    bias = te.placeholder((1, CO, 1, 1), name="bias")
    conv = topi.nn.conv2d_nchw(
        data, kernel, stride, padding, dilation=1, out_dtype="float32"
    )
    out = topi.nn.relu(conv + bias)
    return [data, kernel, bias, out]
# Tune the conv2d workload on a CPU (LLVM) target with the auto-scheduler.
target = tvm.target.Target("llvm")
# Workload: batch 1, 7x7 spatial, 512 in/out channels, 3x3 kernel,
# stride (1, 1), padding (1, 1).
N, H, W, CO, CI, KH, KW, stride, padding = 1, 7, 7, 512, 512, 3, 3, (1, 1), (1, 1)
task = auto_scheduler.SearchTask(
func=conv2d_layer, args=(N, H, W, CO, CI, KH, KW, stride, padding), target=target
)
# Inspect the computational DAG the auto-scheduler will search over.
print(task.compute_dag)
# Tuning records are appended here; apply_best reads them back below.
log_file = "conv2d.json"
# Local RPC measurement context; min_repeat_ms=300 repeats each measurement
# until it runs at least 300 ms for stable timings.
measure_ctx = auto_scheduler.LocalRPCMeasureContext(min_repeat_ms=300)
tune_option = auto_scheduler.TuningOptions(
num_measure_trials=10,
runner=measure_ctx.runner,
measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
verbose=2,
)
# Run the search; each trial is compiled and benchmarked via the runner.
task.tune(tune_option)
# Load the best schedule found in the log and get (schedule, tensor args).
sch, args = task.apply_best(log_file)
# Tear down the RPC measurement server before lowering/printing.
del measure_ctx
# Print the lowered TIR of the best schedule for inspection.
print(tvm.lower(sch, args, simple_mode=True))
Then I want to use the generated kernel in C++ like this:
conv2d_layer(input.buffer(), weight.buffer(), bias.buffer(), out.buffer())
Is this possible?