I use the latest version of TVM from GitHub. I want to replace the implementation of the nn.conv2d operator with my own custom operator.
According to the article "How to Bring Your Own Codegen to TVM" (apache.org),
I have changed these places:
1.src/relay/backend/contrib/nuc_fpga/codegen.cc
GenerateBodyOutput GenerateOpCall(const CallNode* call) {
const auto* op_node = call->op.as<OpNode>();
CHECK(op_node) << "Expect OpNode, but got " << call->op->GetTypeKey();
using ArgFunType = std::function<std::vector<std::string>(const CallNode*)>;
static const std::map<std::string, std::pair<std::string, ArgFunType>> op_map = {
{"nn.conv2d", {"nuc_fpga_conv2d", Conv2d}},
{"nn.dense", {"nuc_fpga_dense", Dense}},
};
......
......
TVM_REGISTER_GLOBAL("relay.ext.nuc_fpga").set_body_typed(NucFPGACompiler);
2.src/runtime/contrib/nuc_fpga/nuc_fpga.cc
// Runtime stub for the external FPGA conv2d kernel (C linkage so the
// codegen-emitted C wrapper can link against it by name).
// NOTE: this is only a placeholder — it announces the call and performs no
// convolution, so every parameter is intentionally unused for now.
extern "C" void nuc_fpga_conv2d(
int8_t* feature_map, int8_t* kernel, int* out,
int batch, int in_ch, int in_height, int in_width,
int kernel_number, int group,
int pad_height, int pad_width,
int kernel_height, int kernel_width,
int stride_height, int stride_width) {
  // Silence unused-parameter warnings until the real kernel is wired in.
  (void)feature_map; (void)kernel; (void)out;
  (void)batch; (void)in_ch; (void)in_height; (void)in_width;
  (void)kernel_number; (void)group;
  (void)pad_height; (void)pad_width;
  (void)kernel_height; (void)kernel_width;
  (void)stride_height; (void)stride_width;
  // Same byte-for-byte output as the original printf call.
  fputs("Calling From nuc_fpga_conv2d\n", stdout);
}
3.src/runtime/contrib/nuc_fpga/nuc_fpga.h
// Prototype of the external conv2d kernel implemented in nuc_fpga.cc.
// `extern "C"` prevents name mangling so the codegen-emitted C source can
// reference it; TVM_DLL (a TVM macro) marks the symbol for export from the
// runtime shared library.
extern "C" TVM_DLL void nuc_fpga_conv2d(
int8_t* feature_map, int8_t* kernel, int* out,
int batch, int in_ch, int in_height, int in_width,
int kernel_number, int group,
int pad_height, int pad_width,
int kernel_height, int kernel_width,
int stride_height, int stride_width);
4.python/tvm/relay/op/contrib/nuc_fpga.py
_register_external_op_helper("nn.conv2d")
5.cmake/modules/contrib/NUCFPGA.cmake
if(USE_NUCFPGA_CODEGEN STREQUAL "ON")
# Compile-time codegen: translates partitioned Relay functions into C calls
# against the nuc_fpga runtime API. Linked into the compiler library.
file(GLOB NUCFPGA_CODEGEN_SRC src/relay/backend/contrib/nuc_fpga/*.cc)
list(APPEND COMPILER_SRCS ${NUCFPGA_CODEGEN_SRC})
# Runtime kernels (nuc_fpga_conv2d etc.) linked into the runtime library so
# the generated code can resolve them at load time.
file(GLOB NUCFPGA_CONTRIB_SRCS src/runtime/contrib/nuc_fpga/nuc_fpga.cc)
list(APPEND RUNTIME_SRCS ${NUCFPGA_CONTRIB_SRCS})
message(STATUS "Build with contrib.nuc_fpga")
endif()
6.build/config.cmake
set(USE_NUCFPGA_CODEGEN ON)
Then I ran `cmake ..` and `make -j4`.
When I run the program (ResNet18):
# Mark every operator that nuc_fpga.py registered as supported with
# compiler_begin/compiler_end annotations for the 'nuc_fpga' target.
extern_mod = relay.transform.AnnotateTarget(['nuc_fpga'])(mod)
# Merge adjacent annotated regions so they become one offloaded subgraph.
extern_mod = relay.transform.MergeCompilerRegions()(extern_mod)
# Split annotated regions into separate functions tagged with the
# Compiler="nuc_fpga" attribute; ops inside keep their Relay names
# (e.g. nn.conv2d) — renaming happens only at codegen, not in the IR.
extern_mod = relay.transform.PartitionGraph()(extern_mod)
print("extern_mod:", extern_mod)
target = tvm.target.Target('llvm')
with tvm.transform.PassContext(opt_level=3):
# relay.build invokes the registered relay.ext.nuc_fpga codegen for the
# partitioned functions and compiles the rest for the 'llvm' target.
grf_mod = relay.build(extern_mod, target=target, params=params)
The graph still shows nn.conv2d instead of nuc_fpga_conv2d.
Part of the extern_mod log is:
%0 = nn.conv2d(%input0, %conv1.0.weight, strides=[2, 2], padding=[3, 3, 3, 3], channels=64, kernel_size=[7, 7]) /* ty=Tensor[(1, 64, 112, 112), float32] */;
%1 = nn.bias_add(%0, %conv1.0.bias) /* ty=Tensor[(1, 64, 112, 112), float32] */;
%2 = nn.relu(%1) /* ty=Tensor[(1, 64, 112, 112), float32] */;
%3 = nn.max_pool2d(%2, pool_size=[3, 3], strides=[2, 2], padding=[1, 1, 1, 1]) /* ty=Tensor[(1, 64, 56, 56), float32] */;
%4 = nn.conv2d(%3, %layer1.0.conv1.0.weight, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]) /* ty=Tensor[(1, 64, 56, 56), float32] */;
%5 = nn.bias_add(%4, %layer1.0.conv1.0.bias) /* ty=Tensor[(1, 64, 56, 56), float32] */;
%6 = nn.relu(%5) /* ty=Tensor[(1, 64, 56, 56), float32] */;
I expected to see nuc_fpga_conv2d instead of nn.conv2d, but it does not appear.
Did I do something wrong?
Thanks.