How to print out the C code generated by DNNL?

In the article "How to Bring Your Own Codegen to TVM", there is an example of using DNNL to generate the following C code:

// The example Relay graph: conv2d -> add -> relu.
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <vector>
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/container.h>
#include <tvm/runtime/packed_func.h>
#include <dlpack/dlpack.h>
#include <dnnl/dnnl_kernel.h>
using namespace tvm::runtime;
using namespace tvm::runtime::contrib;

// Execute the conv2d->add->relu graph with DNNL.
//
// dnnl_0_i0 and dnnl_0_i1 are forwarded to dnnl_conv2d, dnnl_0_i2 is the
// second operand of dnnl_add, and out0 receives the final 4608 floats.
// The argument lists of the dnnl_* kernels are kept exactly as generated.
//
// Intermediate buffers are owned by std::vector so they are released on
// every exit path, and an allocation failure raises std::bad_alloc instead
// of handing a null pointer to the kernels.
extern "C" void dnnl_0_(float* dnnl_0_i0, float* dnnl_0_i1,
                        float* dnnl_0_i2, float* out0) {
  // Element count of each intermediate tensor: 1 * 32 * 12 * 12 = 4608,
  // matching the trailing (1, 32, 12, 12) arguments of dnnl_add/dnnl_relu.
  constexpr std::size_t kNumElems = 4608;

  // Allocate intermediate buffers.
  std::vector<float> buf_0(kNumElems);
  std::vector<float> buf_1(kNumElems);
  std::vector<float> buf_2(kNumElems);

  // Pre-implemented op-based DNNL functions.
  dnnl_conv2d(dnnl_0_i0, dnnl_0_i1, buf_0.data(), 1, 32, 14, 14, 32, 1, 0, 0, 3, 3, 1, 1);
  dnnl_add(buf_0.data(), dnnl_0_i2, buf_1.data(), 1, 32, 12, 12);
  dnnl_relu(buf_1.data(), buf_2.data(), 1, 32, 12, 12);

  // Copy the final output to the corresponding buffer.
  std::memcpy(out0, buf_2.data(), kNumElems * sizeof(float));
}

// DLTensor-based entry point: unwraps the raw data pointer of every tensor,
// reinterprets it as float*, and forwards the four buffers to dnnl_0_.
// No null or dtype checks are performed here. Always returns 0.
extern "C" int dnnl_0_wrapper_(DLTensor* arg0,
        DLTensor* arg1,
        DLTensor* arg2,
        DLTensor* out0) {
  // View each tensor's payload as a primitive float buffer.
  float* const input0 = static_cast<float*>(arg0->data);
  float* const input1 = static_cast<float*>(arg1->data);
  float* const input2 = static_cast<float*>(arg2->data);
  float* const result = static_cast<float*>(out0->data);

  // Run the generated execution function on the primitive buffers.
  dnnl_0_(input0, input1, input2, result);
  return 0;
}

// The TVM macro that generates the TVM-runtime-compatible function "dnnl_0"
// from the generated wrapper "dnnl_0_wrapper_" defined in this file.
TVM_DLL_EXPORT_TYPED_FUNC(dnnl_0, dnnl_0_wrapper_);

I would like to know the exact function call that generates the code and the location where the generated code is located.

1 Like

You can try the code below.

import tvm
from tvm import relay
import tvm.relay.testing  # relay.testing is a submodule; import it explicitly

# Build a MobileNet workload to use as the reference model.
dtype = "float32"
ishape = (1, 3, 224, 224)
ref_mod, ref_params = relay.testing.mobilenet.get_workload(
    batch_size=ishape[0], image_shape=ishape[1:], dtype=dtype
)

# Annotate the ops supported by DNNL, merge adjacent regions, and split them
# out into separate functions handled by the external codegen.
mod = relay.transform.AnnotateTarget(["dnnl"])(ref_mod)
mod = relay.transform.MergeCompilerRegions()(mod)
mod = relay.transform.PartitionGraph()(mod)

with tvm.transform.PassContext(opt_level=3):
    graph_json, lib, params = relay.build(mod, target='llvm', params=ref_params)

# Print the source held by the second imported module.
# NOTE(review): presumably index 1 is the DNNL-generated module; the index may
# differ between TVM versions, so inspect lib.imported_modules if it does not match.
print(lib.imported_modules[1].get_source())