Tensor/Buffer not addressable

I am trying to write a compute and schedule that generate C code for a custom accelerator, starting with a simple convolution in NCHW layout. There are several constraints:

  • the input, kernel, and conv output buffers (or sub-buffers) need to be cached; this is achieved with DMA copies using the Inject Copy Intrinsic functionality (see the pass-registration sketch after this list)

  • the conv computation needs to be done using an intrinsic
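For context, the dma_copy pragma is lowered by TVM's InjectCopyIntrin pass, wired up the way VTA does it. Below is a minimal sketch of that wiring; `_inject_copy` and `MyDMACopy2D` are placeholders for my real callback, which emits the VTALoadBuffer2D/VTAStoreBuffer2D-style calls:

    import tvm

    def _inject_copy(src, dst, pad_before, pad_after, pad_value):
        # src and dst are tir.Buffer views of the two sides of the copy;
        # the real callback inspects their scopes and emits the matching
        # DMA extern call (reduced here to a single placeholder extern)
        return tvm.tir.Evaluate(
            tvm.tir.call_extern(
                "int32", "MyDMACopy2D",  # placeholder extern name
                dst.access_ptr("w"), src.access_ptr("r"),
            )
        )

    # run the pass during lowering, as VTA does via tir.add_lower_pass
    copy_pass = tvm.tir.transform.InjectCopyIntrin("dma_copy", _inject_copy)
    with tvm.transform.PassContext(config={"tir.add_lower_pass": [(1, copy_pass)]}):
        mod = tvm.lower(s, [Input, Filter, out_buf])

The compute definition is: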

    # reduction axes used in the convolution sum below
    rc = te.reduce_axis((0, channel), name="rc")
    ry = te.reduce_axis((0, kernel_h), name="ry")
    rx = te.reduce_axis((0, kernel_w), name="rx")

    temp = my_pad(Input, pad_before, pad_after, name="pad_temp")

    weight_buf = te.compute(
        (num_filter, channel, kernel_h, kernel_w), lambda *i: Filter(*i), "weight_buf"
    )

    conv_result = te.compute(
        (batch, out_channel, out_height, out_width),
        lambda nn, ff, yy, xx: te.sum(
            temp[nn, rc, yy * stride_h + ry * dilation_h,
                 xx * stride_w + rx * dilation_w].astype(out_dtype)
            * weight_buf[ff, rc, ry, rx].astype(out_dtype),
            axis=[rc, ry, rx],
        ),
        name="conv_result",
        tag="conv2d_nchw",
    )

    # staging copy of the conv output, same shape as conv_result
    out_buf = te.compute(
        (batch, out_channel, out_height, out_width), lambda *i: conv_result(*i), "out_buf"
    )
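Here `op_by_name` is just a small helper dict that maps op names to the te operations; I build it roughly like this:

    s = te.create_schedule(out_buf.op)
    op_by_name = {
        op.name: op for op in [temp.op, weight_buf.op, conv_result.op, out_buf.op]
    }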
    

and the schedule is:

    conv_axis_nn, conv_axis_ff, conv_axis_yy, conv_axis_xx = op_by_name["conv_result"].axis
    rc, ry, rx = op_by_name["conv_result"].reduce_axis

    s[op_by_name["pad_temp"]].compute_at(s[op_by_name["conv_result"]], conv_axis_yy)
    s[op_by_name["pad_temp"]].pragma(s[op_by_name["pad_temp"]].op.axis[2], "dma_copy")
    s[op_by_name["pad_temp"]].set_scope("local.inp_buffer")

    s[op_by_name["weight_buf"]].compute_at(s[op_by_name["conv_result"]], conv_axis_ff)
    s[op_by_name["weight_buf"]].pragma(s[op_by_name["weight_buf"]].op.axis[0], "dma_copy")
    s[op_by_name["weight_buf"]].set_scope("local.wgt_buffer")

    s[op_by_name["conv_result"]].compute_at(s[op_by_name["out_buf"]], s[op_by_name["out_buf"]].op.axis[1])
    s[op_by_name["conv_result"]].set_scope("local.acc_buffer")

    # shapes of the padded input and of the weights feeding conv_result
    _, in_channel, in_height, in_width = op_by_name["conv_result"].input_tensors[0].shape
    out_channels, _, k_height, k_width = op_by_name["conv_result"].input_tensors[1].shape
    ciha_prod = intrin_dotproduct(out_channels, in_channel, in_width, k_height, k_width)
    s[op_by_name["conv_result"]].tensorize(rc, ciha_prod)

    s[op_by_name["out_buf"]].pragma(s[op_by_name["out_buf"]].op.axis[2], "dma_copy")
    s[op_by_name["out_buf"]].parallel(s[op_by_name["out_buf"]].op.axis[1])

However, when I try to tensorize “conv_result” I get an error:

    (info->head_address.defined()) is false: conv_result is not adddressable
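In case it's relevant: the special scopes are registered the same way VTA registers its on-chip SRAM scopes, as MemoryInfo nodes. A sketch for one of them (the bit widths and sizes are placeholders; head_address=None mirrors VTA's registration):

    @tvm.register_func("tvm.info.mem.local.acc_buffer")
    def mem_info_acc_buffer():
        return tvm.ir.make_node(
            "MemoryInfo",
            unit_bits=32,            # placeholder element width
            max_simd_bits=32,        # placeholder
            max_num_bits=16384 * 8,  # placeholder buffer size in bits
            head_address=None,       # no host-visible base address, as in VTA
        )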

Without tensorize, the generated C code looks fine when I inspect it (note that the DMA intrinsics and buffer scopes are taken from VTA):

    void* weight_buf = TVMBackendAllocWorkspace(1, dev_id, (uint64_t)18432, 2, 32);
    if (weight_buf == NULL) {
      return -1;
    }
    void* conv_result = TVMBackendAllocWorkspace(1, dev_id, (uint64_t)196, 2, 32);
    if (conv_result == NULL) {
      return -1;
    }
    void* pad_temp = TVMBackendAllocWorkspace(1, dev_id, (uint64_t)55296, 2, 32);
    if (pad_temp == NULL) {
      return -1;
    }
    (void)VTALoadBuffer2D(ctx_cache_, placeholder1, (i1 * 18), 1, 4608, 0, 0, 0, 0, 0, 0, weight_buf, 5);
    for (int32_t yy = 0; yy < 7; ++yy) {
      for (int32_t i11 = 0; i11 < 512; ++i11) {
        int32_t _1 = 1 - yy;
        int32_t _2 = yy - 5;
        (void)VTALoadBuffer2D(ctx_cache_, placeholder, (((((i11 * 49) + (yy * 7)) + (((_1) > (0) ? (_1) : (0)) * 7)) - 7) >> 4), 1, 1, 1, 1, ((_1) > (0) ? (_1) : (0)), 1, ((_2) > (0) ? (_2) : (0)), ((i11 * 27) / 4), pad_temp, 5);
      }
      for (int32_t xx = 0; xx < 7; ++xx) {
        ((float*)conv_result)[(((yy * 7) + xx))] = 0.000000e+00f;
        for (int32_t rc = 0; rc < 512; ++rc) {
          for (int32_t ry = 0; ry < 3; ++ry) {
            for (int32_t rx = 0; rx < 3; ++rx) {
              ((float*)conv_result)[(((yy * 7) + xx))] = (((float*)conv_result)[(((yy * 7) + xx))] + (((float*)pad_temp)[(((((rc * 27) + (ry * 9)) + xx) + rx))] * ((float*)weight_buf)[((((rc * 9) + (ry * 3)) + rx))]));
            }
          }
        }
      }
    }
    (void)VTAStoreBuffer2D(ctx_cache_, 0, conv_result, 4, out_buf, ((i1 * 49) >> 4), 1, 49, 0);

Do you have any idea what this error means?