Hi,
Is there a way to disable the initialization of my output tensor in te.compute, or a way to initialize it myself in my program before the te.compute computation runs?
Out = te.compute(
    (batch_size, out_channels, out_h, out_w),
    lambda batch, out_channels, yy, xx: te.sum(
        A[batch, axe_in_channels,
          yy * stride_h + axe_kernel_h * dilation_h,
          xx * stride_w + axe_kernel_w * dilation_w]
        * W[out_channels, axe_in_channels, axe_kernel_h, axe_kernel_w],
        axis=[axe_in_channels, axe_kernel_h, axe_kernel_w],
    ),
)
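For completeness, here is a minimal, self-contained sketch of the setup I am running; the concrete shapes, strides, and dilation are reconstructed from the buffer shapes in the lowered output below, so treat the exact numbers and names as assumptions:

import tvm
from tvm import te

# Values assumed from the lowered output below: A is [1, 128, 58, 58] (already padded),
# W is [128, 128, 3, 3], the output is [1, 128, 56, 56], stride 1, dilation 1.
batch_size, in_channels, out_channels = 1, 128, 128
kernel_h, kernel_w = 3, 3
stride_h = stride_w = dilation_h = dilation_w = 1
out_h = out_w = 56
in_h = (out_h - 1) * stride_h + (kernel_h - 1) * dilation_h + 1  # 58
in_w = (out_w - 1) * stride_w + (kernel_w - 1) * dilation_w + 1  # 58

A = te.placeholder((batch_size, in_channels, in_h, in_w), name="A")
W = te.placeholder((out_channels, in_channels, kernel_h, kernel_w), name="W")

# Reduction axes used by the compute definition
axe_in_channels = te.reduce_axis((0, in_channels), name="axe_in_channels")
axe_kernel_h = te.reduce_axis((0, kernel_h), name="axe_kernel_h")
axe_kernel_w = te.reduce_axis((0, kernel_w), name="axe_kernel_w")

# Same compute as above
Out = te.compute(
    (batch_size, out_channels, out_h, out_w),
    lambda batch, out_channels, yy, xx: te.sum(
        A[batch, axe_in_channels,
          yy * stride_h + axe_kernel_h * dilation_h,
          xx * stride_w + axe_kernel_w * dilation_w]
        * W[out_channels, axe_in_channels, axe_kernel_h, axe_kernel_w],
        axis=[axe_in_channels, axe_kernel_h, axe_kernel_w],
    ),
)

s = te.create_schedule(Out.op)
print(tvm.lower(s, [A, W, Out], simple_mode=True))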
As you can see below, when I do print(tvm.lower(…)) there is an initialization of the output (compute_2[...] = 0f32). I would like to do a loop permutation, and this initialization prevents me from doing what I want; I sketch the kind of reordering I am after below the lowered output.
Cannot find config for target=llvm -keys=cpu -link-params=0 -mcpu=core-avx2, workload=('conv2d_ttile_', 1, 56, 56, 128, 128, 3, 3, 1, 1, 1, 1, 1, 1). A fallback configuration is used, which may bring great performance regression.
primfn(A_1: handle, W_1: handle, compute_1: handle) -> ()
  attr = {"global_symbol": "main", "tir.noalias": True}
  buffers = {compute: Buffer(compute_2: Pointer(float32), float32, [1, 128, 56, 56], []),
             W: Buffer(W_2: Pointer(float32), float32, [128, 128, 3, 3], []),
             A: Buffer(A_2: Pointer(float32), float32, [1, 128, 58, 58], [])}
  buffer_map = {A_1: A, W_1: W, compute_1: compute} {
  for (out_channels: int32, 0, 128) {
    for (yy: int32, 0, 56) {
      for (xx: int32, 0, 56) {
        compute_2[(((out_channels*3136) + (yy*56)) + xx)] = 0f32
        for (axe_in_channels: int32, 0, 128) {
          for (axe_kernel_h: int32, 0, 3) {
            for (axe_kernel_w: int32, 0, 3) {
              compute_2[(((out_channels*3136) + (yy*56)) + xx)] = ((float32*)compute_2[(((out_channels*3136) + (yy*56)) + xx)] + ((float32*)A_2[(((((axe_in_channels*3364) + (yy*58)) + (axe_kernel_h*58)) + xx) + axe_kernel_w)]*(float32*)W_2[((((out_channels*1152) + (axe_in_channels*9)) + (axe_kernel_h*3)) + axe_kernel_w)]))
            }
          }
        }
      }
    }
  }
}
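To make the permutation concrete, the kind of loop order I am trying to end up with looks roughly like this (a sketch only; the axis order shown is just one example of hoisting the input-channel reduction above the spatial loops):

s = te.create_schedule(Out.op)
batch, oc, yy, xx = s[Out].op.axis          # spatial axes
ic, kh, kw = s[Out].op.reduce_axis          # reduction axes
# Example permutation: move the input-channel reduction outside the spatial loops
s[Out].reorder(batch, oc, ic, yy, xx, kh, kw)
print(tvm.lower(s, [A, W, Out], simple_mode=True))

With the zero-initialization attached to the reduction as shown above, I cannot get a loop nest in this order.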
Thank you