Here is my situation: I am trying to compute the convolution of two 6-dimensional tensors with tensor expressions. I first apply im2col, then padding and matrix multiplication, and finally I turn the result back into a 6-dimensional tensor. Here is my computation definition code:
# im2col-based convolution between two 6-D, channel-blocked tensors.
# data layout:   (N, C_in_outer, H, W, N_inner, C_in_inner)
# kernel layout: (C_out_outer, C_in_outer, KH, KW, C_out_inner, C_in_inner)
data = tvm.te.placeholder((1, 4, 7, 7, 1, 16), dtype="int8", name="data")
kernel = tvm.te.placeholder((4, 4, 3, 3, 16, 16), dtype="int8", name="kernel")
ishape = topi.utils.get_const_tuple(data.shape)
kshape = topi.utils.get_const_tuple(kernel.shape)
input_channel = ishape[1] * ishape[5]   # flattened C_in  = outer * inner
output_channel = kshape[0] * kshape[4]  # flattened C_out = outer * inner
window_size = kshape[2] * kshape[3]     # KH * KW
# "valid" convolution (stride 1, no spatial padding) output extents
oheight = topi.utils.get_const_int((data.shape[2] - kernel.shape[2]) + 1)
owidth = topi.utils.get_const_int((data.shape[3] - kernel.shape[3]) + 1)
oshape = (data.shape[0], kernel.shape[0], oheight, owidth, data.shape[4], kernel.shape[4])

# im2col: row i is an output pixel (h = i // owidth, w = i % owidth); column j
# encodes window position and input channel as j = window * input_channel + c,
# with window = kh * KW + kw.  This decomposition of j MUST match mkernel
# below, otherwise the k-reduction in `middle` pairs up mismatched elements.
mdata = te.compute(
    (oheight * owidth, input_channel * window_size),
    lambda i, j: data[
        0,
        (j % input_channel) // ishape[5],                  # C_in_outer
        i // owidth + (j // input_channel) // kshape[3],   # h + kh
        i % owidth + (j // input_channel) % kshape[3],     # w + kw
        0,
        # C_in_inner, derived from the channel part (j % input_channel).
        # BUGFIX: this previously read (j // input_channel) % ishape[5] — the
        # window index — which disagrees with mkernel's layout of j and
        # corrupts the matmul result.
        (j % input_channel) % ishape[5],
    ],
    name="mdata"
)

# Round the row count up to the next multiple of 16 so the matmul rows can be
# tiled/tensorized; out-of-range rows are zero-filled.
padded_length = ((oheight * owidth + 15) // 16) * 16
mdata_pad = te.compute(
    (padded_length, input_channel * window_size),
    lambda i, j: te.if_then_else(
        (i < oheight * owidth),
        mdata[i, j],
        0
    ),
    name="mdata_pad",
    tag="mdata_pad"
)

# Kernel flattened to (C_out, C_in * KH * KW) with the same decomposition of j
# as mdata: j = (kh * KW + kw) * input_channel + channel.
mkernel = te.compute(
    (output_channel, input_channel * window_size),
    lambda i, j: kernel[
        i // kshape[4],                      # C_out_outer
        (j % input_channel) // kshape[5],    # C_in_outer
        (j // input_channel) // kshape[3],   # kh (divide by KW, the row width)
        (j // input_channel) % kshape[3],    # kw
        i % kshape[4],                       # C_out_inner
        (j % input_channel) % kshape[5]      # C_in_inner
    ],
    name="mkernel"
)

# GEMM: middle[i, j] = sum_k mdata_pad[i, k] * mkernel[j, k], accumulated in
# int32 to avoid int8 overflow.
k = te.reduce_axis((0, input_channel * window_size), name="k")
middle = te.compute(
    (padded_length, output_channel),
    lambda i, j: te.sum(
        mdata_pad[i, k].astype("int32") * mkernel[j, k].astype("int32"),
        axis=[k]
    ),
    name="middle"
)

# Drop the padded rows again.  NOTE(review): with a default schedule, lowering
# may simplify this copy stage away, and bound inference will then shrink
# mdata_pad/middle to only the rows the final output actually consumes.
middle_unpad = te.compute(
    (oheight * owidth, output_channel),
    lambda i, j: middle[i, j],
    name="middle_unpad"
)

# Fold the flat (pixels, channels) matrix back into the blocked 6-D layout.
out = te.compute(
    oshape,
    lambda no, co, h, w, ni, ci: middle_unpad[
        (no * oshape[4] + ni) * oheight * owidth + h * owidth + w,
        co * oshape[5] + ci
    ],
    name="out"
)
Then I created a schedule for the computation above:
# Default schedule: no stage is inlined or reordered explicitly; during
# lowering, bound inference sizes every intermediate buffer from what its
# consumers actually read.
s = te.create_schedule(out.op)
# simple_mode=True prints a flat TIR statement without the full module wrapper.
code = tvm.lower(s, [data, kernel, out], simple_mode=True)
print(code)
This produced the TIR below, but the shape of mdata_pad seems wrong: it should be (32, 576), yet the TIR shows (25, 576). I also do not see middle_unpad anywhere. Here is the TIR:
for (i: int32, 0, 25) {
for (j: int32, 0, 576) {
mdata[((i*576) + j)] = (int8*)data_2[((((((floordiv(floormod(j, 64), 16)*784) + (floordiv(j, 192)*112)) + (floordiv(i, 5)*112)) + (floordiv(floormod(j, 192), 64)*16)) + (floormod(i, 5)*16)) + floordiv(j, 64))]
}
}
for (i_1: int32, 0, 25) {
for (j_1: int32, 0, 576) {
mdata_pad[((i_1*576) + j_1)] = cast(int32, (int8*)mdata[((i_1*576) + j_1)])
}
}
for (i_2: int32, 0, 64) {
for (j_2: int32, 0, 576) {
mdata[((i_2*576) + j_2)] = (int8*)kernel_2[(((((floordiv(i_2, 16)*9216) + (floordiv(floormod(j_2, 64), 16)*2304)) + (floordiv(j_2, 64)*256)) + (floormod(i_2, 16)*16)) + floormod(j_2, 16))]
}
}
for (i_3: int32, 0, 25) {
for (j_3: int32, 0, 64) {
middle[((i_3*64) + j_3)] = 0
for (k: int32, 0, 576) {
middle[((i_3*64) + j_3)] = ((int32*)middle[((i_3*64) + j_3)] + ((int32*)mdata_pad[((i_3*576) + k)]*cast(int32, (int8*)mdata[((j_3*576) + k)])))
}
}
}
for (co: int32, 0, 4) {
for (h: int32, 0, 5) {
for (w: int32, 0, 5) {
for (ci: int32, 0, 16) {
out_2[((((co*400) + (h*80)) + (w*16)) + ci)] = (int32*)middle[((((h*320) + (w*64)) + (co*16)) + ci)]
}
}
}
}
}
Also, I think something is wrong with mkernel: the third loop nest should write to mkernel, but it writes to mdata instead. I guess there are rules that lowering multiple tensor expressions must follow, but I have not seen any tutorial that talks about this. If anyone has any idea about this, please help me — thank you!