Hi everyone,
here is a tricky TensorIR question.
Looking at the TIR below, how can I unroll the block "T_matmul_NT_init"?
Calling get_block("T_matmul_NT_init") runs into an FFI error; it fails the check here: https://github.com/apache/tvm/blob/main/src/tir/schedule/primitive/get_block_loop.cc#L31
Can you help us understand why this is not possible? @junrushao @Hzfengsy
When we tried to reach the child blocks of the parent block “T_matmul_NT_o”, only the block “T_matmul_NT” was returned; hence, we can’t reach “T_matmul_NT_init” by that method either.
@main = primfn(var_placeholder: handle, var_placeholder_1: handle, var_T_relu: handle) -> ()
attr = {"layout_free_placeholders": [0], "target": Target(kind='rb_npu_lib', attrs={'dimension': ""}), "relay_attrs": meta[DictAttrs][0], "tir.noalias": True, "global_symbol": "tvmgen_default_rb_npu_lib_main_0"}
buffers = {placeholder: Buffer(placeholder_2: Pointer(global float32), float32, [256, 784], []),
placeholder_1: Buffer(placeholder_3: Pointer(global float32), float32, [1, 784], []),
T_relu: Buffer(T_relu_1: Pointer(global float32), float32, [1, 256], [])}
buffer_map = {var_placeholder: placeholder, var_placeholder_1: placeholder_1, var_T_relu: T_relu} {
block([], "root") {
tir.reads([])
tir.writes([])
T_matmul_NT = alloc_buffer(float32[1, 256])
for (i0: int32, 0, 1) {
for (i1_0: int32, 0, 64) {
block([1, 64, tir.reduce_axis(0, 1)], "T_matmul_NT_o") as [i, j_o, k_o] {
bind(i, 0)
bind(j_o, i1_0)
bind(k_o, 0)
tir.reads([placeholder_1[0, 0:784], placeholder[(j_o*4):((j_o*4) + 4), 0:784]])
tir.writes([T_matmul_NT[i, (j_o*4):((j_o*4) + 4)]])
with init() {
for (i1_1: int32, 0, 4) {
block([4], "T_matmul_NT_init") as [j_init] {
bind(j_init, i1_1)
tir.reads([])
tir.writes([T_matmul_NT[i, ((j_o*4) + j_init)]])
T_matmul_NT[i, ((j_o*4) + j_init)] = 0f32
}
}
for (i2_0: int32, 0, 392) {
for (i2_1: int32, 0, 2) {
for (i1_1_1: int32, 0, 4) "unroll" {
block([4, tir.reduce_axis(0, 784)], "T_matmul_NT") as [j, k] {
bind(j, i1_1_1)
bind(k, ((i2_0*2) + i2_1))
tir.reads([T_matmul_NT[i, ((j_o*4) + j)], placeholder_1[0, k], placeholder[((j_o*4) + j), k]])
tir.writes([T_matmul_NT[i, ((j_o*4) + j)]])
T_matmul_NT[i, ((j_o*4) + j)] = (T_matmul_NT[i, ((j_o*4) + j)] + (placeholder_1[0, k]*placeholder[((j_o*4) + j), k]))
}
}
}
for (i1_1_2: int32, 0, 4) "unroll" {
block([1, 256], "T_relu") as [ax0, ax1] {
bind(ax0, 0)
bind(ax1, ((i1_0*4) + i1_1_2))
tir.reads([T_matmul_NT[0, ax1]])
tir.writes([T_relu[ax0, ax1]])
T_relu[ax0, ax1] = max(T_matmul_NT[0, ax1], 0f32)
}
}
}
}
CC: @areusch @aca88 @SebastianBoblestETAS @sezgin1947 @UlrikHjort