Getting "TVMError: cannot make uint from negative value -4" when scheduling TVM programs

Hi,

I was playing with Tensor IR: I wrote a toy program and tried to schedule it. My TIR program contains 3 blocks: C, D, and E. My schedule consists of a split, a reorder, and a vectorize, all on block E. Then I compute block D at block E’s vectorized loop. The full code is shown below:

import tvm
from tvm.ir.module import IRModule
from tvm.script import tir as T
import numpy as np
from tvm import te

# Minimal reproduction: tvm.build fails after compute_at under a vectorized loop.

# Use LLVM for both the target and the host.
tgt = tvm.target.Target(target="llvm", host="llvm")

# Three chained element-wise stages over a symbolic length n:
#   C = A + B,  D = C + C,  E = D + A
n = te.var("n")
A = te.placeholder((n,), dtype="float32", name="A")
B = te.placeholder((n,), dtype="float32", name="B")
C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")
D = te.compute(A.shape, lambda i: C[i] + C[i], name="D")
E = te.compute(A.shape, lambda i: D[i] + A[i], name="E")

# Only A, B and E are function parameters; C and D become intermediates.
func = te.create_prim_func([A, B, E])
ir_mod = IRModule({"main": func})
schedule = tvm.tir.Schedule(ir_mod)

# Split E's single loop by a factor of 4; split() returns the new loops,
# so there is no need to query them again with get_loops().
block_e = schedule.get_block("E")
(i,) = schedule.get_loops(block_e)
i_0, i_1 = schedule.split(i, factors=[None, 4])

# Move the factor-4 loop outermost and vectorize it.
schedule.reorder(i_1, i_0)
schedule.vectorize(i_1)

# Compute D under E's vectorized loop.
block_d = schedule.get_block("D")
schedule.compute_at(block_d, i_1)

print(schedule.mod.script())

# This is the call that raises
# "TVMError: cannot make uint from negative value -4".
mod = tvm.build(schedule.mod, tgt)

During tvm.build I got the error “TVMError: cannot make uint from negative value -4”. I checked the TIR of the scheduled module, and it looks like this error comes from a predicate deciding the range of the loop. Does TVM disallow schedules like this, or is this a bug?

The TIR after scheduling:

# Scheduled module as printed by schedule.mod.script(), i.e. the state of the
# IR right before the failing tvm.build call.
@I.ir_module
class Module:
    @T.prim_func
    def main(var_A: T.handle, var_B: T.handle, var_E: T.handle):
        T.func_attr({"tir.noalias": T.bool(True)})
        # n is the symbolic length shared by all three parameter buffers.
        n = T.int32()
        A = T.match_buffer(var_A, (n,))
        B = T.match_buffer(var_B, (n,))
        E = T.match_buffer(var_E, (n,))
        # with T.block("root"):
        # C and D are intermediate buffers allocated inside the function.
        C = T.alloc_buffer((n,))
        D = T.alloc_buffer((n,))
        # Block C is untouched by the schedule: one plain loop over n.
        for i in range(n):
            with T.block("C"):
                v_i = T.axis.spatial(n, i)
                T.reads(A[v_i], B[v_i])
                T.writes(C[v_i])
                C[v_i] = A[v_i] + B[v_i]
        # i_1 is the factor-4 loop from the split, reordered outermost and vectorized.
        for i_1 in T.vectorized(4):
            # Loop generated for block D by compute_at. Its start is
            # T.min(i_1, (n + 3) // 4 * 4 + i_1 - 4) and its extent is max + 1 - min,
            # so both depend on the vectorized variable i_1.
            # NOTE(review): the literal "- 4" in these bounds may be the value the
            # "cannot make uint from negative value -4" error refers to - not confirmed.
            for ax0 in range(T.max(i_1, (n + 3) // 4 * 4 + i_1 - 4) + 1 - T.min(i_1, (n + 3) // 4 * 4 + i_1 - 4)):
                with T.block("D"):
                    v_i = T.axis.spatial(n, T.min(i_1, (n + 3) // 4 * 4 + i_1 - 4) + ax0)
                    # Predicate restricting the computed index to the range [0, n).
                    T.where(0 <= T.min(i_1, (n + 3) // 4 * 4 + i_1 - 4) + ax0 and T.min(i_1, (n + 3) // 4 * 4 + i_1 - 4) + ax0 < n)
                    T.reads(C[v_i])
                    T.writes(D[v_i])
                    D[v_i] = C[v_i] + C[v_i]
            # i_0 is the outer loop from the split, with (n + 3) // 4 iterations.
            for i_0 in range((n + 3) // 4):
                with T.block("E"):
                    v_i = T.axis.spatial(n, i_0 * 4 + i_1)
                    # Guards against indices at or beyond n.
                    T.where(i_0 * 4 + i_1 < n)
                    T.reads(D[v_i], A[v_i])
                    T.writes(E[v_i])
                    E[v_i] = D[v_i] + A[v_i]