I use the Relax ONNX frontend to load an ONNX model that contains only a single Range op. The model is simple, and I will upload it later.
After importing the ONNX model I got the IRModule below, but it fails to build with Relax (a rough repro sketch follows the dump):
@I.ir_module
class Module:
    @T.prim_func(private=True)
    def arange(A: T.Buffer((), "float32"), T_arange: T.Buffer((T.Cast("int64", T.ceil(A[()])),), "float32")):
        T.func_attr({"tir.is_scheduled": T.bool(True), "tir.noalias": T.bool(True)})
        # with T.block("root"):
        for ax0_fused_1 in T.thread_binding(T.int64(256), thread="blockIdx.x"):
            for ax0_fused_2 in T.thread_binding(T.int64(2048), thread="threadIdx.x"):
                for ax0_fused_0 in range((T.Cast("int64", T.ceil(A[()])) + T.int64(524287)) // T.int64(524288)):
                    with T.block("T_arange"):
                        v_ax0 = T.axis.spatial(T.Cast("int64", T.ceil(A[()])), ax0_fused_0 * T.int64(524288) + ax0_fused_1 * T.int64(2048) + ax0_fused_2)
                        T.where((ax0_fused_0 * T.int64(256) + ax0_fused_1) * T.int64(2048) + ax0_fused_2 < T.Cast("int64", T.ceil(A[()])))
                        T.reads()
                        T.writes(T_arange[v_ax0])
                        T_arange[v_ax0] = T.Cast("float32", v_ax0)

    @R.function
    def main(input__encoder_make_pad_mask_Cast_output_0: R.Tensor((), dtype="float32")) -> R.Tensor((T.Cast("int64", T.ceil(A[()])),), dtype="float32"):
        R.func_attr({"num_input": 1})
        cls = Module
        with R.dataflow():
            lv = R.call_tir(cls.arange, (input__encoder_make_pad_mask_Cast_output_0,), out_sinfo=R.Tensor((T.Cast("int64", T.ceil(A[()])),), dtype="float32"))
            gv: R.Tensor((T.Cast("int64", T.ceil(A[()])),), dtype="float32") = lv
            R.output(gv)
        return gv
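For reference, below is roughly how I am hitting this. It is only a sketch: the constant start/delta, the opset version, and the LegalizeOps/DefaultGPUSchedule pass list are my guesses at how the IRModule above was produced; the actual model will be uploaded later. One thing I notice in the dump is that the out_sinfo/return shape of main still refers to A[()], a buffer that only exists inside the arange prim_func, which might be related to the build failure.

```python
# Rough repro sketch (my reconstruction, not the exact script): a one-node ONNX
# graph whose Range limit is the scalar graph input, with constant start/delta.
import onnx
from onnx import TensorProto, helper

import tvm
from tvm import relax
from tvm.relax.frontend.onnx import from_onnx

# Constant start/delta initializers, dynamic scalar limit (matches the single
# R.Tensor((), dtype="float32") input of main above).
start = helper.make_tensor("start", TensorProto.FLOAT, [], [0.0])
delta = helper.make_tensor("delta", TensorProto.FLOAT, [], [1.0])
limit = helper.make_tensor_value_info(
    "input__encoder_make_pad_mask_Cast_output_0", TensorProto.FLOAT, []
)
out = helper.make_tensor_value_info("range_out", TensorProto.FLOAT, ["N"])

node = helper.make_node(
    "Range",
    inputs=["start", "input__encoder_make_pad_mask_Cast_output_0", "delta"],
    outputs=["range_out"],
)
graph = helper.make_graph(
    [node], "range_only", inputs=[limit], outputs=[out], initializer=[start, delta]
)
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 17)])
onnx.checker.check_model(model)

# Import with the Relax ONNX frontend, then legalize and GPU-schedule; this pass
# list is my guess at how the scheduled prim_func in the dump was produced.
mod = from_onnx(model)
mod = relax.transform.LegalizeOps()(mod)
target = tvm.target.Target("cuda")
with target:
    mod = tvm.tir.transform.DefaultGPUSchedule()(mod)
mod.show()  # prints an IRModule like the one above

ex = relax.build(mod, target)  # <- this build step is where it fails for me
```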