Error while trying to tensorize a simple TE program

Hi, I am trying to use Tensorize to replace a block of computation, which happens to be a simple add (for experimental purposes),
but unfortunately I am unable to do so. When I run tensorize on the schedule, I get the following error:

TVMError: In function te.StageTensorize(0: Stage, 1: tir.IterVar, 2: TensorIntrin) -> Stage: error while converting argument 1: [18:56:27] /home/user/tvm/include/tvm/runtime/packed_func.h:1866: InternalError: Check failed: (!checked_type.defined()) is false: Expected tir.IterVar, but got Array

Here is the complete code I am trying to run:

import tvm 
from tvm import te
import tvm.script
import numpy as np

# Length of the 1-D input and output tensors.
N = 1024
# Two 1-D placeholders and a compute op that adds them element-wise.
inp1 = te.placeholder((N,) , name = "inp1")
inp2 = te.placeholder((N,) , name = "inp2")
out = te.compute((N,) , lambda i : inp1[i] + inp2[i] ,  name = "out")
s = te.create_schedule(out.op)
# NOTE: `out.op.axis` is the whole collection of axes, not a single
# tir.IterVar (the reported error says an Array was received where a
# tir.IterVar was expected); index it, e.g. `out.op.axis[0]`, to get one axis.
i = out.op.axis
print(i)
# print(tvm.lower(s , [inp1 , inp2 , out] , simple_mode = True))


def intrin_add(n):
    """Declare a tensor intrinsic describing an element-wise add of length ``n``.

    The intrinsic's compute pattern is ``out_[i] = inp1_[i] + inp2_[i]`` over
    1-D tensors of shape ``(n,)``. Each tensor is bound to a buffer declared
    with ``offset_factor=1`` and ``strides=[1]``. The implementation emits a
    single ``call_extern`` to ``"CustomADD"``, passing a write pointer to the
    output, read pointers to the two inputs, and ``n``.

    Parameters
    ----------
    n :
        Extent of the 1-D tensors; also forwarded as the last argument of the
        extern call.

    Returns
    -------
    The object returned by ``te.decl_tensor_intrin`` for this pattern.
    """
    lhs = te.placeholder((n,), name="inp1_")
    rhs = te.placeholder((n,), name="inp2_")
    # The lambda argument is intentionally still called `i`: te.compute takes
    # axis names from the lambda's argument names.
    total = te.compute((n,), lambda i: lhs[i] + rhs[i], name="out_")

    def _bound_buffer(tensor, label):
        # Buffer matching the tensor's shape/dtype, with unit stride.
        return tvm.tir.decl_buffer(
            tensor.shape,
            tensor.dtype,
            name=label,
            offset_factor=1,
            strides=[1],
        )

    lhs_buf = _bound_buffer(lhs, "A")
    rhs_buf = _bound_buffer(rhs, "B")
    total_buf = _bound_buffer(total, "C")

    def intrin_func(ins, outs):
        # Build the statement that replaces the matched computation.
        builder = tvm.tir.ir_builder.create()
        src_a, src_b = ins
        dst = outs[0]
        extern_call = tvm.tir.call_extern(
            "int32",
            "CustomADD",
            dst.access_ptr("w"),
            src_a.access_ptr("r"),
            src_b.access_ptr("r"),
            n,
        )
        builder.emit(extern_call)
        return builder.get()

    bindings = {lhs: lhs_buf, rhs: rhs_buf, total: total_buf}
    return te.decl_tensor_intrin(total.op, intrin_func, binds=bindings)



# Build the intrinsic for the full length N and print it.
custom_add = intrin_add(N)
print(custom_add)
# NOTE: this is the call that raises the reported TVMError. `i` holds the
# whole axis array from `out.op.axis`, while tensorize expects a single
# tir.IterVar; passing `i[0]` instead is reported to work.
s[out].tensorize(i , custom_add)
print("====================================================================")
# Print the result of lowering the schedule.
print(tvm.lower(s , [inp1 , inp2 , out] , simple_mode = True))

A kind cc to @Hzfengsy and @sanirudh for your valuable insights.
Thanks.

It seems the loop variable was indeed an array: `out.op.axis` returns an array of axes, whereas `tensorize` expects a single `tir.IterVar`. So using

s[out].tensorize(i[0] , custom_add)

worked for me. Thanks!