I’m trying to implement adaptive pooling with TVM.
def adaptive_pooling(tensor, bc, oh, ow):
    ih = tensor.shape[len(tensor.shape) - 2]
    iw = tensor.shape[len(tensor.shape) - 1]

    def compute(*out_shape):
        bc, ox, oy = out_shape
        # Window bounds for output cell (ox, oy): floor for the start,
        # ceiling for the end, so every input element is covered.
        start_idx = ox * ih / oh
        end_idx = ((ox + 1) * ih + oh - 1) / oh
        start_idy = oy * iw / ow
        end_idy = ((oy + 1) * iw + ow - 1) / ow
        reduce_x = tvm.reduce_axis((start_idx, end_idx), name='rx')
        reduce_y = tvm.reduce_axis((start_idy, end_idy), name='ry')
        return tvm.sum(tensor[bc, reduce_x, reduce_y], axis=[reduce_x, reduce_y])

    return tvm.compute((bc, oh, ow), compute, name='ap')
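For reference, this is the window rule the compute above is meant to implement, written as a plain NumPy sketch of my own (adaptive sum pooling over the last two axes, with a hypothetical helper name adaptive_sum_pool_ref):

import numpy as np

def adaptive_sum_pool_ref(x, oh, ow):
    # Same floor/ceil bounds as the TVM compute: output cell (ox, oy)
    # sums the input window [ox*ih//oh, ceil((ox+1)*ih/oh)) x
    # [oy*iw//ow, ceil((oy+1)*iw/ow)).
    bc, ih, iw = x.shape
    out = np.zeros((bc, oh, ow), dtype=x.dtype)
    for ox in range(oh):
        x0, x1 = ox * ih // oh, ((ox + 1) * ih + oh - 1) // oh
        for oy in range(ow):
            y0, y1 = oy * iw // ow, ((oy + 1) * iw + ow - 1) // ow
            out[:, ox, oy] = x[:, x0:x1, y0:y1].sum(axis=(1, 2))
    return out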
When I try to bind the output axes to threads and blocks:
s = tvm.create_schedule(B.op)
bc, oh, ow = s[B].op.axis
ohb, oht = s[B].split(oh, factor=16)
owb, owt = s[B].split(ow, factor=16)
s[B].bind(owt, tvm.thread_axis('threadIdx.x'))
s[B].bind(owb, tvm.thread_axis('blockIdx.x'))
s[B].bind(oht, tvm.thread_axis('threadIdx.y'))
s[B].bind(ohb, tvm.thread_axis('blockIdx.y'))
s[B].bind(bc, tvm.thread_axis('blockIdx.z'))
func = tvm.build(s, [BC, H, W, OH, OW, A, B], 'cuda', name='tvm_kernel3')
TVM fails to build the function with the following error:
tvm/src/pass/make_api.cc:169: Not all Vars are passed in api_args: 'i2' 'i1' does not appeared in api_args
Here ‘i1’ and ‘i2’ are the itervars over the output tensor's height and width axes. They are bound to threads, but they also appear in the reduce_axis bounds used to index the input tensor, so the lowered code still references ‘i1’ and ‘i2’ even after the binding. TVM then fails to build the function because those vars are not among the API arguments.
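For comparison, here is a sketch of my own (same pre-0.7 tvm API as above, but with a hypothetical fixed 2x2 window and the name fixed_pool instead of the adaptive bounds) that I would expect to build, because the output itervars no longer appear in the reduce_axis bounds:

import tvm

BC = tvm.var(name='BC', dtype='int32')
OH = tvm.var(name='OH', dtype='int32')
OW = tvm.var(name='OW', dtype='int32')
K = 2  # fixed window size, so the reduce_axis extent is a constant

A = tvm.placeholder((BC, OH * K, OW * K), name='A')
rx = tvm.reduce_axis((0, K), name='rx')
ry = tvm.reduce_axis((0, K), name='ry')
B = tvm.compute((BC, OH, OW),
                lambda bc, ox, oy: tvm.sum(A[bc, ox * K + rx, oy * K + ry],
                                           axis=[rx, ry]),
                name='fixed_pool')

s = tvm.create_schedule(B.op)
bc, oh, ow = s[B].op.axis
ohb, oht = s[B].split(oh, factor=16)
owb, owt = s[B].split(ow, factor=16)
s[B].bind(owt, tvm.thread_axis('threadIdx.x'))
s[B].bind(owb, tvm.thread_axis('blockIdx.x'))
s[B].bind(oht, tvm.thread_axis('threadIdx.y'))
s[B].bind(ohb, tvm.thread_axis('blockIdx.y'))
s[B].bind(bc, tvm.thread_axis('blockIdx.z'))
func = tvm.build(s, [BC, OH, OW, A, B], 'cuda', name='fixed_pool')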
Full Code:
import numpy as np
import tvm


def adaptive_pooling(tensor, bc, oh, ow):
    ih = tensor.shape[len(tensor.shape) - 2]
    iw = tensor.shape[len(tensor.shape) - 1]

    def compute(*out_shape):
        bc, ox, oy = out_shape
        start_idx = ox * ih / oh
        end_idx = ((ox + 1) * ih + oh - 1) / oh
        start_idy = oy * iw / ow
        end_idy = ((oy + 1) * iw + ow - 1) / ow
        reduce_x = tvm.reduce_axis((start_idx, end_idx), name='rx')
        reduce_y = tvm.reduce_axis((start_idy, end_idy), name='ry')
        return tvm.sum(tensor[bc, reduce_x, reduce_y], axis=[reduce_x, reduce_y])

    return tvm.compute((bc, oh, ow), compute, name='ap')


BC = tvm.var(name='BC', dtype='int32')
H = tvm.var(name='H', dtype='int32')
W = tvm.var(name='W', dtype='int32')
OH = tvm.var(name='OH', dtype='int32')
OW = tvm.var(name='OW', dtype='int32')

A = tvm.placeholder((BC, H, W), name='A')
B = adaptive_pooling(A, BC, OH, OW)

s = tvm.create_schedule(B.op)
bc, oh, ow = s[B].op.axis
ohb, oht = s[B].split(oh, factor=16)
owb, owt = s[B].split(ow, factor=16)
s[B].bind(owt, tvm.thread_axis('threadIdx.x'))
s[B].bind(owb, tvm.thread_axis('blockIdx.x'))
s[B].bind(oht, tvm.thread_axis('threadIdx.y'))
s[B].bind(ohb, tvm.thread_axis('blockIdx.y'))
s[B].bind(bc, tvm.thread_axis('blockIdx.z'))

print(tvm.lower(s, [A, B], simple_mode=True))
func = tvm.build(s, [BC, H, W, OH, OW, A, B], 'cuda', name='tvm_kernel3')
print(func.imported_modules[0].get_source())