I am trying to implement a correct (i.e. unlimited filter size) bilinear filter. Here is a reduced example:
# Image dimensions are TVM variables, not compile-time constants.
rows = tvm.var("rows")
cols = tvm.var("cols")
chans = tvm.var("chans")

input_vec = tvm.placeholder((rows, cols, chans), dtype="float32")

# Per-(column, channel) weight table; the formula is a stand-in for the
# real filter kernel.
kernel = tvm.compute(
    (cols, chans),
    lambda x, c: 1.0 * x * c,
    name="kern_vec",
)

# Multiply every input element by the weight for its column and channel.
result = tvm.compute(
    (rows, cols, chans),
    lambda y, x, c: input_vec[y, x, c] * kernel[x, c],
    name="answer",
)

sched = tvm.create_schedule(result.op)
kernel_stage = sched[kernel]
result_stage = sched[result]

row_axis = result.op.axis[0]
col_axis = result.op.axis[1]

# Attach the kernel computation inside the column loop of the result.
# NOTE(review): the kernel slice needed per column spans `chans`, which is
# not a constant -- presumably this is what trips the "constant size stack
# allocation" check in codegen; confirm.
kernel_stage.compute_at(result_stage, col_axis)

# Bind rows to blockIdx.x and columns to threadIdx.x.
block_x = tvm.thread_axis("blockIdx.x")
thread_x = tvm.thread_axis("threadIdx.x")
result_stage.bind(row_axis, block_x)
result_stage.bind(col_axis, thread_x)

fun = tvm.build(sched, [input_vec], "opencl", name="test_compute_at")
When I run this, the call to tvm.build fails with the following error:
Traceback (most recent call last):
File "compute_at_gpu.py", line 21, in <module>
fun = tvm.build(sched, [input_vec], "opencl", name="test_compute_at")
File "/home/chrisn/.local/lib/python3.6/site-packages/tvm-0.5.dev0-py3.6-linux-x86_64.egg/tvm/build_module.py", line 519, in build
mdev = codegen.build_module(fdevice, str(target_device)) if fdevice else None
File "/home/chrisn/.local/lib/python3.6/site-packages/tvm-0.5.dev0-py3.6-linux-x86_64.egg/tvm/codegen.py", line 20, in build_module
return _Build(lowered_func, target)
File "/home/chrisn/.local/lib/python3.6/site-packages/tvm-0.5.dev0-py3.6-linux-x86_64.egg/tvm/_ffi/_ctypes/function.py", line 185, in __call__
ctypes.byref(ret_val), ctypes.byref(ret_tcode)))
File "/home/chrisn/.local/lib/python3.6/site-packages/tvm-0.5.dev0-py3.6-linux-x86_64.egg/tvm/_ffi/base.py", line 66, in check_call
raise TVMError(py_str(_LIB.TVMGetLastError()))
tvm._ffi.base.TVMError: [12:35:35] /home/chrisn/dev/tvm-clj/tvm/src/codegen/codegen_c.cc:725: Check failed: constant_size > 0 (0 vs. 0) Can only handle constant size stack allocation for now
How can I refactor the code so that the kernel is computed at that point in the schedule (i.e. inside the column loop of the result, where compute_at attaches it)?