Hi experts, I am trying to build CUDA source code from a JSON record, but when I call tvm.build(sch, args, target="cuda") I keep getting errors like: “Variable `` is directly accessed by host memory.”
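As far as I understand, this error usually means some stage of the schedule is not bound to GPU threads. A stripped-down hypothetical example (not my actual workload) that should hit the same kind of failure is building an unbound schedule for CUDA:

import tvm
from tvm import te

# Hypothetical toy compute, only to illustrate the kind of failure I mean
A = te.placeholder((1024,), name="A")
B = te.compute((1024,), lambda i: A[i] * 2.0, name="B")
s = te.create_schedule(B.op)

# No axis of B is bound to blockIdx/threadIdx, so building for CUDA
# should fail with a "directly accessed by host memory" style error
tvm.build(s, [A, B], target="cuda")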
I have four arguments in total:
[Tensor(shape=[4, 7, 7, 160], op.name=placeholder),
Tensor(shape=[1, 1, 160, 960], op.name=placeholder),
Tensor(shape=[1, 1, 1, 960], op.name=placeholder),
Tensor(shape=[4, 7, 7, 960], op.name=T_multiply)]
I successfully bound T_multiply using the code below, but I could not bind the placeholder tensors. Attempting to do so results in an error: AttributeError: PlaceholderOp object has no attribute axis.
Do you have any ideas on how to properly bind the placeholder tensors, build the code, and generate CUDA source code? Thanks so much!
import tvm
from tvm import te
from tvm.auto_scheduler.measure import recover_measure_input

# inp: the MeasureInput loaded from the JSON record
inp = recover_measure_input(inp, True)
task = inp.task
sch, args = task.compute_dag.apply_steps_from_state(task.compute_dag.get_init_state())
print("sch", sch)
print("args", args)

with tvm.target.Target("cuda"):
    lowered_func = tvm.lower(sch, args, name="my_kernel")
    print("lowered_func", lowered_func)

# Define block and thread axes
block_x = te.thread_axis("blockIdx.x")
block_y = te.thread_axis("blockIdx.y")
thread_x = te.thread_axis("threadIdx.x")
thread_y = te.thread_axis("threadIdx.y")

# Try to split and bind every argument tensor; this works for T_multiply
# but fails for the placeholder inputs (PlaceholderOp has no axis attribute)
for arg in args:
    if len(arg.shape) >= 2:
        bx, tx = sch[arg].split(arg.op.axis[0], factor=32)
        by, ty = sch[arg].split(arg.op.axis[1], factor=32)
        sch[arg].bind(bx, block_x)
        sch[arg].bind(by, block_y)
        sch[arg].bind(tx, thread_x)
        sch[arg].bind(ty, thread_y)
    elif len(arg.shape) == 1:
        bx, tx = sch[arg].split(arg.op.axis[0], factor=32)
        sch[arg].bind(bx, block_x)
        sch[arg].bind(tx, thread_x)

cuda_module = tvm.build(sch, args, target="cuda")
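For reference, the pattern I was trying to generalize is something like the toy example below (a hypothetical broadcast multiply with shapes mirroring my real args, not the actual task). There I only split and bind the axes of the compute op and leave the placeholders alone, and as far as I can tell that is enough to build and print CUDA source, which is why I am unsure whether the placeholder tensors need any binding at all:

import tvm
from tvm import te

# Hypothetical broadcast multiply, only to illustrate the binding pattern
A = te.placeholder((4, 7, 7, 960), name="A")
S = te.placeholder((1, 1, 1, 960), name="S")
C = te.compute(A.shape, lambda n, h, w, c: A[n, h, w, c] * S[0, 0, 0, c], name="C")

sch = te.create_schedule(C.op)

# Fuse the output axes and bind them to CUDA blocks/threads;
# only the compute op C is scheduled, the placeholders are untouched
fused = sch[C].fuse(*C.op.axis)
bx, tx = sch[C].split(fused, factor=128)
sch[C].bind(bx, te.thread_axis("blockIdx.x"))
sch[C].bind(tx, te.thread_axis("threadIdx.x"))

mod = tvm.build(sch, [A, S, C], target="cuda")
print(mod.imported_modules[0].get_source())  # generated CUDA source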