Autotvm_matmul_x86.py

The autotvm_matmul_x86.py file is executed in the CPU core by default. How can I change this to run on the GPU?

The code is:

@autotvm.template("tutorial/matmul")
def matmul(N, L, M, dtype):
A = te.placeholder((N, L), name="A", dtype=dtype)
B = te.placeholder((L, M), name="B", dtype=dtype)

k = te.reduce_axis((0, L), name="k")
C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
s = te.create_schedule(C.op)

# schedule
y, x = s[C].op.axis
k = s[C].op.reduce_axis[0]

##### define space begin #####
cfg = autotvm.get_config()
cfg.define_split("tile_y", y, num_outputs=2)
cfg.define_split("tile_x", x, num_outputs=2)
##### define space end #####

# schedule according to config
yo, yi = cfg["tile_y"].apply(s, C, y)
xo, xi = cfg["tile_x"].apply(s, C, x)

s[C].reorder(yo, xo, k, yi, xi)

return s, [A, B, C]
N, L, M = 512, 512, 512
task = autotvm.task.create("tutorial/matmul", args=(N, L, M, "float32"), target="cuda")
print(task.config_space)

# logging config (for printing tuning log to the screen)
logging.getLogger("autotvm").setLevel(logging.DEBUG)
logging.getLogger("autotvm").addHandler(logging.StreamHandler(sys.stdout))

measure_option = autotvm.measure_option(builder="local", 
runner=autotvm.LocalRunner(number=5))
tuner = autotvm.tuner.RandomTuner(task)
tuner.tune(
   n_trial=10,
    measure_option=measure_option,
   callbacks=[autotvm.callback.log_to_file("matmul.log")],
)
with autotvm.apply_history_best("matmul.log"):
     with tvm.target.Target("cuda"):
          s, arg_bufs = matmul(N, L, M, "float32")
          func = tvm.build(s, arg_bufs)

# check correctness
a_np = np.random.uniform(size=(N, L)).astype(np.float32)
b_np = np.random.uniform(size=(L, M)).astype(np.float32)
c_np = a_np.dot(b_np)

c_tvm = tvm.nd.empty(c_np.shape)
func(tvm.nd.array(a_np), tvm.nd.array(b_np), c_tvm)
tvm.testing.assert_allclose(c_np, c_tvm.numpy(), rtol=1e-4)