Auto-scheduler CUDA file

In the official tutorial `auto_scheduler_matmul_x86.py`: how can I see the CUDA code that the program executes, and how can I generate the CUDA source file at runtime?

tutorial:

import numpy as np

import tvm

from tvm import te

from tvm import relay, auto_scheduler

import tvm.relay as relay

@auto_scheduler.register_workload
def matmul_add(N, L, M, dtype):
    """Define the workload: out = A @ B + C.

    Registered with the auto-scheduler so it can be looked up by name when
    building a SearchTask.

    Parameters
    ----------
    N, L, M : int
        Matrix dimensions: A is (N, L), B is (L, M), C and out are (N, M).
    dtype : str
        Element type of all tensors (e.g. "float32").

    Returns
    -------
    list
        The input/output tensors [A, B, C, out] describing the compute DAG.
    """
    # NOTE(review): the original paste used smart quotes and collapsed these
    # three statements onto the `def` line — restored to valid Python here.
    A = te.placeholder((N, L), name="A", dtype=dtype)
    B = te.placeholder((L, M), name="B", dtype=dtype)
    C = te.placeholder((N, M), name="C", dtype=dtype)

    k = te.reduce_axis((0, L), name="k")
    matmul = te.compute(
        (N, M),
        lambda i, j: te.sum(A[i, k] * B[k, j], axis=k),
        name="matmul",
        attrs={"layout_free_placeholders": [B]},  # enable automatic layout transform for tensor B
    )
    out = te.compute((N, M), lambda i, j: matmul[i, j] + C[i, j], name="out")

    return [A, B, C, out]

# Top-level driver: create the search task, tune it, and apply the best
# schedule found. (Smart quotes and collapsed lines from the original paste
# are fixed so this runs as plain Python.)
target = tvm.target.Target("cuda")

N = L = M = 2048
task = tvm.auto_scheduler.SearchTask(
    func=matmul_add, args=(N, L, M, "float32"), target=target
)

print("Computational DAG:")
print(task.compute_dag)

log_file = "matmul.json"
tune_option = auto_scheduler.TuningOptions(
    num_measure_trials=2,  # small trial count for demonstration; raise for real tuning
    measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    verbose=2,
)

task.tune(tune_option)
sch, args = task.apply_best(log_file)

# Answer to the question in the post: to see the CUDA code that is actually
# executed, build the tuned schedule and print the source of the imported
# device module. On a CUDA target, imported_modules[0] holds the generated
# CUDA kernel source.
func = tvm.build(sch, args, target)
print(func.imported_modules[0].get_source())

# To generate a .cu file at runtime, just write that source to disk:
with open("matmul.cu", "w") as f:
    f.write(func.imported_modules[0].get_source())