In the official tutorial `auto_scheduler_matmul_x86.py`, how do I see the CUDA code executed by the program? How do I generate the CUDA source file at runtime?
tutorial:
import numpy as np
import tvm
from tvm import te
from tvm import relay, auto_scheduler
import tvm.relay as relay
@auto_scheduler.register_workload
def matmul_add(N, L, M, dtype):
    """Define the compute DAG for ``out = A @ B + C``.

    Registered as an auto-scheduler workload. Returns the list of
    input/output tensors ``[A, B, C, out]`` that the search task uses
    to build its compute DAG.

    Parameters
    ----------
    N, L, M : int
        Matrix dimensions: A is (N, L), B is (L, M), C and out are (N, M).
    dtype : str
        Element data type, e.g. "float32".
    """
    # FIX: the pasted original used curly smart quotes (e.g. “A”), which
    # are a SyntaxError in Python — replaced with plain ASCII quotes.
    A = te.placeholder((N, L), name="A", dtype=dtype)
    B = te.placeholder((L, M), name="B", dtype=dtype)
    C = te.placeholder((N, M), name="C", dtype=dtype)
    k = te.reduce_axis((0, L), name="k")
    matmul = te.compute(
        (N, M),
        lambda i, j: te.sum(A[i, k] * B[k, j], axis=k),
        name="matmul",
        # Allow the auto-scheduler to pick a better memory layout for B.
        attrs={"layout_free_placeholders": [B]},
    )
    out = te.compute((N, M), lambda i, j: matmul[i, j] + C[i, j], name="out")
    return [A, B, C, out]
# --- Tuning driver (script top level) ---
# FIX: replaced curly smart quotes with ASCII quotes (they are a
# SyntaxError) and split statements that had been fused onto one line.
target = tvm.target.Target("cuda")
N = L = M = 2048
task = tvm.auto_scheduler.SearchTask(
    func=matmul_add, args=(N, L, M, "float32"), target=target
)
print("Computational DAG:")
print(task.compute_dag)

log_file = "matmul.json"
tune_option = auto_scheduler.TuningOptions(
    num_measure_trials=2,  # very low trial count; raise for real tuning
    measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    verbose=2,
)

# Run the search, then rebuild the best schedule found in the log.
task.tune(tune_option)
sch, args = task.apply_best(log_file)

# NOTE(review): to answer the original question — after building the
# module, the device-side source can be dumped via the imported module,
# e.g.:
#     mod = tvm.build(sch, args, target)
#     print(mod.imported_modules[0].get_source())
# (per TVM docs; confirm against the TVM version in use)