I finished auto-tuning my NVIDIA GPU and produced .log file for resnet-50. I would like to benchmark the performance improvements by following the performance benchmark on GitHub (https://github.com/apache/tvm/tree/main/apps/benchmark), but it seems like it is using pretuned parameters that TVM is providing. How can I use my own resnet-50.log file to test its performance?
First, redefine the function get_network(network, batch_size=1, dtype=dtype)
so that it loads your own model.
Second, wrap the build step in `with autotvm.apply_history_best(log_file):`,
like this:
def evaluate_network(network, target, target_host, dtype, repeat):
    """Build *network* with your tuned schedules and time it on a remote RPC device.

    Parameters
    ----------
    network : str
        Name of the network, passed through to ``get_network``.
    target, target_host : tvm target specs for the device and its host.
    dtype : str
        Data type of the input tensor (e.g. "float32").
    repeat : int
        Number of timing repetitions for the time evaluator.

    NOTE(review): relies on module-level names defined elsewhere in the
    benchmark script: ``args``, ``log_file``, ``get_network``,
    ``print_progress``, ``tempdir``, ``runtime``.
    """
    # Connect to the remote device through the RPC tracker.
    tracker = tvm.rpc.connect_tracker(args.host, args.port)
    remote = tracker.request(args.rpc_key)

    print_progress(network)
    net, params, input_shape, output_shape = get_network(network, batch_size=1, dtype=dtype)

    # Apply your own auto-tuning results: put the path to your produced
    # .log file in ``log_file`` so relay.build picks the tuned schedules.
    with autotvm.apply_history_best(log_file):
        print_progress("%-20s building..." % network)
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build(net, target=target, target_host=target_host, params=params)

    # Export the compiled library; Android targets need the NDK toolchain.
    tmp = tempdir()
    if "android" in str(target) or "android" in str(target_host):
        from tvm.contrib import ndk

        filename = "%s.so" % network
        lib.export_library(tmp.relpath(filename), ndk.create_shared)
    else:
        filename = "%s.tar" % network
        lib.export_library(tmp.relpath(filename))

    # Upload the library to the remote device and load it there.
    print_progress("%-20s uploading..." % network)
    ctx = remote.context(str(target), 0)
    remote.upload(tmp.relpath(filename))

    rlib = remote.load_module(filename)
    module = runtime.GraphModule(rlib["default"](ctx))
    # Random input is fine for timing: we measure latency, not accuracy.
    data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
    module.set_input("data", data_tvm)

    # Evaluate: run the whole graph `repeat` times and report mean/std latency.
    print_progress("%-20s evaluating..." % network)
    ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=repeat)
    prof_res = np.array(ftimer().results) * 1000  # seconds -> milliseconds
    print(
        "%-20s %-19s (%s)"
        % (network, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res))
    )
def benchmark(network, target):
    """Build *network* with your tuned schedules and time it on the local device.

    Parameters
    ----------
    network : str
        Name of the network, passed through to ``get_network``.
    target : tvm target spec for the local device (e.g. "cuda").

    NOTE(review): relies on module-level names defined elsewhere in the
    benchmark script: ``args``, ``dtype``, ``log_file``, ``get_network``,
    ``runtime``.
    """
    net, params, input_shape, output_shape = get_network(network, batch_size=1)

    # Apply your own auto-tuning results: put the path to your produced
    # .log file in ``log_file`` so relay.build picks the tuned schedules.
    with autotvm.apply_history_best(log_file):
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build(net, target=target, params=params)

    # Create the graph runtime on the local device.
    ctx = tvm.context(str(target), 0)
    module = runtime.GraphModule(lib["default"](ctx))
    # Random input is fine for timing: we measure latency, not accuracy.
    data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
    module.set_input("data", data_tvm)

    # Evaluate: run the whole graph `args.repeat` times and report mean/std latency.
    ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=args.repeat)
    prof_res = np.array(ftimer().results) * 1000  # seconds -> milliseconds
    print(
        "%-20s %-19s (%s)"
        % (network, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res))
    )
Thanks for your response. But since my GPU is an NVIDIA GPU, shouldn't I be using `gpu_imagenet_bench.py` instead?
Yes, you are right — for an NVIDIA GPU use `gpu_imagenet_bench.py`; the same changes (your own `get_network` and wrapping the build in `autotvm.apply_history_best`) apply there as well.