Hi everyone,
I am trying to write some code to auto-tune a pretrained quantized MobileNetV2 network so that it runs faster on the Raspberry Pi 4. I can tune the model, then build and run the compiled model on the hardware, but the inference time is slower than using the vanilla model with PyTorch. I would appreciate it if someone could check my code and tell me if there is something wrong or missing. Thank you.
import os
import time
import tvm
from tvm import relay, autotvm
import tvm.relay.testing
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
import tvm.contrib.graph_executor as runtime
from PIL import Image
import numpy as np
import torch
from torchvision.models.quantization import mobilenet as qmobilenet
from tvm.contrib.download import download_testdata
import torchvision.transforms as transforms
# Compilation target passed to task extraction and relay.build
# ("rasp4b64" is one of the model presets accepted by tvm.target.arm_cpu).
target = tvm.target.arm_cpu("rasp4b64")
# Key used to request the remote device for measurement and evaluation.
# NOTE(review): the key reads "rk3399" although the post talks about a
# Raspberry Pi 4 -- confirm it matches the key the board registered with.
device_key = "rk3399"
use_android = False

#### TUNING OPTION ####
network = "mobilenet_v2"
log_file = f"{device_key}.{network}.log"
tuningDataFile = f"{device_key}.{network}.json"
hostIP = "10.46.30.51"
dtype = "float32"

# Keyword arguments for tune_tasks(); the keys must match its parameter names
# because the dict is unpacked with ** at the call site.
tuning_option = dict(
    log_filename=log_file,
    tuner="xgb",
    n_trial=1500,
    early_stopping=800,
    measure_option=autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="ndk" if use_android else "default"),
        runner=autotvm.RPCRunner(
            device_key,
            host=hostIP,
            port=9190,
            number=5,  # runs of the generated code averaged into one measurement
            repeat=1,  # how many such measurements are taken per configuration
            min_repeat_ms=0,  # since we're tuning on a CPU, can be set to 0
            timeout=10,
        ),
    ),
)
def tune_tasks(
    tasks,
    measure_option,
    tuner="xgb",
    n_trial=1000,
    early_stopping=None,
    log_filename="tuning.log",
    use_transfer_learning=True,
):
    """Tune every AutoTVM task and keep the best record of each in ``log_filename``.

    Measurements are first appended to ``log_filename + ".tmp"``; once all
    tasks are done the best records are picked into ``log_filename`` and the
    temporary file is deleted.

    Args:
        tasks: Sequence of AutoTVM tasks. They are tuned in reverse order.
        measure_option: Measurement options forwarded to ``tuner.tune``.
        tuner: One of ``"xgb"``, ``"xgb-rank"``, ``"xgb_knob"``,
            ``"xgb_itervar"``, ``"xgb_curve"``, ``"ga"``, ``"random"``,
            ``"gridsearch"``.
        n_trial: Upper bound on trials per task (capped at the size of the
            task's config space).
        early_stopping: Forwarded to ``tuner.tune``.
        log_filename: Destination file for the best records.
        use_transfer_learning: When true, records already written to the
            temporary log by earlier tasks are loaded into each new tuner.

    Raises:
        ValueError: If ``tuner`` is not one of the supported names. The name
            is validated before any file is touched or any task is tuned.
    """
    # Factories are looked up by name; the tuner classes are only touched
    # when a factory is actually called for a task.
    tuner_factories = {
        "xgb": lambda task: XGBTuner(task, loss_type="rank"),
        "xgb-rank": lambda task: XGBTuner(task, loss_type="rank"),
        "xgb_knob": lambda task: XGBTuner(task, loss_type="rank", feature_type="knob"),
        "xgb_itervar": lambda task: XGBTuner(task, loss_type="rank", feature_type="itervar"),
        "xgb_curve": lambda task: XGBTuner(task, loss_type="rank", feature_type="curve"),
        "ga": lambda task: GATuner(task, pop_size=50),
        "random": lambda task: RandomTuner(task),
        "gridsearch": lambda task: GridSearchTuner(task),
    }
    if tuner not in tuner_factories:
        raise ValueError(f"Invalid tuner: {tuner}")
    make_tuner = tuner_factories[tuner]

    # Start from a clean temporary log so stale records are never reused.
    tmp_log_file = log_filename + ".tmp"
    print(tmp_log_file)
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    total = len(tasks)
    for i, tsk in enumerate(reversed(tasks)):
        started = time.time()
        prefix = "[Task %2d/%2d] " % (i + 1, total)

        tuner_obj = make_tuner(tsk)
        # Seed the tuner with what earlier tasks in this run already measured.
        if use_transfer_learning and os.path.isfile(tmp_log_file):
            tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # Never ask for more trials than the config space holds.
        tsk_trial = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(
            n_trial=tsk_trial,
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=[
                autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                autotvm.callback.log_to_file(tmp_log_file),
            ],
        )

        # Report with the same 1-based numbering as the progress prefix and
        # without truncating the elapsed time to whole seconds first.
        elapsed_min = (time.time() - started) / 60
        print(f"Completed task {i + 1} in {elapsed_min:.2f} min")

    # Nothing was measured (e.g. no tasks were given): there is no temporary
    # log to distil, so stop here instead of failing on the missing file.
    if not os.path.isfile(tmp_log_file):
        print("No tuning records were produced; " + log_filename + " was not written")
        return

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)
    os.remove(tmp_log_file)
def tune_and_evaluate(tuning_opt, mod, params, input_shape, input_name):
    """Tune the conv2d tasks of ``mod``, build it, and time it on the remote device.

    Relies on the module-level settings ``target``, ``device_key``, ``hostIP``,
    ``use_android`` and ``dtype``.

    Args:
        tuning_opt: Keyword arguments forwarded to ``tune_tasks``.
        mod: Relay module; its ``"main"`` function is tuned and built.
        params: Parameters belonging to ``mod``.
        input_shape: Shape of the random input used for the benchmark.
        input_name: Name of the graph input that receives that data.
    """
    # extract workloads from relay program
    print("Extract tasks...")
    tasks = autotvm.task.extract_from_program(
        mod["main"], target=target, params=params, ops=(relay.op.get("nn.conv2d"),)
    )

    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # Compile with the records that were just written: use the same file name
    # that was handed to tune_tasks ("tuning.log" is its default) rather than
    # a separate global that could point somewhere else.
    history_file = tuning_opt.get("log_filename", "tuning.log")
    with autotvm.apply_history_best(history_file):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build_module.build(mod, target=target, params=params)

    # export library; create the output directory first so the export does
    # not fail on a fresh checkout
    lib_dir = "models_generated"
    os.makedirs(lib_dir, exist_ok=True)
    if use_android:
        from tvm.contrib import ndk

        filename = "net_dep_and_opt.so"
        lib_path = os.path.join(lib_dir, filename)
        lib.export_library(lib_path, ndk.create_shared)
    else:
        filename = "net_dep_and_opt.tar"
        lib_path = os.path.join(lib_dir, filename)
        lib.export_library(lib_path)

    # upload module to device
    print("Upload...")
    remote = autotvm.measure.request_remote(device_key, hostIP, 9190, timeout=10000)
    remote.upload(lib_path)
    rlib = remote.load_module(filename)

    # create the graph executor on the remote device and feed it random data
    dev = remote.device(str(target), 0)
    module = runtime.GraphModule(rlib["default"](dev))
    data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
    module.set_input(input_name, data_tvm)

    # evaluate
    print("Evaluate inference time cost...")
    print(module.benchmark(dev, number=1, repeat=10))
def quantize_model(model, inp):
    """Statically quantize ``model`` in place, calibrating on a single input.

    Args:
        model: A module exposing ``fuse_model()``; it is fused, prepared,
            calibrated and converted in place.
        inp: Input passed once through the prepared model as calibration data.

    Returns:
        None. ``model`` itself is modified.
    """
    model.fuse_model()
    # NOTE(review): "fbgemm" selects PyTorch's x86 quantization config --
    # confirm this is the intended choice for a model deployed to an ARM board.
    model.qconfig = torch.quantization.get_default_qconfig("fbgemm")
    torch.quantization.prepare(model, inplace=True)
    # Dummy calibration: the forward pass is only run for its effect on the
    # prepared model, so skip autograd bookkeeping.
    with torch.no_grad():
        model(inp)
    torch.quantization.convert(model, inplace=True)
def get_transform():
    """Return the torchvision preprocessing pipeline applied to the input image.

    The pipeline resizes to 256, center-crops to 224, converts to a tensor and
    normalizes each channel with fixed mean/std constants (the values commonly
    used with ImageNet-trained torchvision models).
    """
    steps = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    return transforms.Compose(steps)
def get_real_image(im_height, im_width):
    """Download the sample cat picture and return it resized.

    Args:
        im_height: Height of the returned image in pixels.
        im_width: Width of the returned image in pixels.

    Returns:
        A PIL image of size ``im_width`` x ``im_height``.
    """
    img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
    img_path = download_testdata(img_url, "cat.png", module="data")
    # PIL's resize takes (width, height); passing (height, width) would swap
    # the dimensions for any non-square request.
    return Image.open(img_path).resize((im_width, im_height))
def get_imagenet_input():
    """Return the preprocessed sample image as a NumPy array with a leading batch axis."""
    image = get_real_image(224, 224)
    tensor = get_transform()(image)
    # Prepend a batch axis of size 1 in front of the tensor's own axes.
    return tensor.numpy()[np.newaxis, ...]
# Load the pretrained quantizable MobileNetV2 and switch it to eval mode.
qmodel = qmobilenet.mobilenet_v2(pretrained=True)
qmodel.eval()

# Quantize the model in place, using the sample image as calibration input.
inp = get_imagenet_input()
pt_inp = torch.from_numpy(inp)
quantize_model(qmodel, pt_inp)

# Trace the quantized model to TorchScript for the Relay frontend.
script_module = torch.jit.trace(qmodel, pt_inp)
script_module.eval()

# Output of the traced model on the sample input.
with torch.no_grad():
    pt_result = script_module(pt_inp).numpy()

# Import into Relay.
input_name = "input"  # the input name can be arbitrary for the PyTorch frontend
input_shape = (1, 3, 224, 224)
input_shapes = [(input_name, input_shape)]
mod, params = relay.frontend.from_pytorch(script_module, input_shapes)

# Tune, build, upload and benchmark on the remote device.
tune_and_evaluate(tuning_option, mod, params, input_shape, input_name)