Sysinfo:
- TVM: 2022.08.05 c4aab62c6d588ed06f60b0db9691271f5883dc66
- Operating System / Platform: Linux Ubuntu 22.04
- Compiler: g++ (GCC) 8.5.0 20210514 (Red Hat 8.5.0-10)
- CPU: Intel(R) Xeon(R) Gold 6342 CPU @ 2.80GHz
- CPU feature flags: avx2, avx512, avx512_vnni; avx512_bf16 not supported
- Python version: 3.9.12
- Problem classification: Model Optimization (CPU)
- Framework: trained in PyTorch; current deployment is onnxruntime
- Inference hardware: CPU only
- Model: HiFi-GAN vocoder (vocoder.onnx from npc-engine/exported-flowtron-waveglow-librispeech-tts on Hugging Face, main branch)
- Accuracy: default fp32
Description: I want to use TVM to optimize the HiFi-GAN vocoder and accelerate inference. Since the input length varies, I compile the model several times, once per fixed input size, and dispatch by bucket (padding the input up to the bucket size and truncating the output). I expected an acceleration in inference speed over onnxruntime, but in practice there is none.
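For context, the bucketing works roughly like the sketch below (the hop length of 256 audio samples per mel frame and the helper names are assumptions for illustration; the model's actual upsampling factor should be used):

# Sketch of the pad-then-truncate bucketing described above.
import numpy as np

BUCKETS = [64, 128]
HOP = 256  # audio samples per mel frame; an assumed value

def run_bucketed(mel, run_fn):
    # mel: (1, n_frames, 80); run_fn(bucket, padded) runs the model compiled for that bucket
    n = mel.shape[1]
    bucket = next(b for b in BUCKETS if b >= n)
    padded = np.pad(mel, ((0, 0), (0, bucket - n), (0, 0)))
    audio = run_fn(bucket, padded)
    return audio[:, : n * HOP]  # truncate back to the true length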
onnxruntime:
avg cost: 91.47004077309056 ms, mel length 64, runtime_threads 6
avg cost: 157.6029250496312 ms, mel length 128, runtime_threads 6

tvm:
avg cost: 113.67635977895637 ms, mel length 64, -mcpu=core-avx2, runtime_threads 6
avg cost: 193.6937507830168 ms, mel length 128, -mcpu=core-avx2, runtime_threads 6
avg cost: 135.77000718367728 ms, mel length 64, -mcpu=skylake-avx512, runtime_threads 6
avg cost: 202.8991925088983 ms, mel length 128, -mcpu=skylake-avx512, runtime_threads 6
So I wonder: did I choose the right -mcpu target flag? I used llvm -mcpu=core-avx2 and llvm -mcpu=skylake-avx512.
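For reference, one way to pick an -mcpu value is from the host's CPU flags, as in the sketch below (the flag-to-mcpu mapping is my own rough guess, not an official table; Xeon Gold 6342 is an Ice Lake-SP part, so icelake-server should be the closest named target):

# Sketch: choose an -mcpu value from /proc/cpuinfo flags (Linux only).
# The flag-to-mcpu mapping is a rough assumption, not an official table.
with open("/proc/cpuinfo") as f:
    flags = next(line for line in f if line.startswith("flags")).split()

if "avx512_vnni" in flags:
    mcpu = "icelake-server"   # AVX-512 + VNNI; closest match for Xeon Gold 6342
elif "avx512f" in flags:
    mcpu = "skylake-avx512"
elif "avx2" in flags:
    mcpu = "core-avx2"
else:
    mcpu = "x86-64"

target = f"llvm -mcpu={mcpu}"
print(target)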
Another question: during task extraction in optimize.py (the call tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)), warnings like "conv1d_transpose is not optimized for this platform." occur. The corresponding TVM code is https://github.com/apache/tvm/blob/main/python/tvm/relay/op/strategy/generic.py#L736-L739
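To see which workloads the warning refers to, the extracted tasks can be printed; a small sketch, using the tasks and task_weights from the script below:

# Sketch: list the extracted tuning tasks and their weights.
for i, (task, weight) in enumerate(zip(tasks, task_weights)):
    print(f"Task {i}: weight={weight}, flops={task.compute_dag.flop_ct:.0f}")
    print(task.compute_dag)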
optimize.py
import onnx
import tvm
from tvm import relay, auto_scheduler
def define_task(mod, params, target):
    # Extract auto-scheduler tuning tasks from the Relay module.
    tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)
    return tasks, task_weights
def run_tuning(tasks, task_weights, log_file):
    # Tune all tasks under a shared budget of 500 measurement trials.
    print("Begin tuning...")
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=500,
        runner=auto_scheduler.LocalRunner(repeat=10, enable_cpu_cache_flush=True),
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    tuner.tune(tune_option)
def out_build(log_file, mod, target, params, out_so):
    # Apply the best tuning records and compile the module to a shared library.
    print("Compile...")
    # Observed output when this function runs:
    #   Compile...
    #   /home/user/repos/tvm/python/tvm/driver/build_module.py:267: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
    #     warnings.warn(
    #   conv1d_transpose is not optimized for this platform.  (repeated 4 times)
    print(f"target ISA {target}")
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
            lib = relay.build(mod, target=target, params=params)
    lib.export_library(out_so)
def tune_onnx_mode(model_path, log_file, shape_dict, target, out_so):
    # Import the ONNX model, tune it, and export the compiled library.
    onnx_model = onnx.load(model_path)
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
    tasks, task_weights = define_task(mod, params, target)
    run_tuning(tasks, task_weights, log_file)
    out_build(log_file, mod, target, params, out_so)
if __name__ == '__main__':
    tune_tasks = []
    # for ISA, subdir in zip(["llvm -mcpu=core-avx2", "llvm -mcpu=skylake-avx512"], ["avx2", "avx512"]):
    ISA = "llvm -mcpu=icelake-server"
    subdir = "icelake"  # renamed from `dir` to avoid shadowing the builtin
    output_dir = f"/home/user/repos/tvm-tune/hifiganonnx/{subdir}"
    # hifiGAN: one tuning/compile job per fixed input size
    for size in [64, 128]:
        tasks = {
            "model_path": "/home/user/repos/tvm-tune/hifiganonnx/std_en-US_Joanna_vocoder.onnx",
            "log_file": f"{output_dir}/std_en-US_Joanna_vocoder_{size}.log",
            "shape_dict": {"mel": (1, size, 80)},
            "target": ISA,
            "out_so": f"{output_dir}/std_en-US_Joanna_vocoder_{size}.so",
        }
        tune_tasks.append(tasks)

    # start:
    for task in tune_tasks:
        try:
            tune_onnx_mode(model_path=task['model_path'],
                           log_file=task['log_file'],
                           shape_dict=task['shape_dict'],
                           target=task['target'],
                           out_so=task['out_so'])
        except Exception as e:
            print(e)
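For completeness, a minimal sketch of how the exported .so can be timed with TVM's time evaluator (paths follow the script above; setting TVM_NUM_THREADS is an assumed way to match runtime_threads 6):

# Sketch: benchmark a compiled .so with TVM's time evaluator.
import os
os.environ.setdefault("TVM_NUM_THREADS", "6")  # assumption: pins the runtime thread pool to 6

import numpy as np
import tvm
from tvm.contrib import graph_executor

size = 64
so_path = f"/home/user/repos/tvm-tune/hifiganonnx/icelake/std_en-US_Joanna_vocoder_{size}.so"

dev = tvm.cpu(0)
lib = tvm.runtime.load_module(so_path)
module = graph_executor.GraphModule(lib["default"](dev))
module.set_input("mel", np.random.randn(1, size, 80).astype("float32"))

timer = module.module.time_evaluator("run", dev, number=10, repeat=3)
print(f"avg cost: {timer().mean * 1000:.3f} ms, mel length {size}")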