import numpy as np
import pytest
import tvm
import onnx
from tvm.contrib import graph_executor
from tvm import meta_schedule as ms
from tvm import relay, auto_scheduler
from tvm.meta_schedule.testing import relay_workload
from tvm.meta_schedule.testing.tlcbench import load_quantized_bert_base
from tvm.tir.tensor_intrin.cuda import *
from tvm.tir.tensor_intrin.arm_cpu import DP4A_INTRIN
from tvm.tir.tensor_intrin.rocm import AMDGPU_SDOT4_INTRIN
from tvm.tir.tensor_intrin.x86 import VNNI_DOT_16x4_INTRIN as VNNI_INTRIN
@tvm.testing.requires_gpu
@pytest.mark.skip("Slow on CI")
@pytest.mark.parametrize(
["model_name", "input_shape"],
[("bert_base", (8, 128)), ("resnet_18", (16, 3, 224, 224)), ("resnet_50", (16, 3, 224, 224))],
)
# NOTE: This file has been truncated here; see the original source for the remainder.