Bert-large masked lm pre-quantization model build failed

chenugray · December 30, 2021, 9:04am

from pytorch_pretrained_bert import BertForMaskedLM
import torch

def main(args):
    """Dynamically quantize BERT-large (masked LM), trace it, and build it with TVM.

    Steps: load the pretrained ``bert-large-uncased`` masked-LM model,
    dynamically quantize its ``torch.nn.Linear`` layers to int8, trace the
    quantized model with TorchScript, import the trace into Relay, build it
    for the ``llvm`` target and export the compiled library.

    ``tvm`` and ``relay`` are expected to already be imported at module level
    (e.g. ``import tvm`` / ``from tvm import relay``); they are not shown in
    this snippet.

    Args:
        args: Unused by this function.
    """
    import os  # local import: `os` is not imported in the visible snippet

    bert_model_origin = BertForMaskedLM.from_pretrained("bert-large-uncased")
    example_tensor = torch.randint(0, 100, (1, 256))

    # The set of layer types is passed positionally: quantize_dynamic has no
    # `quant_layers` keyword (the parameter is `qconfig_spec`, `qconfig_dict`
    # in early releases), so the original keyword raised TypeError.
    model_int8 = torch.quantization.quantize_dynamic(
        bert_model_origin, {torch.nn.Linear}, dtype=torch.qint8
    )
    model_int8.eval()

    trace_model = torch.jit.trace(model_int8, [example_tensor])
    trace_model.eval()

    # Build (name, shape) pairs for the Relay importer. The first graph input
    # is skipped (presumably the module itself -- confirm against the trace).
    shape_list = [
        (i.debugName().split('.')[0], i.type().sizes())
        for i in list(trace_model.graph.inputs())[1:]
    ]
    mod_bert, params_bert = tvm.relay.frontend.pytorch.from_pytorch(trace_model, shape_list)

    target = tvm.target.Target(target="llvm", host="llvm")
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod_bert, target=target, params=params_bert)
        # NOTE(review): "int18" looks like a typo for "int8"; kept as-is so
        # the output path does not change.
        lib.export_library(os.path.realpath("net_int18_cpu.tar"))

See the code above: when building the pre-quantized BERT-large masked-LM model, the build fails like this:

Then I found the cause: it tries to find aten::mean’s quantized weight by DFS-traversing back to the root, but finds nothing.

Then I commented out one line in qnn_torch.py:

num_quantized_inputs = {
    "quantized::conv2d": 1,
    "quantized::conv2d_relu": 1,
    ...
    "aten::dequantize": 1,
#        "aten::mean": 1,
    "aten::upsample_nearest2d": 1,
    "aten::upsample_bilinear2d": 1,
    "aten::relu_": 1,
    ...
    "aten::hardsigmoid": 1,
    "quantized::conv_transpose2d": 1,
}

After that, the build succeeded. So what is wrong here?