@register_annotate_function("nn.global_avg_pool2d")
def global_avg_pool2d_rewrite(ref_call, new_args, ctx):
    """Annotate rewrite for ``nn.global_avg_pool2d`` that halts quantization.

    If the quantize context says this call should be skipped, or the input
    expression carries no quantize kind, annotation is declined by returning
    ``None``.  Otherwise the input is realized (converted back to float), the
    op is forwarded on the realized argument, and the context is flagged so
    no op after this one gets quantized.
    """
    if quantize_context().check_to_skip(ref_call):
        return None

    # Only the kind matters here; the expression itself is not reused.
    _, input_kind = _get_expr_kind(new_args[0])
    if input_kind is None:
        return None

    forwarded = _forward_op(ref_call, [new_args[0].realize()])
    # Everything downstream of global_avg_pool2d stays in float precision.
    quantize_context().stop_quantize()
    return forwarded
1 Like
My guess is that TVM stops quantizing after the global average pooling in order to preserve accuracy.
Usually, in modern CNNs, the global average pooling is followed by the classifier (a dense layer). In order to preserve accuracy, that computation is performed in 32-bit (instead of 8-bit).
@OValery16's answer is correct. Quantized global average pooling usually causes a large accuracy drop.