# This pattern mirrors make_qnn_conv_hswish_pattern below; it covers the
# simpler case of a quantized hswish immediately following a qconv.
def make_qnn_mul_pattern(lhs=None, rhs=None):
    """Build a ``qnn.mul`` pattern expression.

    Parameters
    ----------
    lhs : relay.Expr, optional
        Left operand of the multiply. A fresh pattern variable is created
        when not supplied.
    rhs : relay.Expr, optional
        Right operand of the multiply. A fresh pattern variable is created
        when not supplied.

    Returns
    -------
    relay.Expr
        A ``qnn.mul`` call whose quantization parameters (scales and zero
        points) are all fresh pattern variables.
    """
    if lhs is None:
        lhs = relay.var('lhs')
    # BUGFIX: previously `rhs` was overwritten unconditionally, so a
    # caller-supplied rhs (e.g. the clip pattern passed in
    # make_qnn_conv_hswish_pattern) was silently discarded.
    if rhs is None:
        rhs = relay.var('rhs')
    lhs_scale = relay.var('lhs_scale')
    lhs_zero_point = relay.var('lhs_zero_point')
    rhs_scale = relay.var('rhs_scale')
    rhs_zero_point = relay.var('rhs_zero_point')
    output_scale = relay.var('output_scale')
    output_zero_point = relay.var('output_zero_point')
    return relay.qnn.op.mul(lhs, rhs, lhs_scale, lhs_zero_point,
                            rhs_scale, rhs_zero_point, output_scale,
                            output_zero_point)
def make_qnn_conv_pattern(with_pad=False):
    """Build a quantized-conv2d pattern: qnn.conv2d -> bias_add ->
    requantize -> clip(0, 255) -> cast(uint8).

    Parameters
    ----------
    with_pad : bool
        When True, prepend an ``nn.pad`` of one pixel on both spatial
        dimensions (NCHW layout assumed — TODO confirm against callers)
        before the convolution.

    Returns
    -------
    relay.Expr
        The cast expression at the tail of the pattern.
    """
    x = relay.var('x')
    y = relay.var('y')
    x_zp = relay.var('x_zp')
    y_zp = relay.var('y_zp')
    x_scale = relay.var('x_scale')
    y_scale = relay.var('y_scale')
    # Placeholder conv attributes; the pattern matcher does not constrain
    # on these concrete values.
    kernel_size = (0, 0)
    channels = 0
    # BUGFIX: the original body duplicated the y_scale/kernel_size/channels
    # assignments and the with_pad branch (a paste error); the duplicate
    # block has been removed.
    if with_pad:
        inp = relay.op.nn.pad(x, pad_width=((0, 0), (0, 0), (1, 1), (1, 1)))
    else:
        inp = x
    qnn_conv = relay.qnn.op.conv2d(inp, y, x_zp, y_zp,
                                   x_scale, y_scale, kernel_size, channels)
    bias_var = relay.var("bias")
    conv_out = relay.op.nn.bias_add(qnn_conv, bias_var)
    requantized = relay.qnn.op.requantize(conv_out,
                                          relay.var("requant_scale"),
                                          relay.var("requant_zp"),
                                          relay.var("output_scale"),
                                          relay.var("output_zp"),
                                          out_dtype="int32", axis=1)
    # Clamp to the uint8 range before the final cast.
    clip = relay.op.clip(requantized, 0, 255)
    cast = relay.op.cast(clip, dtype="uint8")
    return cast
def make_qnn_conv_hswish_pattern(with_pad=False, with_add=True):
    """Build a pattern for a quantized hswish following a quantized conv.

    Parameters
    ----------
    with_pad : bool
        Forwarded to :func:`make_qnn_conv_pattern`.
    with_add : bool
        Variant that includes an add in the hswish expansion. Not
        implemented yet.

    Returns
    -------
    relay.Expr
        A ``qnn.mul`` pattern multiplying the conv output by its clipped
        form (when ``with_add`` is False).

    Raises
    ------
    NotImplementedError
        If ``with_add`` is True.
    """
    qconv_pat = make_qnn_conv_pattern(with_pad=with_pad)
    clip = make_clip_pattern(qconv_pat)
    if with_add:
        # BUGFIX: was `assert False` + unreachable `pass # TODO`; asserts
        # vanish under `python -O`, so raise an explicit error instead.
        raise NotImplementedError(
            "with_add=True variant of the qconv+hswish pattern is not "
            "implemented yet")
    return make_qnn_mul_pattern(qconv_pat, clip)