Hi, I noticed that some op functions were fused into a new function such as
fused_nn_conv2d_add_cast_fixed_point_multiply_clip_cast_cast_subtract
in the model TFLite mobilenet_v1_0.25_224_quant. So I tried to get an IR dump by adding a PrintIR pass to “tvm/python/tvm/driver/build_module.py”, inside
def lower(sch, args, name="main", binds=None, simple_mode=False):
    ...
    if instrument_bound_checkers:
        pass_list += [tvm.tir.transform.InstrumentBoundCheckers()]
    pass_list += [tvm.transform.PrintIR()]  # added: dump the lowered TIR of each fused function
    optimize = tvm.transform.Sequential(pass_list)
    mod = optimize(mod)
    return mod
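For context, this is roughly how I drive the build so that the patched lower() gets called for every fused function (a minimal sketch; the .tflite path, input tensor name, and shape below are assumptions for my local setup):

import tflite
import tvm
from tvm import relay

# Assumed local path and input info for mobilenet_v1_0.25_224_quant
with open("mobilenet_v1_0.25_224_quant.tflite", "rb") as f:
    tflite_model = tflite.Model.GetRootAsModel(f.read(), 0)

mod, params = relay.frontend.from_tflite(
    tflite_model,
    shape_dict={"input": (1, 224, 224, 3)},
    dtype_dict={"input": "uint8"},
)

# opt_level=3 enables operator fusion; the PrintIR pass added above then
# prints the TIR of every fused function during relay.build.
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target="llvm", params=params)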
And got IR like this:
#[version = "0.0.5"]
primfn(placeholder_4: handle, placeholder_5: handle, placeholder_6: handle, placeholder_7: handle, T_subtract_1: handle) -> ()
attr = {"global_symbol": "fused_nn_conv2d_add_cast_fixed_point_multiply_clip_cast_cast_subtract", "tir.noalias": True}
buffers = {placeholder_3: Buffer(placeholder_8: Pointer(int16), int16, [], []),
placeholder_2: Buffer(placeholder_9: Pointer(int16), int16, [3, 3, 256, 1], []),
T_subtract: Buffer(T_subtract_2: Pointer(int16), int16, [1, 7i64, 7i64, 256], []),
placeholder_1: Buffer(placeholder_10: Pointer(int32), int32, [1, 1, 1, 256], []),
placeholder: Buffer(placeholder_11: Pointer(int16), int16, [1, 7i64, 7i64, 256], [])}
buffer_map = {placeholder_4: placeholder, placeholder_6: placeholder_1, T_subtract_1: T_subtract, placeholder_5: placeholder_2, placeholder_7: placeholder_3} {
attr [PaddedInput: Pointer(int16)] "storage_scope" = "global";
allocate(PaddedInput, int16, [20736i64]);
attr [DepthwiseConv2d: Pointer(int32)] "storage_scope" = "global";
allocate(DepthwiseConv2d, int32, [12544i64]) {
    for (i1: int32, 0, 9) {
      for (i2: int32, 0, 9) {
        for (i3: int32, 0, 256) {
          PaddedInput[(((i1*2304) + (i2*256)) + i3)] = @tir.if_then_else(((((1i64 <= cast(int64, i1)) && (cast(int64, i1) < 8i64)) && (1i64 <= cast(int64, i2))) && (cast(int64, i2) < 8i64)), (int16*)placeholder_11[((((i1*1792) + (i2*256)) + i3) - 2048)], 0i16, dtype=int16)
        }
      }
    }
...
omitted
...
    for (ax1_2: int32, 0, 7) {
      for (ax2_2: int32, 0, 7) {
        for (ax3_2: int32, 0, 256) {
          DepthwiseConv2d[(((ax1_2*1792) + (ax2_2*256)) + ax3_2)] = cast(int16, (uint8*)PaddedInput[(((ax1_2*1792) + (ax2_2*256)) + ax3_2)])
        }
      }
    }
    for (ax1_3: int32, 0, 7) {
      for (ax2_3: int32, 0, 7) {
        for (ax3_3: int32, 0, 256) {
          T_subtract_2[(((ax1_3*1792) + (ax2_3*256)) + ax3_3)] = ((int16*)DepthwiseConv2d[(((ax1_3*1792) + (ax2_3*256)) + ax3_3)] - (int16*)placeholder_8[0])
        }
      }
    }
It seems this function takes int16 input and produces int16 output, which does not match what I learned from https://discuss.tvm.apache.org/t/tf-lite-quantized-conv2d-operator-conversion/2651/20: a layer should start from (int8/uint8) input and be requantized to int8 for the next layer.
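For comparison, this is roughly the QNN pattern I expected such a layer to come from before lowering (a minimal sketch with made-up scales, zero points, and shapes, and a plain NCHW conv2d instead of the model's actual NHWC depthwise conv, just to illustrate the uint8-in / int32-accumulate / requantize-to-uint8-out flow):

import tvm
from tvm import relay

# uint8 activations and weights, int32 accumulator (values below are invented)
data = relay.var("data", shape=(1, 64, 7, 7), dtype="uint8")
weight = relay.var("weight", shape=(64, 64, 3, 3), dtype="uint8")

conv = relay.qnn.op.conv2d(
    data,
    weight,
    input_zero_point=relay.const(128, "int32"),
    kernel_zero_point=relay.const(122, "int32"),
    input_scale=relay.const(0.023, "float32"),
    kernel_scale=relay.const(0.005, "float32"),
    kernel_size=(3, 3),
    channels=64,
    padding=(1, 1),
    data_layout="NCHW",
    kernel_layout="OIHW",
    out_dtype="int32",
)

# Requantize the int32 accumulator back to uint8 for the next layer
out = relay.qnn.op.requantize(
    conv,
    input_scale=relay.const(0.023 * 0.005, "float32"),
    input_zero_point=relay.const(0, "int32"),
    output_scale=relay.const(0.023, "float32"),
    output_zero_point=relay.const(128, "int32"),
    out_dtype="uint8",
)

print(relay.Function([data, weight], out))

As far as I understand, the requantize step is what should become the fixed_point_multiply + clip + cast chain in the fused function name, so I expected int8/uint8 at the boundaries rather than int16.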