Hi:
When I run the `relay.quantize.partition()` pass on a simple Relay graph, no `cast_hint` and `stop_fusion` ops are inserted around the first `conv2d` operator; they are only added around the second one.
The original Relay graph:
fn (%input0: Tensor[(1, 3, 224, 224), float32]) -> Tensor[(1, 128, 1, 1), float32] {
%0 = nn.conv2d(%input0, meta[relay.Constant][0] /* ty=Tensor[(64, 3, 7, 7), float32] */, strides=[2, 2], padding=[3, 3, 3, 3], channels=64, kernel_size=[7, 7]) /* ty=Tensor[(1, 64, 112, 112), float32] */;
%1 = nn.relu(%0) /* ty=Tensor[(1, 64, 112, 112), float32] */;
%2 = nn.max_pool2d(%1, pool_size=[3, 3], strides=[2, 2], padding=[1, 1, 1, 1]) /* ty=Tensor[(1, 64, 56, 56), float32] */;
%3 = nn.conv2d(%2, meta[relay.Constant][1] /* ty=Tensor[(128, 64, 3, 3), float32] */, padding=[1, 1, 1, 1], channels=128, kernel_size=[3, 3]) /* ty=Tensor[(1, 128, 56, 56), float32] */;
%4 = nn.relu(%3) /* ty=Tensor[(1, 128, 56, 56), float32] */;
%5 = nn.adaptive_avg_pool2d(%4, output_size=[1, 1]) /* ty=Tensor[(1, 128, 1, 1), float32] */;
nn.adaptive_avg_pool2d(%5, output_size=[1, 1]) /* ty=Tensor[(1, 128, 1, 1), float32] */
}
The Relay graph after the `relay.quantize.partition()` pass:
fn (%input0: Tensor[(1, 3, 224, 224), float32]) -> Tensor[(1, 128, 1, 1), float32] {
%0 = nn.conv2d(%input0, meta[relay.Constant][0] /* ty=Tensor[(64, 3, 7, 7), float32] */, strides=[2, 2], padding=[3, 3, 3, 3], channels=64, kernel_size=[7, 7]) /* ty=Tensor[(1, 64, 112, 112), float32] */;
%1 = nn.relu(%0) /* ty=Tensor[(1, 64, 112, 112), float32] */;
%2 = nn.max_pool2d(%1, pool_size=[3, 3], strides=[2, 2], padding=[1, 1, 1, 1]) /* ty=Tensor[(1, 64, 56, 56), float32] */;
%3 = annotation.cast_hint(%2, meta[relay.attrs.CastHintAttrs][0]) /* ty=Tensor[(1, 64, 56, 56), float32] */;
%4 = annotation.stop_fusion(%3) /* ty=Tensor[(1, 64, 56, 56), float32] */;
%5 = nn.conv2d(%4, meta[relay.Constant][1] /* ty=Tensor[(128, 64, 3, 3), float32] */, padding=[1, 1, 1, 1], channels=128, kernel_size=[3, 3]) /* ty=Tensor[(1, 128, 56, 56), float32] */;
%6 = nn.relu(%5) /* ty=Tensor[(1, 128, 56, 56), float32] */;
%7 = annotation.cast_hint(%6, meta[relay.attrs.CastHintAttrs][1]) /* ty=Tensor[(1, 128, 56, 56), float32] */;
%8 = annotation.stop_fusion(%7) /* ty=Tensor[(1, 128, 56, 56), float32] */;
%9 = nn.adaptive_avg_pool2d(%8, output_size=[1, 1]) /* ty=Tensor[(1, 128, 1, 1), float32] */;
nn.adaptive_avg_pool2d(%9, output_size=[1, 1]) /* ty=Tensor[(1, 128, 1, 1), float32] */
}