Layout of custom layer

I have a custom layer “nucfpga.l2norm”. Following the doc "Convert Layout Pass — tvm 0.8.dev0 documentation" (apache.org), I have set FInferCorrectLayout for it:

// Registers the Relay operator "nucfpga.l2norm" and attaches its attribute
// type, arguments, type relation and layout-inference callback.
// NOTE(review): NucfpgaL2normInferCorrectLayout is referenced here but, in this
// snippet, defined further down; it needs to be declared before this point for
// the registration to compile.
RELAY_REGISTER_OP("nucfpga.l2norm")
    .set_attrs_type<NucfpgaL2normAttrs>()
    // The op takes two inputs: the data tensor and a 1D scale tensor.
    .set_num_inputs(2)
    .add_argument("data", "Tensor", "The input tensor.")
    .add_argument("scale", "1D Tensor", "scale.")
    .set_support_level(3)
    // Type relation used to type-check the op's inputs and output.
    .add_type_rel("nucfpgaL2norm", NucfpgaL2normRel)
    // Callback through which the op reports its input/output layouts.
    .set_attr<FInferCorrectLayout>("FInferCorrectLayout", NucfpgaL2normInferCorrectLayout);

/*!
 * \brief Report the input and output layouts of nucfpga.l2norm.
 *
 * When new input layouts are supplied, the data layout proposed for input 0 is
 * adopted for both the data input and the output. Always answering with the old
 * layout (as the previous version did) declares that the op requires its
 * original layout, which makes the layout-conversion pass surround it with
 * layout_transform ops.
 *
 * NOTE(review): this assumes the op's compute can consume whatever data layout
 * is reported here (e.g. NHWC); confirm against the kernel implementation.
 *
 * \param attrs The operator attributes (unused).
 * \param new_in_layouts Proposed new layouts of the two inputs; may be undefined.
 * \param old_in_layouts Original layouts of the two inputs.
 * \param old_in_types Original types of the two inputs.
 * \return {input layouts, output layouts}: {{data, "C"}, {data}}.
 */
Array<Array<Layout>> NucfpgaL2normInferCorrectLayout(const Attrs& attrs,
                          const Array<Layout>& new_in_layouts,
                          const Array<Layout>& old_in_layouts,
                          const Array<tvm::relay::Type>& old_in_types) {
  ICHECK_EQ(old_in_layouts.size(), 2U);
  ICHECK_EQ(old_in_types.size(), 2U);

  // Start from the original data layout; it is the answer when no new layout
  // has been proposed.
  Layout data_layout = old_in_layouts[0];
  if (new_in_layouts.defined()) {
    ICHECK_EQ(new_in_layouts.size(), 2U);
    // Follow the layout the data input now arrives in, so no layout_transform
    // is needed around this op.
    if (new_in_layouts[0].defined()) {
      data_layout = new_in_layouts[0];
    }
  }

  // The scale input stays a 1D per-channel tensor; the output shares the data layout.
  return Array<Array<Layout>>{{data_layout, Layout("C")}, {data_layout}};
}

On the host side:

        desired_layouts = {'nn.conv2d': ['NHWC', 'OHWI16o16i']} 
        # Convert nn.conv2d to the NHWC data layout requested in desired_layouts.
        # RemoveUnusedFunctions is used to clean up the graph.
        seq = tvm.transform.Sequential([relay.transform.RemoveUnusedFunctions(),
                                        relay.transform.ConvertLayout(desired_layouts)])
        with tvm.transform.PassContext(opt_level=3):
          mod = seq(mod)
        print(mod)

and I get the following output:

  %73 = nn.relu(%72) /* ty=Tensor[(1, 38, 38, 512), int8] */;
  %74 = layout_transform(%73, src_layout="NHWC", dst_layout="NCHW") /* ty=Tensor[(1, 512, 38, 38), int8] */;
  %75 = nucfpga.l2norm(%74, %L2Norm.weight, ifrac_input="", ifrac_kernel="", ifrac_output="") /* ty=Tensor[(1, 512, 38, 38), int8] */;
  %76 = layout_transform(%75, src_layout="NCHW", dst_layout="NHWC") /* ty=Tensor[(1, 38, 38, 512), int8] */;

The input of my l2norm layer is transformed from NHWC to NCHW, and its output is transformed from NCHW back to NHWC. My op does not need this, because its compute is already NHWC, so I do not need the layout_transform() calls.

What should I do?

Thanks.