Why can't a relay.Function that uses a relay.Call be built successfully through relay.build?

Hi, I’m trying to separate ops into different relay.Functions so that each can use its own schedule. The outer Function uses a relay.Call to call a sub-function:

fn (%xx: Tensor[(64, 64), int8], %ww: Tensor[(64), int8], %bb: Tensor[(64), int8], %y: Tensor[(64, 64), int8]) {
  %0 = fn (%x: Tensor[(64, 64), int8], %w: Tensor[(64), int8], %b: Tensor[(64), int8], Primitive=1) {
qnn.aie.layer_norm(%x, %w, %b, epsilon=1.52588e-05f, ln_shift=7, affine_shift=8)
  };
  %1 = %0(%xx, %ww, %bb);
  nn.dense(%1, %y, units=64, out_dtype="int8")
}

But it seems this can’t pass the checks in relay.build, because its CallNode.op is a Function rather than an Op. The checks are:

1. python/tvm/relay/backend/compile_engine.py(282) lower_call():  assert isinstance(call.op, tvm.ir.Op)
2. te_compiler_cache.cc::VisitExpr_(const CallNode* call_node):  Check failed: (call_node->op.as<OpNode>()) is false: Primitive function only allows call into primitive ops

So my question is: why isn't calling a Function supported when building or lowering? The FuseOps pass usually generates code with a call-a-Function pattern like this, so how can it be supported when building? Thank you very much.

IIUC, after FuseOps all ops have to be in primitive functions, including single ops, so your IR might need to be:

fn (%xx: Tensor[(64, 64), int8], %ww: Tensor[(64), int8], %bb: Tensor[(64), int8], %y: Tensor[(64, 64), int8]) {
  %0 = fn (%x: Tensor[(64, 64), int8], %w: Tensor[(64), int8], %b: Tensor[(64), int8], Primitive=1) {
    qnn.aie.layer_norm(%x, %w, %b, epsilon=1.52588e-05f, ln_shift=7, affine_shift=8)
  };
  %1 = %0(%xx, %ww, %bb);
  %2 = fn (%x: Tensor[(64, 64), int8], %w: Tensor[(64), int8], Primitive=1) {
    nn.dense(%x, %w, units=64, out_dtype="int8")
  };
  %2(%1, %y);
}

Yes, my IR after FuseOps now looks like this:

def @main(%xx: Tensor[(64, 64), int8], %ww: Tensor[(64), int8], %bb: Tensor[(64), int8], %y: Tensor[(64, 64), int8]) -> Tensor[(64, 64), int8] {
  %0 = fn (%x: Tensor[(64, 64), int8], %w: Tensor[(64), int8], %b: Tensor[(64), int8], Primitive=1) -> Tensor[(64, 64), int8] {
    qnn.aie.layer_norm(%x, %w, %b, epsilon=1.52588e-05f, ln_shift=7, affine_shift=8) /* ty=Tensor[(64, 64), int8] */
  };
  %1 = %0(%xx, %ww, %bb) /* ty=Tensor[(64, 64), int8] */;
  %2 = fn (%p0: Tensor[(64, 64), int8], %p1: Tensor[(64, 64), int8], Primitive=1) -> Tensor[(64, 64), int8] {
    nn.dense(%p0, %p1, units=64, out_dtype="int8") /* ty=Tensor[(64, 64), int8] */
  };
  %2(%1, %y) /* ty=Tensor[(64, 64), int8] */
}

After the Partition pass, it looks like this:

   #[version = "0.0.5"]
   def @main(%xx: Tensor[(64, 64), int8], %ww: Tensor[(64), int8], %bb: Tensor[(64), int8], %y: Tensor[(64, 64), int8]) -> Tensor[(64, 64), int8] {
      %0 = fn (%x: Tensor[(64, 64), int8], %w: Tensor[(64), int8], %b: Tensor[(64), int8], Primitive=1) -> Tensor[(64, 64), int8] {
        @tvmgen_default_versal_aie_main_2(%x, %w, %b) /* ty=Tensor[(64, 64), int8] */
      };
      %1 = %0(%xx, %ww, %bb) /* ty=Tensor[(64, 64), int8] */;
      %2 = fn (%p0: Tensor[(64, 64), int8], %p1: Tensor[(64, 64), int8], Primitive=1) -> Tensor[(64, 64), int8] {
        @tvmgen_default_versal_aie_main_0(%p0, %p1) /* ty=Tensor[(64, 64), int8] */
      };
      %2(%1, %y) /* ty=Tensor[(64, 64), int8] */
    }

    def @tvmgen_default_versal_aie_main_0(%versal_aie_0_i0: Tensor[(64, 64), int8], %versal_aie_0_i1: Tensor[(64, 64), int8], Inline=1, Compiler="versal_aie", global_symbol="tvmgen_default_versal_aie_main_0", Primitive=1) -> Tensor[(64, 64), int8] {
      nn.dense(%versal_aie_0_i0, %versal_aie_0_i1, units=64, out_dtype="int8") /* ty=Tensor[(64, 64), int8] */
    }

    def @tvmgen_default_versal_aie_main_2(%versal_aie_2_i0: Tensor[(64, 64), int8], %versal_aie_2_i1: Tensor[(64), int8], %versal_aie_2_i2: Tensor[(64), int8], Inline=1, Compiler="versal_aie", global_symbol="tvmgen_default_versal_aie_main_2", Primitive=1) -> Tensor[(64, 64), int8] {
      qnn.aie.layer_norm(%versal_aie_2_i0, %versal_aie_2_i1, %versal_aie_2_i2, epsilon=1.52588e-05f, ln_shift=7, affine_shift=8) /* ty=Tensor[(64, 64), int8] */
    }

But it still can’t pass the checks when building. It seems that the check only accepts an Op as Call.op, and does not accept a Function as Call.op, for this IR fragment:

%2 = fn (%p0: Tensor[(64, 64), int8], %p1: Tensor[(64, 64), int8], Primitive=1) -> Tensor[(64, 64), int8] {
    @tvmgen_default_versal_aie_main_0(%p0, %p1) /* ty=Tensor[(64, 64), int8] */
  };

Is there anything I'm missing?

If I write the IR like this, it builds without problems:

def @main(%x: Tensor[(64, 64), int8], %a: Tensor[(64, 64), int8], %w: Tensor[(64), int8], %b: Tensor[(64), int8], %y: Tensor[(64, 64), int8], %yy: Tensor[(64, 64), int8]) -> Tensor[(64, 64), int8] {
   %0 = @tvmgen_default_versal_aie_main_0(%x, %a, %w, %b, %y) /* ty=Tensor[(64, 64), int8] */;
  @tvmgen_default_versal_aie_main_5(%0, %yy) /* ty=Tensor[(64, 64), int8] */
}
def @tvmgen_default_versal_aie_main_0(%versal_aie_0_i0: Tensor[(64, 64), int8], %versal_aie_0_i1: Tensor[(64, 64), int8], %versal_aie_0_i2: Tensor[(64), int8], %versal_aie_0_i3: Tensor[(64), int8], %versal_aie_0_i4: Tensor[(64, 64), int8], Inline=1, Compiler="versal_aie", global_symbol="tvmgen_default_versal_aie_main_0", Primitive=1) -> Tensor[(64, 64), int8] {
  qnn.aie.layer_norm(%versal_aie_0_i0, %versal_aie_0_i1) /* ty=Tensor[(64, 64), int8] */;
}

def @tvmgen_default_versal_aie_main_5(%versal_aie_5_i0: Tensor[(64, 64), int8], %versal_aie_5_i1: Tensor[(64, 64), int8], Inline=1, Compiler="versal_aie", global_symbol="tvmgen_default_versal_aie_main_5", Primitive=1) -> Tensor[(64, 64), int8] {
  nn.dense(%versal_aie_5_i0, %versal_aie_5_i1, units=64, out_dtype="int8") /* ty=Tensor[(64, 64), int8] */
}

So it seems that the checks confuse “a Function assignment using a Function declaration” with “a real Function call”?