I have now found the function `MakeFMA` in src/tir/transforms/lower_intrin.cc; this may be a suitable place to handle the combining.
353 PrimExpr MakeFMA(const PrimExpr& a, const PrimExpr& b, const PrimExpr& c, const AddNode* op) {
354 // emit fma instruction: a * b + c
355 PrimExpr lhs = SwapBroadcastCast(a);
356 PrimExpr rhs = SwapBroadcastCast(b);
357
358 if (fma_ != nullptr && op->dtype.is_float()) {
359 PrimExpr r = fma_(Call(op->dtype, builtin::fma(), {lhs, rhs, c}));
360 if (r.defined()) return this->VisitExpr(r);
361 } else {
362 if (!lhs.same_as(a) || !rhs.same_as(b)) {
363 PrimExpr mul = this->VisitExpr(Mul(lhs, rhs));
364 return Add(mul, this->VisitExpr(c));
365 }
366 }
367 return IRMutatorWithAnalyzer::VisitExpr_(op);
368 }