Why exactly does nn.batch_norm have a 'TOpPattern' but nn.layer_norm doesn't?

Hi, I am trying to dig into the op fusion and graph optimization mechanism in TVM. I constructed two simple nets, each with three operators.

from tvm import relay

# matmul + layer_norm + relu
def layer_norm(data, gamma=None, beta=None, **kwargs):
    name = kwargs.get("name")
    kwargs.pop("name")
    if not gamma:
        gamma = relay.var(name + "_gamma")
    if not beta:
        beta = relay.var(name + "_beta")
    return relay.nn.layer_norm(data, gamma=gamma, beta=beta, **kwargs)

def mat_mul(data1, data2, **kwargs):
    name = kwargs.get("name")
    kwargs.pop("name")
    return relay.nn.matmul(data1, data2, **kwargs)

def simplenet(data1, data2, name):
    matmul = mat_mul(data1, data2, name=name + '_mm')
    ln = layer_norm(matmul, name=name + '_ln')
    act = relay.nn.relu(data=ln)
    return act

and

from tvm import relay

# BN
def batch_norm(data, gamma=None, beta=None, moving_mean=None, moving_var=None, **kwargs):
    name = kwargs.get("name")
    kwargs.pop("name")
    if not gamma:
        gamma = relay.var(name + "_gamma")
    if not beta:
        beta = relay.var(name + "_beta")
    if not moving_mean:
        moving_mean = relay.var(name + "_moving_mean")
    if not moving_var:
        moving_var = relay.var(name + "_moving_var")
    # nn.batch_norm returns a tuple; take the normalized output
    return relay.nn.batch_norm(data, gamma=gamma, beta=beta, moving_mean=moving_mean,
                               moving_var=moving_var, **kwargs)[0]

# conv2d
def conv2d(data, weight=None, **kwargs):
    name = kwargs.get("name")
    kwargs.pop("name")
    if not weight:
        weight = relay.var(name + "_weight")
    return relay.nn.conv2d(data, weight, **kwargs)

# conv2d + BN + ReLU
def simplenet(data, name, channels, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1), epsilon=1e-5):
    conv = conv2d(
        data=data,
        channels=channels,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_layout='NCHW',
        name=name + '_conv')
    bn = batch_norm(data=conv, epsilon=epsilon, name=name + '_bn')
    act = relay.nn.relu(data=bn)
    return act
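
For reference, this is roughly how I wire a net into an IRModule and run the pass. The shapes, dtypes, and fuse_opt_level below are placeholders I picked for this post, not necessarily what my real script uses:

import tvm
from tvm import relay

# Build the first net (matmul + layer_norm + relu) with made-up shapes.
data1 = relay.var("data1", shape=(8, 16), dtype="float32")
data2 = relay.var("data2", shape=(16, 32), dtype="float32")
body = simplenet(data1, data2, "net")

func = relay.Function(relay.analysis.free_vars(body), body)
mod = tvm.IRModule.from_expr(func)
mod = relay.transform.InferType()(mod)
mod = relay.transform.FuseOps(fuse_opt_level=2)(mod)  # this is where the error shows up for the layer_norm net
print(mod)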

When I run relay.transform.FuseOps() on the first net, I get:

Attribute TOpPattern has not been registered for nn.layer_norm

FuseOps on the second net with batch_norm works fine. I have noticed that RELAY_REGISTER_OP("nn.batch_norm") sets the TOpPattern attribute to kOutEWiseFusable, while RELAY_REGISTER_OP("nn.layer_norm") does not set a TOpPattern at all.
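
As far as I can tell, the same attribute can also be attached from Python with register_pattern; whether any particular pattern is actually safe for nn.layer_norm is exactly what I am unsure about, so the snippet below is only a sketch of the mechanism, not a proposed fix:

# Sketch of the mechanism only, not a proposed fix: attach a TOpPattern
# to nn.layer_norm from Python. I am not sure OPAQUE (or OUT_ELEMWISE_FUSABLE,
# like batch_norm) is the right choice -- that is part of my question.
from tvm.relay.op import op as _op_reg

_op_reg.register_pattern("nn.layer_norm", _op_reg.OpPattern.OPAQUE)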

I have read https://discuss.tvm.apache.org/t/why-doesnt-nn-layer-norm-have-toppattern/7046; it seems that registering a 'TOpPattern' would hold the optimizer back from splitting (decomposing) the op later. But I wonder why that concern does not apply to batch_norm. What is the main difference?
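
If I am reading that thread correctly, the expectation seems to be that nn.layer_norm is decomposed into primitive ops (which all have patterns) by an earlier pass such as SimplifyInference, so that FuseOps never sees it directly. A sketch of that pipeline, under my reading (I may well be misunderstanding it):

import tvm
from tvm import relay

# Sketch, based on my reading of the linked thread: expand the "composite"
# normalization ops before fusion, so FuseOps only sees ops with patterns.
# `mod` is the module built in the snippet above.
seq = tvm.transform.Sequential([
    relay.transform.InferType(),
    relay.transform.SimplifyInference(),        # expands nn.layer_norm / nn.batch_norm
    relay.transform.FuseOps(fuse_opt_level=2),
])
with tvm.transform.PassContext(opt_level=3):
    mod = seq(mod)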

Any help would be appreciated! :slight_smile: