How to open the 'L' dim tuning space in MatMul

The matmul example in the auto-tuning tutorial only opens the tuning space for the N and M dimensions. I tried to open the tuning space for the L dimension of matmul with the following code, but it does not work. What is the issue here?

@autotvm.template
def matmul(N, L, M, dtype):
    """AutoTVM template for C = A x B with tunable N, M and L (reduction) tiling.

    Declares ``A`` of shape (N, L), ``B`` of shape (L, M) and
    ``C[i, j] = sum_k A[i, k] * B[k, j]``, then builds a GPU-style schedule:
    the output tile is accumulated in a 'local' cache, the operands are
    staged through 'shared' and then 'local' caches, and the reduction
    axis is split three ways so its tiling is part of the search space.

    Tuning knobs:
        tile_y -- 2-way split of the row axis    -> (blockIdx.y, threadIdx.y)
        tile_x -- 2-way split of the column axis -> (blockIdx.x, threadIdx.x)
        tile_k -- 3-way split of the reduction axis -> (ko, km, ki)

    Parameters
    ----------
    N, L, M : extents used for the placeholder shapes and the reduce axis.
    dtype   : element dtype passed to both placeholders.

    Returns
    -------
    (s, [A, B, C]) : the schedule and the argument tensors.
    """
    A = tvm.placeholder((N, L), name='A', dtype=dtype)
    B = tvm.placeholder((L, M), name='B', dtype=dtype)
    k = tvm.reduce_axis((0, L), name='k')
    C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C')
    s = tvm.create_schedule(C.op)

    y, x = s[C].op.axis
    k = s[C].op.reduce_axis[0]

    ##### define space begin #####
    cfg = autotvm.get_config()
    cfg.define_split("tile_y", y, num_outputs=2)
    cfg.define_split("tile_x", x, num_outputs=2)
    cfg.define_split("tile_k", k, num_outputs=3)
    ##### define space end #####

    # Accumulate the output tile in a 'local' cache; C becomes the write-back stage.
    OL = s.cache_write(C, 'local')

    # create cache stage: operands go through 'shared', then 'local'
    AA = s.cache_read(A, 'shared', [OL])
    WW = s.cache_read(B, 'shared', [OL])
    AL = s.cache_read(AA, 'local', [OL])
    WL = s.cache_read(WW, 'local', [OL])

    # schedule according to config
    # Re-read the output axes after cache_write so the splits are applied to
    # the axes of the stage that is actually being scheduled.
    y, x = s[C].op.axis
    yo, yi = cfg["tile_y"].apply(s, C, y)
    xo, xi = cfg["tile_x"].apply(s, C, x)

    ######### bind ###########
    s[C].bind(yo, tvm.thread_axis("blockIdx.y"))
    s[C].bind(xo, tvm.thread_axis("blockIdx.x"))
    s[C].bind(yi, tvm.thread_axis("threadIdx.y"))
    s[C].bind(xi, tvm.thread_axis("threadIdx.x"))
    s[C].reorder(yo, xo, yi, xi)
    s[OL].compute_at(s[C], xi)

    # tile reduction axes
    y, x = s[OL].op.axis
    k = s[OL].op.reduce_axis[0]
    ko, km, ki = cfg['tile_k'].apply(s, OL, k)
    s[OL].reorder(ko, km, ki, y, x)

    # Shared tiles are refreshed once per outer reduction step, local tiles
    # once per middle reduction step.
    s[AA].compute_at(s[OL], ko)
    s[WW].compute_at(s[OL], ko)
    s[AL].compute_at(s[OL], km)
    s[WL].compute_at(s[OL], km)

    # cooperative fetching
    # tile_y / tile_x are applied above as (yo, yi) / (xo, xi), and yi / xi are
    # the parts bound to threadIdx.y / threadIdx.x. The loads must therefore
    # be split by size[1] (the thread-level part). Using size[0] (the
    # block-level part) would give these stages a threadIdx extent that
    # disagrees with stage C.
    for load in [AA, WW]:
        ly, lx = s[load].op.axis
        fused = s[load].fuse(ly, lx)
        ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[1])
        tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[1])
        s[load].bind(ty, tvm.thread_axis("threadIdx.y"))
        s[load].bind(tx, tvm.thread_axis("threadIdx.x"))
    ######### bind end #######

    return s, [A, B, C]

What is the error you are getting here? It looks like the split on the 'L' axis is never used (define_split is called, but .split does not use the result).