```
import tvm
from tvm import te
# Minimal reproducer: a three-level tiled matmul schedule with a prefetch
# request on input A, built for the LLVM target.
# Problem sizes: A is (M, K), B is (N, K), C is (M, N).
M = 64
K = 256
N = 256
dtype = "float32"
# Reduction axis spanning the shared K dimension of A and B.
k = te.reduce_axis((0, K), name='k')
A = te.placeholder((M, K), dtype=dtype, name='A')
B = te.placeholder((N, K), dtype=dtype, name='B')
# C[i, j] = sum over k of A[i, k] * B[j, k]. B is stored as (N, K), so this
# computes A times B-transposed.
C = te.compute((M, N), lambda i, j: te.sum(A[i, k] * B[j, k], axis=k), name='C')
s = te.create_schedule(C.op)
# Pull the loop axes back out of the stage. Note that `k` is rebound here from
# the reduce axis created above to the stage's own reduce axis.
y, x = s[C].op.axis
k = s[C].op.reduce_axis[0]
# Split every axis twice; split() returns (outer, inner), so the suffix 3 is
# the outermost tile level and 1 the innermost. Resulting extents:
#   y (64)  -> y3 (4)  * y2 (4) * y1 (4)
#   x (256) -> x3 (4)  * x2 (8) * x1 (8)
#   k (256) -> k3 (64) * k2 (2) * k1 (2)
y2, y1 = s[C].split(y, factor=4)
y3, y2 = s[C].split(y2, factor=4)
x2, x1 = s[C].split(x, factor=8)
x3, x2 = s[C].split(x2, factor=8)
k2, k1 = s[C].split(k, factor=2)
k3, k2 = s[C].split(k2, factor=2)
# Loop nest order: outer tiles (y3, x3, k3), then middle tiles (y2, x2, k2),
# then the innermost tile (y1, x1, k1).
s[C].reorder(y3, x3, k3, y2, x2, k2, y1, x1, k1)
# Request a prefetch of tensor A at loop y1 with an offset of 1 iteration.
# y1 is not the innermost loop: x1 and k1 are nested inside it.
s[C].prefetch(A, y1, 1)
target = "llvm"
# NOTE(review): the storage_flatten check failure quoted below this script is
# presumably raised while lowering inside this build call -- confirm against
# the full traceback.
lib = tvm.build(s, [A, B, C], target)
```

Error message:

```
File "/tvm/src/tir/transforms/storage_flatten.cc", line 1023
Check failed: e.buffer->shape.size() == op->bounds.size() (2 vs. 0) : Prefetch dim should be the same as buffer dim
```