Hi all.
Problem description
I have been trying to write a custom schedule for a dense layer. I have been using the vta/python/vta/top/vta_dense.py file as a guide. I have written my compute and registered it using @autotvm.register_topi_compute, and then created the schedule and registered it using @autotvm.register_topi_schedule.
My compute is almost equal to the vta example, but only with 2 dimensions instead of 4. My schedule will be a little different, but I am getting an error directly at the beginning of the schedule function, every time I want to call tvm.lower to print the schedule and see the changes I am doing to it.
This is the error:
Check failed: (allow_alloc) is false: Cannot find the Realization point of tensor Tensor(shape=[1], op.name=placeholder)
A similar error was reported here but there was no solution.
Custom code used
This is my module, before calling tvm.build:
#[version = "0.0.5"]
def @main(%serving_default_x:0: Tensor[(8, 8), uint8], %serving_default_y:0: Tensor[(8, 8), uint8]) {
%0 = qnn.requantize(%serving_default_x:0, 0.996078f, 0, 0.996078f, -128, out_dtype="int8");
%1 = qnn.requantize(%serving_default_y:0, 0.996078f, 0, 0.996078f, -128, out_dtype="int8");
%2 = reshape(%0, newshape=[-1, 8]);
%3 = transpose(%1, axes=[1, 0]);
%4 = qnn.dense(%2, %3, -128, -128, 0.996078f, 0.996078f, units=8, out_dtype="int32");
%5 = qnn.requantize(%4, 0.992172f, 0, 1082.65f, -128, out_dtype="int8");
qnn.requantize(%5, 1082.65f, -128, 1082.65f, 0, out_dtype="uint8")
}
I have registered a new dense_strategy with the following code:
@_strategy.dense_strategy.register("custom_acc")
def dense_strategy_custom_acc(attrs, inputs, out_type, target):
    """Dense op strategy for the custom_acc target.

    Offers the custom compute/schedule pair only when the data input is
    2-D; for any other rank no implementation is registered (None is
    returned so TVM falls back to other strategies).
    """
    # Guard clause: only 2-D dense is handled by this accelerator.
    if len(inputs[0].shape) != 2:
        return None
    acc_strategy = OpStrategy()
    acc_strategy.add_implementation(
        _strategy.wrap_compute_dense(dense),
        _strategy.wrap_topi_schedule(schedule_dense),
        name="dense.custom_acc",
    )
    return acc_strategy
I have registered my compute with the following code:
@autotvm.register_topi_compute("dense.custom_acc")
def dense(cfg, data, weight, bias=None, out_dtype=None):
    """Dense (fully connected) compute declaration for custom_acc.

    Parameters
    ----------
    cfg : autotvm config entity; used here only to record the FLOP count.
    data : te.Tensor of shape (batch, in_dim).
    weight : te.Tensor of shape (out_dim, in_dim) — VTA-style (out, in)
        layout, which is what the ``weight[c_o, k_o]`` access below implies.
    bias : unused; kept for interface compatibility with wrap_compute_dense.
    out_dtype : dtype both operands are cast to before multiply-accumulate.

    Returns
    -------
    te.Tensor of shape (batch, out_dim).
    """
    if len(data.shape) != 2 or len(weight.shape) != 2:
        raise topi.InvalidShapeError()
    # Derive shapes
    ishape = topi.utils.get_const_tuple(data.shape)
    wshape = topi.utils.get_const_tuple(weight.shape)
    # FIX: the output's second axis is the number of weight rows (out_dim),
    # not columns — weight[c_o, k_o] indexes c_o along axis 0. The original
    # (data.shape[0], weight.shape[1]) only worked because the example was
    # square (8x8).
    oshape = (data.shape[0], weight.shape[0])
    # Reduction axis: the shared inner dimension of data and weight.
    # FIX: the contraction pairs ishape[1] with wshape[1], not axis 0.
    assert ishape[1] == wshape[1]
    k_o = te.reduce_axis((0, wshape[1]), name="k_o")
    res = te.compute(
        oshape,
        lambda b_o, c_o: te.sum(
            data[b_o, k_o].astype(out_dtype) * weight[c_o, k_o].astype(out_dtype),
            axis=[k_o],
        ),
        name="res",
        tag="dense",
    )
    # 2 ops (multiply + add) per output element per reduction step.
    # FIX: prod(oshape) already contains the batch dimension, so multiply
    # only by the reduction length ishape[1], not ishape[0] * ishape[1].
    cfg.add_flop(2 * np.prod(topi.utils.get_const_tuple(oshape)) * ishape[1])
    return res
And my schedule is defined like this (I only include the beginning because that's where the error occurs):
@autotvm.register_topi_schedule("dense.custom_acc")
def schedule_dense(cfg, outs):
    """Dense schedule (truncated excerpt — only the traversal/setup part).

    Walks the fused op graph rooted at outs[0], classifying ops into
    constant, element-wise, and dense groups, then creates a schedule
    and attempts to lower it for inspection.
    """
    assert len(outs) == 1
    output = outs[0]
    const_ops = []      # broadcast-tagged ops with no spatial axes (scalar constants)
    ewise_inputs = []   # (consumer_op, placeholder) pairs feeding element-wise ops
    ewise_ops = []      # element-wise / broadcast ops fused around the dense
    dense_res = []      # should end up holding exactly the op tagged "dense"
    assert "int" in output.op.input_tensors[0].dtype
    def _traverse(op):
        # Classify each op reachable from the output: broadcast-tagged ops
        # are element-wise (or constants when they have no axes); anything
        # else is required to be the dense op itself.
        if topi.tag.is_broadcast(op.tag):
            if not op.same_as(output.op):
                if not op.axis:
                    const_ops.append(op)
                else:
                    ewise_ops.append(op)
            for tensor in op.input_tensors:
                if isinstance(tensor.op, tvm.te.PlaceholderOp):
                    ewise_inputs.append((op, tensor))
                else:
                    _traverse(tensor.op)
        else:
            assert op.tag == "dense"
            dense_res.append(op)
    _traverse(output.op)
    assert len(dense_res) == 1
    dense_stage = dense_res[0].output(0)
    # NOTE(review): outs is already guaranteed to be a list by the assert at
    # the top, so this guard is effectively a no-op here.
    outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
    sch = te.create_schedule([x.op for x in outs])
    ##### space definition begin #####
    x, y = sch[dense_stage].op.axis
    (z,) = sch[dense_stage].op.reduce_axis
    ###### space definition end ######
    data, weight = dense_stage.op.input_tensors
    # NOTE(review): `data` and `weight` are the *immediate* inputs of the
    # dense stage — after operator fusion these may be intermediate compute
    # tensors (e.g. requantize results), not placeholders, and other
    # placeholders of the fused graph (such as the shape-[1] quantization
    # scale/zero-point constants) do not appear in this argument list.
    # tvm.lower expects every placeholder it reaches to be listed as an
    # argument, which is the likely source of the reported
    # "Cannot find the Realization point of tensor ... shape=[1]" error —
    # TODO confirm by passing all placeholders gathered during _traverse
    # (e.g. the tensors collected in ewise_inputs) plus `output` instead.
    print(tvm.lower(sch,[data,weight,output]))
    breakpoint()
The “print” line at the end of the schedule function is the one that is raising the error. I am finding it difficult to debug this issue. Any recommendations?
EDIT: of course, if you have another idea to debug the changes that are done inside a register_topi_schedule function instead of printing the schedule at every step using tvm.lower, I would really appreciate the feedback.