Hi Experts, @Lianminzheng @jcf94 I tried to define a new operator for an LSTM network. The computation declaration for the LSTM op has been tested and is correct. Now I want to use auto-scheduling to automatically generate a large search space and find a good schedule in that space. However, the schedule cannot be generated successfully; my code is here:
from tvm import topi
def unbind_func(data):
    """Split a 3-D tensor along axis 0 into a list of 2-D tensors.

    Mirrors ``torch.unbind``: one slice per index of the leading axis,
    with the resulting length-1 axis squeezed away.

    Parameters
    ----------
    data : tvm.te.Tensor
        3-D input tensor. The first axis must have a static extent, since
        ``data.shape[0].value`` is read at trace time.

    Returns
    -------
    list of tvm.te.Tensor
        ``data.shape[0]`` tensors, each of rank 2.
    """
    slices = topi.split(data, indices_or_sections=data.shape[0].value, axis=0)
    # Squeeze the leading unit axis so each slice becomes 2-D.
    return [topi.squeeze(s, axis=0) for s in slices]
def lstm_layer(data, hx, cx, w_ih, w_hh, b_ih, b_hh, out_dtype=None):
    """Single-layer LSTM over a sequence, unrolled at trace time.

    Parameters
    ----------
    data : tvm.te.Tensor
        3-D input, assumed (seq_len, batch, input_size); seq_len must be
        static so the sequence can be unrolled.
    hx : tvm.te.Tensor
        2-D initial hidden state, shape (batch, hidden_size).
    cx : tvm.te.Tensor
        2-D initial cell state, shape (batch, hidden_size).
    w_ih : tvm.te.Tensor
        2-D input-to-hidden weight, shape (4 * hidden_size, input_size).
    w_hh : tvm.te.Tensor
        2-D hidden-to-hidden weight, shape (4 * hidden_size, hidden_size).
    b_ih : tvm.te.Tensor
        1-D input-to-hidden bias, shape (4 * hidden_size,).
    b_hh : tvm.te.Tensor
        1-D hidden-to-hidden bias, shape (4 * hidden_size,).
    out_dtype : str, optional
        Output dtype for mixed precision; defaults to ``data.dtype``.

    Returns
    -------
    output : tvm.te.Tensor
        3-D tensor of the per-step hidden states, stacked along axis 0.
        NOTE: only the stacked step outputs are returned; the final
        (hy, cy) states are not exposed to the caller.
    """
    assert (
        len(data.shape) == 3
        and len(hx.shape) == 2
        and len(cx.shape) == 2
        and len(w_ih.shape) == 2
        and len(w_hh.shape) == 2
        and len(b_ih.shape) == 1
        and len(b_hh.shape) == 1
    ), "lstm_layer: unexpected tensor ranks for inputs"
    if out_dtype is None:
        out_dtype = data.dtype
    # Unroll the sequence: one 2-D (batch, input_size) tensor per time step.
    step_inputs = unbind_func(data)
    step_outputs = []
    for step_input in step_inputs:
        # Fused gate pre-activations; the 4 gates are chunked along axis 1.
        linear_ih = topi.nn.dense(step_input, w_ih, b_ih)
        linear_hh = topi.nn.dense(hx, w_hh, b_hh)
        gates = topi.add(linear_ih, linear_hh)
        chunked_gates = topi.split(gates, indices_or_sections=4, axis=1)
        assert len(chunked_gates) == 4
        in_gate = topi.sigmoid(chunked_gates[0])
        forget_gate = topi.sigmoid(chunked_gates[1])
        cell_gate = topi.tanh(chunked_gates[2])
        out_gate = topi.sigmoid(chunked_gates[3])
        # Standard LSTM cell update: c' = f*c + i*g, h' = o * tanh(c').
        cy = topi.add(topi.multiply(forget_gate, cx), topi.multiply(in_gate, cell_gate))
        hy = topi.multiply(out_gate, topi.tanh(cy))
        step_outputs.append(hy)
        # Recur: this step's states feed the next step.
        hx = hy
        cx = cy
    output = topi.stack(step_outputs, axis=0)
    return output
import tvm
from tvm import te, auto_scheduler, topi
@auto_scheduler.register_workload
def lstm_layers(hx, cx, w_ih, w_hh, b_ih, b_hh):
    """Auto-scheduler workload wrapping the LSTM layer with a fixed input shape.

    Returns the placeholder/output tensor list the auto-scheduler traces.
    """
    # (seq_len, batch, input_size); seq_len > 1 is what triggers the reported error.
    data = te.placeholder((2, 1, 240), name="data")
    # Call the module-level lstm_layer defined above — it is NOT part of topi.nn,
    # so topi.nn.lstm_layer would raise AttributeError.
    out = lstm_layer(data, hx, cx, w_ih, w_hh, b_ih, b_hh, out_dtype="float32")
    return [data, hx, cx, w_ih, w_hh, b_ih, b_hh, out]
target = tvm.target.Target("cuda")

# Placeholders for one LSTM layer: hidden_size = 1024, input_size = 240,
# so the fused gate dimension is 4 * 1024 = 4096.
hx = te.placeholder((1, 1024), name='hx')
cx = te.placeholder((1, 1024), name='cx')
w_ih = te.placeholder((4096, 240), name='w_ih')
w_hh = te.placeholder((4096, 1024), name='w_hh')
b_ih = te.placeholder((4096,), name='b_ih')
b_hh = te.placeholder((4096,), name='b_hh')

task = auto_scheduler.create_task(lstm_layers, (hx, cx, w_ih, w_hh, b_ih, b_hh), target)
# Inspect the computational graph
print(task.compute_dag)

# min_repeat_ms=300 reduces timing variance for GPU measurements.
measure_ctx = auto_scheduler.LocalRPCMeasureContext(min_repeat_ms=300)
tune_option = auto_scheduler.TuningOptions(
    # NOTE(review): 1 trial is only a smoke test; use hundreds/thousands for real tuning.
    num_measure_trials=1,
    runner=measure_ctx.runner,
    measure_callbacks=[auto_scheduler.RecordToFile("lstm_layers.json")],
)
sch, args = auto_scheduler.auto_schedule(task, tuning_options=tune_option)
# Release the local RPC tracker/server started by LocalRPCMeasureContext.
del measure_ctx
print(tvm.lower(sch, list(args), simple_mode=True))
Only when data = te.placeholder((1, 1, 240), name="data") can the schedule be generated successfully. When data = te.placeholder((n, 1, 240), name="data") with n > 1, the DAG graph can be obtained and it shows "Get devices for measurement successfully!", but the schedule cannot be generated. The detailed error is the following:
Traceback (most recent call last):
File "/root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/tutorials/auto_scheduler/tune_lstm_layers.py", line 109, in <module>
sch, args = auto_scheduler.auto_schedule(task, tuning_options=tune_option)
File "/root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/python/tvm/auto_scheduler/auto_schedule.py", line 213, in auto_schedule
sch, tensors = _ffi_api.AutoSchedule(search_policy, tuning_options)
File "/root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/python/tvm/_ffi/_ctypes/packed_func.py", line 237, in __call__
raise get_last_ffi_error()
tvm._ffi.base.TVMError: Traceback (most recent call last):
[bt] (7) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(TVMFuncCall+0x61) [0x7fa9afe26ec1]
[bt] (6) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(+0xacaacd) [0x7fa9af1cdacd]
[bt] (5) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::auto_scheduler::AutoSchedule(tvm::auto_scheduler::SearchPolicy, tvm::auto_scheduler::TuningOptions)+0x116) [0x7fa9af1cd1b6]
[bt] (4) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::auto_scheduler::SketchPolicyNode::Search(int, int, int, tvm::auto_scheduler::ProgramMeasurer)+0xa82) [0x7fa9af262f52]
[bt] (3) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::auto_scheduler::SketchPolicyNode::SearchOneRound(int, tvm::runtime::Array<tvm::auto_scheduler::State, void>*)+0x1c3) [0x7fa9af261f83]
[bt] (2) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::auto_scheduler::SketchPolicyNode::SampleInitPopulation(tvm::runtime::Array<tvm::auto_scheduler::State, void> const&, int)+0x21e) [0x7fa9af25d39e]
[bt] (1) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::support::parallel_for(int, int, std::function<void (int)> const&, int, std::function<std::vector<std::vector<int, std::allocator<int> >, std::allocator<std::vector<int, std::allocator<int> > > > (int, int, int, int)>)+0x1273) [0x7fa9af7fb413]
[bt] (0) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x5f) [0x7fa9af1d171f]
[bt] (8) /usr/lib/x86_64-linux-gnu/libstdc++.so.6(+0xbd6df) [0x7fa9ab9976df]
[bt] (7) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(std::thread::_State_impl<std::_Bind_simple<std::packaged_task<void (std::vector<int, std::allocator<int> > const&, std::function<void (int)> const&)> (std::vector<int, std::allocator<int> >, std::function<void (int)>)> >::_M_run()+0xd3) [0x7fa9af7fbb13]
[bt] (6) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(void std::call_once<void (std::__future_base::_State_baseV2::*)(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*, bool*), std::__future_base::_State_baseV2*, std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*, bool*>(std::once_flag&, void (std::__future_base::_State_baseV2::*&&)(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*, bool*), std::__future_base::_State_baseV2*&&, std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*&&, bool*&&)+0x71) [0x7fa9af7fba01]
[bt] (5) /lib/x86_64-linux-gnu/libpthread.so.0(+0xf827) [0x7fa9d5c09827]
[bt] (4) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(std::__future_base::_State_baseV2::_M_do_set(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*, bool*)+0x29) [0x7fa9af7fb8e9]
[bt] (3) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(+0x10f6072) [0x7fa9af7f9072]
[bt] (2) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(+0xb5a0f2) [0x7fa9af25d0f2]
[bt] (1) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(tvm::auto_scheduler::InitThreadBind::Apply(tvm::auto_scheduler::SketchPolicyNode*, tvm::auto_scheduler::State*, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>*) const+0x2f9) [0x7fa9af2743e9]
[bt] (0) /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/build/libtvm.so(+0xb671ff) [0x7fa9af26a1ff]
File "/root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/src/support/parallel_for.cc", line 92
TVMError: Parallel_for error with [21:43:57] /root/test6_root/jialipang/TVM-Git-1009/incubator-tvm-master/src/auto_scheduler/search_policy/sketch_policy_rules.cc:710: Check failed: HasCrossThreadReduction(*state, stage_id):
Can you please help me understand and fix the above issue?
Thanks