[BackTrace] really basic code triggers autoTVM exception

huangteng · April 19, 2021, 7:26am

Hi, I am struggling with autoTVM tune with my self-written te.compute expression. I have an impression that autoTVM tune does not support dynamic indexing tensor array using the value from another tensor, cause it will trigger some strange backtrace which is not so directly relevant with my expression. The backtrace is like below:

Traceback (most recent call last):
  99: _PyEval_EvalFrameDefault
        at /tmp/build/80754af9/python_1585235154784/work/Python/ceval.c:3559
  98: do_call_core
        at /tmp/build/80754af9/python_1585235154784/work/Python/ceval.c:5034
  97: PyVectorcall_Call
        at /tmp/build/80754af9/python_1585235154784/work/Objects/call.c:199
  96: _PyFunction_Vectorcall
        at /tmp/build/80754af9/python_1585235154784/work/Objects/call.c:410
  95: function_code_fastcall
        at /tmp/build/80754af9/python_1585235154784/work/Objects/call.c:283
  94: _PyEval_EvalFrameDefault
        at /tmp/build/80754af9/python_1585235154784/work/Python/ceval.c:3486
  93: call_function
        at /tmp/build/80754af9/python_1585235154784/work/Python/ceval.c:4987
  92: _PyObject_Vectorcall
        at /tmp/build/80754af9/python_1585235154784/work/Include/cpython/abstract.h:127
  91: _PyFunction_Vectorcall
        at /tmp/build/80754af9/python_1585235154784/work/Objects/call.c:410
  90: function_code_fastcall
        at /tmp/build

I wrote a minimum code piece to reproduce this problem, please download and run to see if we have encountered the same issue. Thanks

import sys
import numpy as np
import tvm
from tvm import autotvm, te
from tvm import topi
import tvm.topi.testing
from tvm.topi.utils import get_const_tuple, traverse_inline, get_const_int
import tvm.contrib.sparse as tvmsp
from collections import namedtuple
import time
import scipy.sparse as sp
import argparse
import logging
import sys
from tvm.topi.sparse_conv import fkw_data_struct_te_v2

tgt = 'llvm'
ctx = tvm.device(tgt, 0)

def sample_compute(data, index):
    BN, OC, OH, OW = data.shape

    def _compute_pattern_basic(*indices):
        n, oc, oh, ow = indices
        stride_index = index[oc]
        # here triggers the backtrace (replace stride_index with oh will solve the issue)
        return data[n, oc, stride_index, ow]

    return te.compute((BN, OC, OH, OW),
                      lambda n, oc, oh, ow: _compute_pattern_basic(n, oc, oh, ow),
                      name='comp')


def schedule_sample_compute(cfg, outs):
    """Create schedule"""
    s = te.create_schedule([x.op for x in outs])
    C = outs[0]
    n, oc, oh, ow = s[C].op.axis
    outer_axe, inner_axe = cfg.define_reorder("reorder", [oh, ow], policy="all")
    cfg["reorder"].apply(s, C, [n, oc, outer_axe, inner_axe])
    return s


def sample_autotvm_tune(N, C, H, W):
    # logging config (for printing tuning log to screen)
    logging.getLogger('autotvm').setLevel(logging.DEBUG)
    logging.getLogger('autotvm').addHandler(logging.StreamHandler(sys.stdout))

    task = autotvm.task.create("sample/autotvm_test",
                               args=(N, C, H, W),
                               target='llvm')

    # The timeout for running is 4 seconds
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(repeat=1, min_repeat_ms=100, timeout=4)
    )

    # Begin tuning
    tuner = autotvm.tuner.XGBTuner(task)
    tuner.tune(n_trial=2,
               measure_option=measure_option,
               callbacks=[autotvm.callback.log_to_file('sample_autotvm.log')])


@autotvm.template("sample/autotvm_test")
def sample_template(N, C, H, W):
    data = te.placeholder(shape=(N, C, H, W), dtype='float32', name='data')
    index = te.placeholder(shape=(H,), dtype='int', name='index')
    Y = sample_compute(data, index) 

    cfg = autotvm.get_config()
    cfg.add_flop(data.shape[0] * data.shape[1] * data.shape[2] * data.shape[3])
    s = schedule_sample_compute(cfg, [Y])
    return s, [data, index, Y]


if __name__ == "__main__":
    with tvm.target.Target(tgt):
        N, C, H, W = (1, 1, 5, 5)
        # test te.compute basic
        a = tvm.nd.array(np.ones((N, C, H, W), dtype='float32'), ctx)
        b = tvm.nd.array(np.ones((H,), dtype='int32'), ctx)
        c = tvm.nd.array(np.zeros((N, C, H, W), dtype='float32'), ctx)
        s, arg_bufs = sample_template(N, C, H, W) 
        func = tvm.build(s, arg_bufs)
        func(a, b, c)
        # start tunning
        sample_autotvm_tune(N, C, H, W)

huangteng · April 24, 2021, 1:55pm

@tqchen @comaniac Really appreciated that if you could share any tips, I stucked at this problem for nearly a month.

FrozenGene · April 28, 2021, 1:22pm

huangteng:

import sys
import numpy as np
import tvm
from tvm import autotvm, te
from tvm import topi
import tvm.topi.testing
from tvm.topi.utils import get_const_tuple, traverse_inline, get_const_int
import tvm.contrib.sparse as tvmsp
from collections import namedtuple
import time
import scipy.sparse as sp
import argparse
import logging
import sys
from tvm.topi.sparse_conv import fkw_data_struct_te_v2

tgt = 'llvm'
ctx = tvm.device(tgt, 0)

def sample_compute(data, index):
    BN, OC, OH, OW = data.shape

    def _compute_pattern_basic(*indices):
        n, oc, oh, ow = indices
        stride_index = index[oc]
        # here triggers the backtrace (replace stride_index with oh will solve the issue)
        return data[n, oc, stride_index, ow]

    return te.compute((BN, OC, OH, OW),
                      lambda n, oc, oh, ow: _compute_pattern_basic(n, oc, oh, ow),
                      name='comp')


def schedule_sample_compute(cfg, outs):
    """Create schedule"""
    s = te.create_schedule([x.op for x in outs])
    C = outs[0]
    n, oc, oh, ow = s[C].op.axis
    outer_axe, inner_axe = cfg.define_reorder("reorder", [oh, ow], policy="all")
    cfg["reorder"].apply(s, C, [n, oc, outer_axe, inner_axe])
    return s


def sample_autotvm_tune(N, C, H, W):
    # logging config (for printing tuning log to screen)
    logging.getLogger('autotvm').setLevel(logging.DEBUG)
    logging.getLogger('autotvm').addHandler(logging.StreamHandler(sys.stdout))

    task = autotvm.task.create("sample/autotvm_test",
                               args=(N, C, H, W),
                               target='llvm')

    # The timeout for running is 4 seconds
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(repeat=1, min_repeat_ms=100, timeout=4)
    )

    # Begin tuning
    tuner = autotvm.tuner.XGBTuner(task)
    tuner.tune(n_trial=2,
               measure_option=measure_option,
               callbacks=[autotvm.callback.log_to_file('sample_autotvm.log')])


@autotvm.template("sample/autotvm_test")
def sample_template(N, C, H, W):
    data = te.placeholder(shape=(N, C, H, W), dtype='float32', name='data')
    index = te.placeholder(shape=(H,), dtype='int', name='index')
    Y = sample_compute(data, index) 

    cfg = autotvm.get_config()
    cfg.add_flop(data.shape[0] * data.shape[1] * data.shape[2] * data.shape[3])
    s = schedule_sample_compute(cfg, [Y])
    return s, [data, index, Y]


if __name__ == "__main__":
    with tvm.target.Target(tgt):
        N, C, H, W = (1, 1, 5, 5)
        # test te.compute basic
        a = tvm.nd.array(np.ones((N, C, H, W), dtype='float32'), ctx)
        b = tvm.nd.array(np.ones((H,), dtype='int32'), ctx)
        c = tvm.nd.array(np.zeros((N, C, H, W), dtype='float32'), ctx)
        s, arg_bufs = sample_template(N, C, H, W) 
        func = tvm.build(s, arg_bufs)
        func(a, b, c)
        # start tunning
        sample_autotvm_tune(N, C, H, W)

@huangteng I can not reproduce it. Could you try the latest code of tvm?

vinx13 · April 28, 2021, 9:44pm

It is related to random tensor being used as indices during auto tuning, which causes illegal memory access. Setting custom input for tuning is a missing feature for AutoTVM (auto scheduler has implemented some similar logics to address this issue)

huangteng · April 29, 2021, 1:06am

Thanks, indeed this is the root cause. I found that in

github.com

apache/tvm/blob/main/python/tvm/autotvm/measure/measure_methods.py#L584-L590


        "Please make sure USE_RANDOM is ON in the config.cmake " "on the remote devices"
    )
args = [nd.array(np.zeros(x[0], dtype=x[1]), device=dev) for x in build_result.arg_info]
if "scatter" not in measure_input.task.name:
    # the index tensor of scatter op cannot be randomly initialized
    for arg in args:
        random_fill(arg)

It checks the measure_input.task.name and if it is “scatter” it will fill with args. However, it seems that there is no way to set this when creating the tunner task, don’t know how this scatter is used.

By the way, could you please share some details about how auto scheduler workaround this issue ? And whether it will be a complex work to apply the similar fix in autoTVM ?

vinx13 · April 29, 2021, 1:24am

It has task_inputs argument:

github.com

apache/tvm/blob/main/tutorials/auto_scheduler/tune_sparse_x86.py#L111-L122


prefix = "sparse_dense_bsr_%d_%d_%d_%d_%.2f_" % (N, K, BS_R, BS_C, density)
task = tvm.auto_scheduler.SearchTask(
    func=sparse_dense,
    args=(M, N, K, W_sp_np.data.shape, W_sp_np.indices.shape, W_sp_np.indptr.shape, "float32"),
    target=target,
    task_inputs={
        prefix + "W_data": runtime.ndarray.array(W_sp_np.data),
        prefix + "W_indices": runtime.ndarray.array(W_sp_np.indices),
        prefix + "W_indptr": runtime.ndarray.array(W_sp_np.indptr),
    },
    task_inputs_save_to_file=True,
)

github.com

apache/tvm/blob/main/python/tvm/auto_scheduler/search_task.py#L389-L391


task_inputs=None,
task_inputs_overwrite=False,
task_inputs_save_to_file=False,

huangteng · May 14, 2021, 9:11am

Thanks, by the way, I notice that in the measure.py code below:

github.com

apache/tvm/blob/main/python/tvm/autotvm/measure/measure_methods.py#L584-L589


args = [nd.empty(x[0], x[1], dev) for x in build_result.arg_info]
if "scatter" not in measure_input.task.name:
    # the index tensor of scatter op cannot be randomly initialized
    for arg in args:
        random_fill(arg)
dev.sync()

if the ‘scatter’ is set, the tensor will not be randomly filled, what does this scatter mean ? And is that possible to trigger this scatter option to workaround this issue ? (I tried to comment the random_fill(), it indeed solve the crash issue, but I don’t know how to control this scatter value from user api level). Appreciate your tips

huangteng · May 17, 2021, 1:11am

@tqchen any comments about this ? Appreciated.

vinx13 · May 17, 2021, 2:55am

Scatter is an op in topi. Currently it doesn’t support customized input for user created workload, this is something that needs improvement. But you can quickly add else if branch for your task name.