IndexError when running the relax static_shape_tuning pipeline on a UNet model

Environment

  • TVM version: self-built mainline TVM (0.21.0.dev)
  • Operating System: Ubuntu 24.04
  • Python version: 3.12
  • Hardware (CPU/GPU): RTX 4060 with CUDA 12.8

Problem Description

I’m encountering an IndexError when trying to optimize a UNet model (google/ddpm-cat-256 from diffusers). The error occurs during the relax.get_pipeline("static_shape_tuning", ...) call, while meta_schedule is tuning.

Here is the code I used:


from diffusers import UNet2DModel

import os
import numpy as np
import torch
from torch.export import export
from time import time
import tvm
from tvm import relax
from tvm.relax.frontend.torch import from_exported_program

TORCH_DEVICE = torch.device("cuda:0")

MODEL_ID = "google/ddpm-cat-256"
sample = torch.randn(1, 3, 256, 256, dtype=torch.float32).to(TORCH_DEVICE)

timestep = torch.tensor(1, dtype=torch.float32).to(TORCH_DEVICE)  # 0-d (scalar) example timestep

example_args = (sample, timestep)
example_kwargs = {}

torch_model = UNet2DModel.from_pretrained(
    MODEL_ID,
    use_safetensors=True
).to(TORCH_DEVICE).eval()

IS_IN_CI = os.getenv("CI", "") == "true"

if not IS_IN_CI:
    with torch.no_grad():
        exported_program = export(torch_model, example_args)
        print("entering from_exported_program")
        mod = from_exported_program(exported_program, keep_params_as_input=True)

    mod, params = relax.frontend.detach_params(mod)
    mod.show()


TOTAL_TRIALS = 8000
target = tvm.target.Target("nvidia/geforce-rtx-4090")  # NOTE: 4090 target tag, but my actual GPU is an RTX 4060
work_dir = "tuning_logs"

if not IS_IN_CI:
    print("entering pipeline")
    mod = relax.get_pipeline("static_shape_tuning", target=target, total_trials=TOTAL_TRIALS)(mod)
    print("pipeline finished")
    mod["main"].show()


if not IS_IN_CI:
    ex = tvm.compile(mod, target="cuda")
    dev = tvm.device("cuda", 0)
    vm = relax.VirtualMachine(ex, dev)

    for i in range(500):
        start_time = time()
        gpu_sample = tvm.nd.array(np.random.rand(1, 3, 256, 256).astype(np.float32), dev)
        # 0-d array to match the scalar example timestep used at export time
        gpu_timestep = tvm.nd.array(np.array(1, dtype=np.float32), dev)

        gpu_params = [tvm.nd.array(p, dev) for p in params["main"]]

        gpu_out = vm["main"](gpu_sample, gpu_timestep, *gpu_params)

        print(gpu_out)
        print(f"Time taken: {time() - start_time:.4f} seconds")

It’s modified from the e2e_opt_model.py example in TVM’s documentation; the model being exported is essentially the only change.
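If I remember the tutorial correctly, its model setup is the only part I swapped out; everything downstream (export, static_shape_tuning, compile, run) is unchanged:

from torchvision.models.resnet import ResNet18_Weights, resnet18
torch_model = resnet18(weights=ResNet18_Weights.DEFAULT).eval()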

The error message is as follows:

Traceback (most recent call last):
  6: _ZN3tvm7runtime13PackedFunc
  5: tvm::runtime::TypedPackedFunc<void (tvm::meta_schedule::TaskScheduler, tvm::runtime::Array<tvm::meta_schedule::TuneContext, void>, tvm::runtime::Array<tvm::FloatImm, void>, int, int, int, tvm::meta_schedule::Builder, tvm::meta_schedule::Runner, tvm::runtime::Array<tvm::meta_schedule::MeasureCallback, void>, tvm::runtime::Optional<tvm::meta_schedule::Database>, tvm::runtime::Optional<tvm::meta_schedule::CostModel>)>::AssignTypedLambda<tvm::runtime::Registry::set_body_method<tvm::meta_schedule::TaskScheduler, tvm::meta_schedule::TaskSchedulerNode, void, tvm::runtime::Array<tvm::meta_schedule::TuneContext, void>, tvm::runtime::Array<tvm::FloatImm, void>, int, int, int, tvm::meta_schedule::Builder, tvm::meta_schedule::Runner, tvm::runtime::Array<tvm::meta_schedule::MeasureCallback, void>, tvm::runtime::Optional<tvm::meta_schedule::Database>, tvm::runtime::Optional<tvm::meta_schedule::CostModel>, void>(void (tvm::meta_schedule::TaskSchedulerNode::*)(tvm::runtime::Array<tvm::meta_schedule::TuneContext, void>, tvm::runtime::Array<tvm::FloatImm, void>, int, int, int, tvm::meta_schedule::Builder, tvm::meta_schedule::Runner, tvm::runtime::Array<tvm::meta_schedule::MeasureCallback, void>, tvm::runtime::Optional<tvm::meta_schedule::Database>, tvm::runtime::Optional<tvm::meta_schedule::CostModel>))::{lambda(tvm::meta_schedule::TaskScheduler, tvm::runtime::Array<tvm::meta_schedule::TuneContext, void>, tvm::runtime::Array<tvm::FloatImm, void>, int, int, int, tvm::meta_schedule::Builder, tvm::meta_schedule::Runner, tvm::runtime::Array<tvm::meta_schedule::MeasureCallback, void>, tvm::runtime::Optional<tvm::meta_schedule::Database>, tvm::runtime::Optional<tvm::meta_schedule::CostModel>)#1}>(tvm::runtime::Registry::set_body_method<tvm::meta_schedule::TaskScheduler, tvm::meta_schedule::TaskSchedulerNode, void, tvm::runtime::Array<tvm::meta_schedule::TuneContext, void>, tvm::runtime::Array<tvm::FloatImm, void>, int, int, int, tvm::meta_schedule::Builder, tvm::meta_schedule::Runner, tvm::runtime::Array<tvm::meta_schedule::MeasureCallback, void>, tvm::runtime::Optional<tvm::meta_schedule::Database>, tvm::runtime::Optional<tvm::meta_schedule::CostModel>, void>(void (tvm::meta_schedule::TaskSchedulerNode::*)(tvm::runtime::Array<tvm::meta_schedule::TuneContext, void>, tvm::runtime::Array<tvm::FloatImm, void>, int, int, int, tvm::meta_schedule::Builder, tvm::meta_schedule::Runner, tvm::runtime::Array<tvm::meta_schedule::MeasureCallback, void>, tvm::runtime::Optional<tvm::meta_schedule::Database>, tvm::runtime::Optional<tvm::meta_schedule::CostModel>))::{lambda(tvm::meta_schedule::TaskScheduler, tvm::runtime::Array<tvm::meta_schedule::TuneContext, void>, tvm::runtime::Array<tvm::FloatImm, void>, int, int, int, tvm::meta_schedule::Builder, tvm::meta_schedule::Runner, tvm::runtime::Array<tvm::meta_schedule::MeasureCallback, void>, tvm::runtime::Optional<tvm::meta_schedule::Database>, tvm::runtime::Optional<tvm::meta_schedule::CostModel>)#1}, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}::operator()(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*) const [clone .isra.0]
  4: tvm::meta_schedule::GradientBasedNode::Tune(tvm::runtime::Array<tvm::meta_schedule::TuneContext, void>, tvm::runtime::Array<tvm::FloatImm, void>, int, int, int, tvm::meta_schedule::Builder, tvm::meta_schedule::Runner, tvm::runtime::Array<tvm::meta_schedule::MeasureCallback, void>, tvm::runtime::Optional<tvm::meta_schedule::Database>, tvm::runtime::Optional<tvm::meta_schedule::CostModel>)
  3: tvm::meta_schedule::TaskSchedulerNode::Tune(tvm::runtime::Array<tvm::meta_schedule::TuneContext, void>, tvm::runtime::Array<tvm::FloatImm, void>, int, int, int, tvm::meta_schedule::Builder, tvm::meta_schedule::Runner, tvm::runtime::Array<tvm::meta_schedule::MeasureCallback, void>, tvm::runtime::Optional<tvm::meta_schedule::Database>, tvm::runtime::Optional<tvm::meta_schedule::CostModel>)
  2: tvm::meta_schedule::PostOrderApplyNode::GenerateDesignSpace(tvm::IRModule const&)
  1: tvm::meta_schedule::CrossThreadReductionNode::Apply(tvm::tir::Schedule const&, tvm::tir::BlockRV const&)
  0: tvm::runtime::Array<tvm::runtime::ObjectRef, void>::operator[](long) const
  File "/home/x17/code_repo/tvm/include/tvm/runtime/container/array.h", line 414
InternalError: Check failed: (0 <= i && i < p->size_) is false: IndexError: indexing 6 on an array of size 6
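
Reading the trace, the failure seems to come from meta_schedule itself: frames 1 and 2 show the CrossThreadReduction schedule rule crashing while PostOrderApply generates the design space, before any measurement happens. To narrow down which kernel triggers it, I was planning to extract the tuning tasks and tune them one at a time until one reproduces the crash. A minimal triage sketch (my own idea, not from the tutorial; extract_tasks and tune_tir are tvm.meta_schedule APIs as I understand them):

from tvm import meta_schedule as ms

# List every TIR task that the static_shape_tuning pipeline would tune.
tasks = ms.relax_integration.extract_tasks(mod, target=target)
for i, task in enumerate(tasks):
    print(i, task.task_name)
    # Tune each task in isolation with a tiny budget; whichever task hits the
    # same IndexError points at the offending PrimFunc.
    ms.tune_tir(task.dispatched[0], target=target,
                work_dir=f"triage_{i}", max_trials_global=8)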

Is this the correct way to optimize a model like this, and how can I work around this error?
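
In the meantime, is there a supported way to compile this model without meta_schedule tuning? My (possibly wrong) understanding of the dlight module is that its default GPU schedules can stand in for tuning; an untested sketch of that fallback:

from tvm import dlight as dl

with target:
    mod = tvm.ir.transform.Sequential([
        relax.get_pipeline("zero"),  # legalize + basic Relax passes, no tuning
        dl.ApplyDefaultSchedule(     # rule-based GPU schedules instead of search
            dl.gpu.Matmul(),
            dl.gpu.GEMV(),
            dl.gpu.Reduction(),
            dl.gpu.GeneralReduction(),
            dl.gpu.Fallback(),
        ),
    ])(mod)
ex = tvm.compile(mod, target="cuda")

If that compiles and runs, it would at least confirm the problem is isolated to the tuning rules.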

Any help or suggestions would be greatly appreciated!

I have this problem too. Have you solved it?

No, I have no idea how to solve this…