Building the runtime factory for the TVM graph executor fails

Hi, I am using Relay to build a runtime module. I follow the steps below:

1. Export the PyTorch model to ONNX.
2. Import the ONNX model with the Relay frontend.
3. Build the runtime module.

But it fails with the following error:

tvm._ffi.base.TVMError: Traceback (most recent call last):
  46: TVMFuncCall
  45: std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), tvm::relay::backend::RelayBuildModule::GetFunction(std::string const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#3}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&)
  44: tvm::relay::backend::RelayBuildModule::BuildRelay(tvm::IRModule, std::unordered_map<std::string, tvm::runtime::NDArray, std::hash<std::string>, std::equal_to<std::string>, std::allocator<std::pair<std::string const, tvm::runtime::NDArray> > > const&, tvm::runtime::String)
  43: tvm::relay::backend::ExecutorCodegen::Codegen(tvm::relay::Function const&, tvm::runtime::String)
  42: std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), tvm::relay::backend::GraphExecutorCodegenModule::GetFunction(std::string const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#2}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&)
  41: tvm::relay::backend::GraphExecutorCodegen::Codegen(tvm::relay::Function, tvm::runtime::String)
  40: tvm::relay::GraphPlanMemory(tvm::relay::Function const&)
  39: tvm::relay::StorageAllocator::Plan(tvm::relay::Function const&)
  38: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
  37: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::FunctionNode const*)
  36: tvm::relay::StorageAllocaBaseVisitor::DeviceAwareVisitExpr_(tvm::relay::FunctionNode const*)
  35: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
  34: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
  33: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::CallNode const*)
  32: tvm::relay::StorageAllocator::DeviceAwareVisitExpr_(tvm::relay::CallNode const*)
  31: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
  30: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
  29: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::CallNode const*)
  28: tvm::relay::StorageAllocator::DeviceAwareVisitExpr_(tvm::relay::CallNode const*)
  27: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
  26: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
  25: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::CallNode const*)
  24: tvm::relay::StorageAllocator::DeviceAwareVisitExpr_(tvm::relay::CallNode const*)
  23: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
  22: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
  21: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::CallNode const*)
  20: tvm::relay::StorageAllocator::DeviceAwareVisitExpr_(tvm::relay::CallNode const*)
  19: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
  18: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
  17: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::CallNode const*)
  16: tvm::relay::StorageAllocator::DeviceAwareVisitExpr_(tvm::relay::CallNode const*)
  15: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
  14: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
  13: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::CallNode const*)
  12: tvm::relay::StorageAllocator::DeviceAwareVisitExpr_(tvm::relay::CallNode const*)
  11: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
  10: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
  9: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::CallNode const*)
  8: tvm::relay::StorageAllocator::DeviceAwareVisitExpr_(tvm::relay::CallNode const*)
  7: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
  6: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
  5: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::CallNode const*)
  4: tvm::relay::StorageAllocator::DeviceAwareVisitExpr_(tvm::relay::CallNode const*)
  3: tvm::relay::StorageAllocator::CreateTokenOnDevice(tvm::RelayExprNode const*, DLDeviceType, bool)
  2: tvm::relay::StorageAllocator::Request(tvm::relay::StorageToken*)
  1: tvm::relay::StorageAllocator::GetMemorySize(tvm::relay::StorageToken*)
  0: _ZN3tvm7runtime6deta
  File "/root/BlazerML-tvm/src/relay/backend/graph_plan_memory.cc", line 408
TVMError: 
---------------------------------------------------------------
An error occurred during the execution of TVM.
For more information, please see: https://tvm.apache.org/docs/errors.html
---------------------------------------------------------------
  Check failed: (pval != nullptr) is false: Cannot allocate memory symbolic tensor shape [?, ?, ?, ?]

The PyTorch code is as follows:

import torch
import torch.nn as nn

class MHA(nn.Module):
    def __init__(self):
        super().__init__()
        self.scale = 0.0001
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, q, k, v, bias, mask=None):
        b, n, s, h = q.shape
        q = q * self.scale
        attn = (q @ k.transpose(-2, -1))
        attn = attn + bias
        if mask is not None:
            attn = attn.view(1, b, n, s, s//4) + mask.unsqueeze(1).unsqueeze(0)
            attn = attn.view(-1, n, s, s//4)
        attn = self.softmax(attn)
        attn = attn @ v
        return attn

b = 252
n = 4
s = 1024
h = 32

q = torch.randn((b, n, s, h)).cuda()
k = torch.randn((b, n, s//4, h)).cuda()
v = torch.randn((b, n, s//4, h)).cuda()
print(q.shape)
print(k.shape)
print(v.shape)
mask = torch.randn((b, s, s//4)).cuda()
bias = torch.randn((1, n, s, s//4)).cuda()
model = MHA().cuda()
with torch.no_grad():
    x = model(q, k, v, bias, mask)
print(x.shape)

torch.onnx.export(
    model=model,
    args=(q,k,v,bias,mask),
    f="psa.onnx",
    input_names=["q","k","v","bias","mask"],
    output_names=["attn"],
    export_params=True,
    opset_version=16,
    do_constant_folding=True,
)
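
One likely source of the symbolic `[?, ?, ?, ?]` shape: under tracing, `b, n, s, h = q.shape` is recorded as shape operators, so the two `view` calls are exported as data-dependent Reshape nodes in the ONNX graph. A minimal sketch of a possible workaround (the class name `MHAStatic` is mine, and it assumes the shapes really are fixed at export time) is to cast the traced sizes to plain Python ints, so the Reshape targets are exported as constants:

import torch
import torch.nn as nn

class MHAStatic(nn.Module):
    # hypothetical variant of MHA above: same math, but the traced sizes are
    # cast to plain Python ints so the exported Reshape targets are constants
    # (assumes the input shapes really are fixed at export time)
    def __init__(self):
        super().__init__()
        self.scale = 0.0001
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, q, k, v, bias, mask=None):
        b, n, s, h = map(int, q.shape)  # ints, not traced shape tensors
        q = q * self.scale
        attn = q @ k.transpose(-2, -1) + bias
        if mask is not None:
            attn = attn.view(1, b, n, s, s // 4) + mask.unsqueeze(1).unsqueeze(0)
            attn = attn.view(b, n, s, s // 4)
        attn = self.softmax(attn)
        return attn @ v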

And the TVM deployment code:

import onnx
import tvm
from tvm import relay

opt_level = 3
target = tvm.target.Target("cuda")
onnx_model = onnx.load('psa.onnx')
mod, params = relay.frontend.from_onnx(onnx_model)
with tvm.transform.PassContext(opt_level=opt_level):
    lib = relay.build(mod, target, params=params)
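
For reference, a sketch of two things that may avoid the symbolic shapes on the TVM side (not verified against v0.8.0): pass an explicit shape dict to relay.frontend.from_onnx, and run the DynamicToStatic pass before building, so the graph executor only sees static shapes:

b, n, s, h = 252, 4, 1024, 32
shape_dict = {
    "q": (b, n, s, h),
    "k": (b, n, s // 4, h),
    "v": (b, n, s // 4, h),
    "bias": (1, n, s, s // 4),
    "mask": (b, s, s // 4),
}
mod, params = relay.frontend.from_onnx(onnx_model, shape=shape_dict)
# fold remaining dynamic shapes into constants where possible
mod = relay.transform.DynamicToStatic()(mod)
with tvm.transform.PassContext(opt_level=opt_level):
    lib = relay.build(mod, target, params=params)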

The TVM version is 0.8.0.

v0.8.0 was released many years ago; please try the latest version and the Relax (unity) flow. :slight_smile:
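
A rough sketch of the Relax ONNX import path on a recent TVM build (module paths and pass names may differ between versions, so treat this as a starting point rather than a verified recipe):

import onnx
import tvm
from tvm import relax
from tvm.relax.frontend.onnx import from_onnx

onnx_model = onnx.load("psa.onnx")
shape_dict = {
    "q": (252, 4, 1024, 32),
    "k": (252, 4, 256, 32),
    "v": (252, 4, 256, 32),
    "bias": (1, 4, 1024, 256),
    "mask": (252, 1024, 256),
}
mod = from_onnx(onnx_model, shape_dict=shape_dict)
mod = relax.transform.LegalizeOps()(mod)  # lower high-level ops to TIR
ex = relax.build(mod, target="llvm")      # CPU target to keep the sketch simple
vm = relax.VirtualMachine(ex, tvm.cpu())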