Hi, I am using Relay to build a runtime module. I follow these steps: 1. export the PyTorch model to ONNX; 2. import the ONNX model with the Relay frontend; 3. build the runtime module. But the build fails with the following error:
tvm._ffi.base.TVMError: Traceback (most recent call last):
46: TVMFuncCall
45: std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), tvm::relay::backend::RelayBuildModule::GetFunction(std::string const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#3}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&)
44: tvm::relay::backend::RelayBuildModule::BuildRelay(tvm::IRModule, std::unordered_map<std::string, tvm::runtime::NDArray, std::hash<std::string>, std::equal_to<std::string>, std::allocator<std::pair<std::string const, tvm::runtime::NDArray> > > const&, tvm::runtime::String)
43: tvm::relay::backend::ExecutorCodegen::Codegen(tvm::relay::Function const&, tvm::runtime::String)
42: std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), tvm::relay::backend::GraphExecutorCodegenModule::GetFunction(std::string const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#2}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&)
41: tvm::relay::backend::GraphExecutorCodegen::Codegen(tvm::relay::Function, tvm::runtime::String)
40: tvm::relay::GraphPlanMemory(tvm::relay::Function const&)
39: tvm::relay::StorageAllocator::Plan(tvm::relay::Function const&)
38: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
37: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::FunctionNode const*)
36: tvm::relay::StorageAllocaBaseVisitor::DeviceAwareVisitExpr_(tvm::relay::FunctionNode const*)
35: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
34: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
33: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::CallNode const*)
32: tvm::relay::StorageAllocator::DeviceAwareVisitExpr_(tvm::relay::CallNode const*)
31: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
30: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
29: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::CallNode const*)
28: tvm::relay::StorageAllocator::DeviceAwareVisitExpr_(tvm::relay::CallNode const*)
27: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
26: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
25: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::CallNode const*)
24: tvm::relay::StorageAllocator::DeviceAwareVisitExpr_(tvm::relay::CallNode const*)
23: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
22: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
21: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::CallNode const*)
20: tvm::relay::StorageAllocator::DeviceAwareVisitExpr_(tvm::relay::CallNode const*)
19: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
18: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
17: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::CallNode const*)
16: tvm::relay::StorageAllocator::DeviceAwareVisitExpr_(tvm::relay::CallNode const*)
15: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
14: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
13: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::CallNode const*)
12: tvm::relay::StorageAllocator::DeviceAwareVisitExpr_(tvm::relay::CallNode const*)
11: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
10: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
9: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::CallNode const*)
8: tvm::relay::StorageAllocator::DeviceAwareVisitExpr_(tvm::relay::CallNode const*)
7: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
6: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
5: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::CallNode const*)
4: tvm::relay::StorageAllocator::DeviceAwareVisitExpr_(tvm::relay::CallNode const*)
3: tvm::relay::StorageAllocator::CreateTokenOnDevice(tvm::RelayExprNode const*, DLDeviceType, bool)
2: tvm::relay::StorageAllocator::Request(tvm::relay::StorageToken*)
1: tvm::relay::StorageAllocator::GetMemorySize(tvm::relay::StorageToken*)
0: _ZN3tvm7runtime6deta
File "/root/BlazerML-tvm/src/relay/backend/graph_plan_memory.cc", line 408
TVMError:
---------------------------------------------------------------
An error occurred during the execution of TVM.
For more information, please see: https://tvm.apache.org/docs/errors.html
---------------------------------------------------------------
Check failed: (pval != nullptr) is false: Cannot allocate memory symbolic tensor shape [?, ?, ?, ?]
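From the message, memory planning fails because some intermediate tensor has a fully symbolic shape [?, ?, ?, ?], i.e. the imported Relay graph contains a dynamic reshape whose output size cannot be computed statically. As a guess (not a confirmed fix), I wondered whether folding the shape computations at import time would help; a minimal sketch against the from_onnx call in the deploy code below:

# Sketch of a possible workaround (assumption, not verified):
# freeze_params embeds the weights as constants so Shape/Gather
# chains can fold, and DynamicToStatic then rewrites dynamic ops
# (e.g. dyn.reshape) into static counterparts where possible.
mod, params = relay.frontend.from_onnx(onnx_model, freeze_params=True)
mod = relay.transform.DynamicToStatic()(mod)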
The PyTorch code is as follows:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
class MHA(nn.Module):
    def __init__(self):
        super().__init__()
        self.scale = 0.0001
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, q, k, v, bias, mask=None):
        b, n, s, h = q.shape
        q = q * self.scale
        attn = q @ k.transpose(-2, -1)  # (b, n, s, s//4)
        attn = attn + bias
        if mask is not None:
            # broadcast the mask over heads, then flatten back
            attn = attn.view(1, b, n, s, s // 4) + mask.unsqueeze(1).unsqueeze(0)
            attn = attn.view(-1, n, s, s // 4)
        attn = self.softmax(attn)
        attn = attn @ v
        return attn
b = 252
n = 4
s = 1024
h = 32
q = torch.randn((b, n, s, h)).cuda()
k = torch.randn((b, n, s//4, h)).cuda()
v = torch.randn((b, n, s//4, h)).cuda()
print(q.shape)
print(k.shape)
print(v.shape)
mask = torch.randn((b, s, s//4)).cuda()
bias = torch.randn((1, n, s, s//4)).cuda()
model = MHA().cuda()
with torch.no_grad():
    x = model(q, k, v, bias, mask)
print(x.shape)
torch.onnx.export(
    model=model,
    args=(q, k, v, bias, mask),
    f="psa.onnx",
    input_names=["q", "k", "v", "bias", "mask"],
    output_names=["attn"],
    export_params=True,
    opset_version=16,
    do_constant_folding=True,
)
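I suspect the shape unpacking in forward() is what makes the exported graph dynamic: during tracing, b, n, s, h = q.shape is recorded as Shape/Gather ops, so the later view(...) calls become data-dependent Reshape nodes in ONNX. A sketch of the change I would try, assuming static shapes are acceptable:

def forward(self, q, k, v, bias, mask=None):
    # Assumption: int(...) forces the traced shape values to plain
    # Python ints, so the view(...) calls below are exported with
    # constant target shapes instead of Shape/Gather chains.
    b, n, s, h = map(int, q.shape)
    # ... rest of forward unchanged ...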
And the code that deploys to TVM:
import onnx
import numpy as np
import tvm
from tvm import relay
from tvm.contrib import graph_executor

opt_level = 3
target = tvm.target.Target("cuda")

onnx_model = onnx.load('psa.onnx')
mod, params = relay.frontend.from_onnx(onnx_model)
with tvm.transform.PassContext(opt_level=opt_level):
    lib = relay.build(mod, target, params=params)
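For completeness, once relay.build succeeds I intend to run the module with the graph executor along these lines (standard TVM 0.8 usage; shapes and dtypes mirror the PyTorch tensors above):

# Continues the deploy script above; assumes relay.build succeeded.
dev = tvm.cuda(0)
m = graph_executor.GraphModule(lib["default"](dev))
for name, shape in {
    "q": (252, 4, 1024, 32),
    "k": (252, 4, 256, 32),
    "v": (252, 4, 256, 32),
    "bias": (1, 4, 1024, 256),
    "mask": (252, 1024, 256),
}.items():
    m.set_input(name, tvm.nd.array(np.random.randn(*shape).astype("float32"), dev))
m.run()
out = m.get_output(0)
print(out.shape)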
The TVM version is 0.8.0. What is causing this error, and how can I work around it?