Hello!
I am trying to compile and tune my PyTorch model in TVM, but I ran into the problem shown below. I have already tried approaches such as `mod = relay.transform.DynamicToStatic()(mod)` and `mod, params = relay.frontend.from_onnx(onnx_mod, freeze_params=True)`, but neither works for me.
Could you please help me figure out where my problem is and how I can solve it? Thanks a lot!
my torch model:
class PageRank_CPU(nn.Module):
    """Iterative PageRank over an edge list, computed on CPU.

    forward() arguments:
        source:      1-D tensor of edge source vertex ids.
        target:      1-D tensor of edge target vertex ids (same length as source).
        init_vertex: 1-D float tensor, one entry per vertex (initial rank mass).
        iteration:   maximum number of power iterations.
        vertex_num:  total number of vertices.

    Returns the converged (or last) rank vector; its entries sum to 1.
    """

    def __init__(self):
        super().__init__()

    def forward(self, source, target, init_vertex, iteration, vertex_num):
        # scatter_add requires int64 (LongTensor) indices; the original .int()
        # cast produced int32 indices, which PyTorch rejects at runtime.
        source = source.long()
        target = target.long()

        # Out-degree of each vertex: count how often it appears as a source.
        V_out_deg = torch.zeros_like(init_vertex, dtype=torch.int32)
        V_out_deg = V_out_deg.scatter_add(
            0, source, torch.ones_like(source, dtype=torch.int32)
        )
        # Dangling vertices (no outgoing edges): their rank mass is
        # redistributed uniformly each round via blind_sum below.
        mask = (V_out_deg == 0)

        # Normalize the initial vector so the total rank mass is 1.
        V_old = init_vertex
        total = torch.sum(V_old)  # renamed: 'sum' shadowed the builtin
        V_old = V_old / total

        # Power iteration with damping factor 0.85.
        step = torch.tensor(0)  # renamed: 'round' shadowed the builtin
        while step < iteration:
            V_new = torch.zeros_like(init_vertex)
            # Rank mass pushed along each outgoing edge.  Division yields inf
            # for dangling vertices, but those never appear in 'source', so
            # the inf entries are never gathered below.
            V_old_temp = V_old / V_out_deg
            # Total rank mass currently held by dangling vertices.
            blind_sum = torch.masked_select(V_old, mask).sum()
            V_new = V_new.scatter_add(0, target, V_old_temp[source])
            V_new = V_new * 0.85 + (0.15 + blind_sum * 0.85) / vertex_num
            diff = torch.abs(V_new - V_old).sum()
            V_old = V_new
            step += 1
            # Early exit once the L1 change falls below tolerance.
            if torch.lt(diff, 1e-7):
                break
        return V_old
Transform into Relay and compile:
# Trace the eager model so the Relay frontend can consume TorchScript.
scripted_model = torch.jit.trace(model, example_inputs=dummy_input)

input_names = ["source", "target", "init_vertex"]
output_names = ["output_0"]

import tvm
from tvm import relay

print(tvm.__version__)

# Pair each graph input name with the shape of its matching example tensor.
shape_list = list(zip(input_names, [t.shape for t in dummy_input]))
print(shape_list)

# Import the TorchScript graph into Relay, then try to make
# any dynamic shapes static.
mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
mod = relay.transform.DynamicToStatic()(mod)

target = tvm.target.Target("llvm", host="llvm")
dev = tvm.cpu(0)

with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)
output:
Traceback (most recent call last):
File "/home/zht/GraphDemos/onnx_test.py", line 21, in <module>
lib = relay.build(mod, target=target, params=params)
File "/home/zht/anaconda3/envs/torch/lib/python3.7/site-packages/tvm-0.10.0-py3.7-linux-x86_64.egg/tvm/relay/build_module.py", line 372, in build
mod_name=mod_name,
File "/home/zht/anaconda3/envs/torch/lib/python3.7/site-packages/tvm-0.10.0-py3.7-linux-x86_64.egg/tvm/relay/build_module.py", line 169, in build
mod_name,
File "/home/zht/anaconda3/envs/torch/lib/python3.7/site-packages/tvm-0.10.0-py3.7-linux-x86_64.egg/tvm/_ffi/_ctypes/packed_func.py", line 237, in __call__
raise get_last_ffi_error()
tvm._ffi.base.TVMError: Traceback (most recent call last):
381: TVMFuncCall
380: tvm::relay::backend::RelayBuildModule::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#3}::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
379: tvm::relay::backend::RelayBuildModule::BuildRelay(tvm::IRModule, tvm::runtime::String const&)
378: tvm::runtime::PackedFuncObj::Extractor<tvm::runtime::PackedFuncSubObj<tvm::relay::backend::GraphExecutorCodegenModule::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#2}> >::Call(tvm::runtime::PackedFuncObj const*, tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)
377: tvm::relay::backend::GraphExecutorCodegen::Codegen(tvm::IRModule, tvm::relay::Function, tvm::runtime::String)
376: tvm::relay::GraphPlanMemory(tvm::relay::Function const&)
375: tvm::relay::StorageAllocator::Plan(tvm::relay::Function const&)
374: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
373: tvm::relay::transform::DeviceAwareExprVisitor::VisitExpr_(tvm::relay::FunctionNode const*)
372: tvm::relay::StorageAllocaBaseVisitor::DeviceAwareVisitExpr_(tvm::relay::FunctionNode const*)
371: tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&)
370: tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&)
.......
Check failed: (pval != nullptr) is false: Cannot allocate memory symbolic tensor shape [?, 1]
The ONNX version, using freeze_params=True:
# Export the traced model to ONNX, then import it into Relay.
torch.onnx.export(
    scripted_model,
    dummy_input,
    'test.onnx',
    input_names=input_names,
    output_names=output_names,
    verbose=True,
    opset_version=16,
)

onnx_mod = onnx.load_model('test.onnx')
# freeze_params=True folds the ONNX initializers into the graph as constants.
mod, params = relay.frontend.from_onnx(onnx_mod, freeze_params=True)
mod = relay.transform.DynamicToStatic()(mod)

target = tvm.target.Target("llvm", host="llvm")
dev = tvm.cpu(0)

with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)
This failed with the same output.
My configuration: torch 1.13, TVM 0.10, ONNX 1.13.
Please help me with this problem — I have been stuck on it for several weeks. Thanks a lot!