Issue with Graph runtime for nested function call. Works fine on VM

I'm facing an issue with the code below on the graph runtime; it works fine with the VM, however. I am trying to call a function from within another function. The corresponding Relay snippets are included in the source code below for quick reference.

Also, I was hitting a separate error caused by the FoldScaleAxis pass, so I have disabled it for the time being (the stack trace with the pass enabled is at the bottom). Any help or pointers would be much appreciated.

Code

import tvm
from tvm import relay
import numpy as np
from tvm.contrib import graph_runtime


def run_on_graph_runtime(mod, in_names, in_data):
    target = 'llvm'
    target_host = None
    opt_level = 3

    # Disabled FoldScaleAxis
    with relay.build_config(opt_level=opt_level, disabled_pass=["FoldScaleAxis"]):
        graph, lib, params = relay.build(mod, target, target_host, {})

    ctx = tvm.context(target, 0)
    m = graph_runtime.create(graph, lib, ctx)
    for e, i in zip(in_names, in_data):
        m.set_input(e, tvm.nd.array(i))
    m.set_input(**params)
    m.run()
    return m.get_output(0).asnumpy()


mod = tvm.IRModule()
sb = relay.ScopeBuilder()
x = relay.var('x', 'float32')
y = relay.var('y', 'float32')
sb.ret(x + y)
func1 = relay.Function([x, y], sb.get())

'''
fn (%x: float32, %y: float32) {
  add(%x, %y)
}
'''

sb = relay.ScopeBuilder()
x1 = relay.var('x1', 'float32')
y1 = relay.var('y1', 'float32')
func2 = sb.let('add_two', func1)  # sb.let returns the bound variable %add_two
sb.ret(func2(x1, y1))  # call the let-bound function from the outer function
op = sb.get()
f = relay.Function([x1, y1], op)

'''
fn (%x1: float32, %y1: float32) {
  let %add_two = fn (%x: float32, %y: float32) {
    add(%x, %y)
  };
  %add_two(%x1, %y1)
}
'''

mod["main"] = f
x_data = np.array(np.random.rand()).astype('float32')
y_data = np.array(np.random.rand()).astype('float32')

# VM is working
ex = relay.create_executor("vm", mod=mod, ctx=tvm.cpu(), target="llvm")
result = ex.evaluate()(*[x_data, y_data])

# graph runtime via create_executor is not working
ex = relay.create_executor("graph", mod=mod, ctx=tvm.cpu(), target="llvm")
result = ex.evaluate()(*[x_data, y_data])

# graph runtime via relay.build is also not working
res = run_on_graph_runtime(mod, ['x1', 'y1'], [x_data, y_data])

Stack trace:

test_func_call.py:None (test_func_call.py)
test_func_call.py:65: in <module>
    result = ex.evaluate()(*[x_data, y_data])
../../../python/tvm/relay/backend/interpreter.py:171: in evaluate
    return self._make_executor()
../../../python/tvm/relay/build_module.py:357: in _make_executor
    graph_json, mod, params = build(self.mod, target=self.target)
../../../python/tvm/relay/build_module.py:250: in build
    graph_json, mod, params = bld_mod.build(mod, target, target_host, params)
../../../python/tvm/relay/build_module.py:119: in build
    self._build(mod, target, target_host)
../../../python/tvm/_ffi/_ctypes/packed_func.py:213: in __call__
    raise get_last_ffi_error()
E   tvm._ffi.base.TVMError: Traceback (most recent call last):
E     [bt] (8) 9   libtvm.dylib                        0x000000011abc6726 tvm::relay::StorageAllocaInit::GetInitTokenMap(tvm::relay::Function const&) + 118
E     [bt] (7) 8   libtvm.dylib                        0x000000011abc698e tvm::relay::StorageAllocaBaseVisitor::Run(tvm::relay::Function const&) + 206
E     [bt] (6) 7   libtvm.dylib                        0x000000011abc48f8 tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&) + 40
E     [bt] (5) 6   libtvm.dylib                        0x000000011ac2809d tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&) + 253
E     [bt] (4) 5   libtvm.dylib                        0x000000011aa5d268 tvm::relay::ExprFunctor<void (tvm::RelayExpr const&)>::VisitExpr(tvm::RelayExpr const&) + 152
E     [bt] (3) 4   libtvm.dylib                        0x000000011aa5d55c tvm::NodeFunctor<void (tvm::runtime::ObjectRef const&, tvm::relay::ExprFunctor<void (tvm::RelayExpr const&)>*)>::operator()(tvm::runtime::ObjectRef const&, tvm::relay::ExprFunctor<void (tvm::RelayExpr const&)>*) const + 284
E     [bt] (2) 3   libtvm.dylib                        0x000000011abc4080 tvm::relay::StorageAllocaBaseVisitor::VisitExpr_(tvm::relay::LetNode const*) + 32
E     [bt] (1) 2   libtvm.dylib                        0x000000011abc4a20 tvm::relay::StorageAllocaBaseVisitor::GetToken(tvm::RelayExpr const&) + 336
E     [bt] (0) 1   libtvm.dylib                        0x000000011a386109 dmlc::LogMessageFatal::~LogMessageFatal() + 57
E     File "/Users/mahesh/git/deepak/tvm/src/relay/backend/graph_plan_memory.cc", line 121
E   TVMError: Check failed: it != token_map_.end():
collected 0 items / 1 error

Stack trace when the FoldScaleAxis pass is enabled:

test_func_call.py:None (test_func_call.py)
test_func_call.py:68: in <module>
    res = run_on_graph_runtime(mod, ['x1', 'y1'], [x_data, y_data])
test_func_call.py:14: in run_on_graph_runtime
    graph, lib, params = relay.build(mod, target, target_host, {})
../../../python/tvm/relay/build_module.py:250: in build
    graph_json, mod, params = bld_mod.build(mod, target, target_host, params)
../../../python/tvm/relay/build_module.py:119: in build
    self._build(mod, target, target_host)
../../../python/tvm/_ffi/_ctypes/packed_func.py:213: in __call__
    raise get_last_ffi_error()
E   tvm._ffi.base.TVMError: Traceback (most recent call last):
E     [bt] (8) 9   libtvm.dylib                        0x000000011736e268 tvm::relay::ExprFunctor<void (tvm::RelayExpr const&)>::VisitExpr(tvm::RelayExpr const&) + 152
E     [bt] (7) 8   libtvm.dylib                        0x000000011736e55c tvm::NodeFunctor<void (tvm::runtime::ObjectRef const&, tvm::relay::ExprFunctor<void (tvm::RelayExpr const&)>*)>::operator()(tvm::runtime::ObjectRef const&, tvm::relay::ExprFunctor<void (tvm::RelayExpr const&)>*) const + 284
E     [bt] (6) 7   libtvm.dylib                        0x00000001174125c6 tvm::relay::fold_scale_axis::ForwardPrep::VisitExpr_(tvm::relay::FunctionNode const*) + 38
E     [bt] (5) 6   libtvm.dylib                        0x0000000117539577 tvm::relay::ExprVisitor::VisitExpr_(tvm::relay::FunctionNode const*) + 199
E     [bt] (4) 5   libtvm.dylib                        0x000000011753909d tvm::relay::ExprVisitor::VisitExpr(tvm::RelayExpr const&) + 253
E     [bt] (3) 4   libtvm.dylib                        0x000000011736e268 tvm::relay::ExprFunctor<void (tvm::RelayExpr const&)>::VisitExpr(tvm::RelayExpr const&) + 152
E     [bt] (2) 3   libtvm.dylib                        0x000000011736e55c tvm::NodeFunctor<void (tvm::runtime::ObjectRef const&, tvm::relay::ExprFunctor<void (tvm::RelayExpr const&)>*)>::operator()(tvm::runtime::ObjectRef const&, tvm::relay::ExprFunctor<void (tvm::RelayExpr const&)>*) const + 284
E     [bt] (1) 2   libtvm.dylib                        0x00000001174127d5 tvm::relay::fold_scale_axis::ForwardPrep::VisitExpr_(tvm::relay::LetNode const*) + 85
E     [bt] (0) 1   libtvm.dylib                        0x0000000116c97109 dmlc::LogMessageFatal::~LogMessageFatal() + 57
E     File "/Users/mahesh/git/deepak/tvm/src/relay/transforms/fold_scale_axis.cc", line 243
E   TVMError: FoldScaleAxis only accept dataflow-form
collected 0 items / 1 error

That's expected, as the graph runtime is not supposed to handle these situations. You should use the VM if the program uses higher-order features.
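
For reference, a minimal sketch of the standalone VM path (assuming a TVM version where relay.vm.compile and tvm.runtime.vm.VirtualMachine are available; exact module paths vary between releases):

import tvm
from tvm import relay

# Compile the higher-order module with the Relay VM compiler instead of relay.build.
exe = relay.vm.compile(mod, target="llvm")
vm = tvm.runtime.vm.VirtualMachine(exe, tvm.cpu())

# The VM evaluates let-bound functions and nested calls directly.
result = vm.run(x_data, y_data)
print(result.asnumpy())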


Thanks for the confirmation. Actually, we wanted to add support for the PartitionedCall operator in the TensorFlow frontend, and for that we needed to support calling a function from the "main" function. It seems we will have to fall back to a workaround where we extract the subgraph from the function and inline it into "main", instead of adding it as a separate function, so that it works on both runtimes. It will be difficult to support recursive calls, though.
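
For the record, a minimal sketch of that workaround, reusing run_on_graph_runtime from the original post: the body of func1 is spliced directly into "main" rather than let-bound and called, which keeps the module in the dataflow form that the graph runtime's memory planner (and FoldScaleAxis) expects:

import tvm
from tvm import relay
import numpy as np

# Inline the subgraph that previously lived in %add_two directly into "main".
x1 = relay.var('x1', 'float32')
y1 = relay.var('y1', 'float32')
mod = tvm.IRModule()
mod["main"] = relay.Function([x1, y1], x1 + y1)

x_data = np.array(np.random.rand()).astype('float32')
y_data = np.array(np.random.rand()).astype('float32')

# Builds and runs on the graph runtime without disabling any passes.
res = run_on_graph_runtime(mod, ['x1', 'y1'], [x_data, y_data])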