[Relay][heterogeneous execution] How to run full heterogeneous execution CPU/GPU

This is the `test_conv_network` example from /tests/python/relay/test_pass_annotation.py, which I modified so that the first convolution runs on the CPU and the second on the GPU. I test the network with `build`, `graph_runtime.create`, `set_input`, and `run`, as shown below.

import numpy as np
import tvm
from tvm import relay
from tvm.relay import transform
from tvm.contrib import graph_runtime
# run_opt_pass is the helper defined near the top of test_pass_annotation.py

def test_conv_network():
    r"""The network is as follows, where (1) marks ops annotated for
    device 1 (CPU) and (2) marks ops annotated for device 2 (GPU):

        data1       data2
          |           |
      conv2d(1)   conv2d(2)
           \       /
            add(1)
              |
          conv2d(2)
    """
    batch_size = 1
    dshape = (batch_size, 64, 56, 56)
    weight1 = relay.var("weight1", shape=(64, 64, 3, 3))
    weight2 = relay.var("weight2", shape=(64, 64, 3, 3))
    data1 = relay.var("data1", shape=dshape)
    data2 = relay.var("data2", shape=dshape)
    dev1 = tvm.context(1)  # device_type 1 == kDLCPU
    dev2 = tvm.context(2)  # device_type 2 == kDLGPU (CUDA)

    def annotated():
        conv2d_1 = relay.nn.conv2d(data1, weight1, channels=64, kernel_size=(3, 3), padding=(1, 1))
        _conv2d_1 = relay.annotation.on_device(conv2d_1, dev1)  # first conv on the CPU
        conv2d_2 = relay.nn.conv2d(data2, weight2, channels=64, kernel_size=(3, 3), padding=(1, 1))
        _conv2d_2 = relay.annotation.on_device(conv2d_2, dev2)  # second conv on the GPU
        add = relay.add(_conv2d_1, _conv2d_2)
        _add = relay.annotation.on_device(add, dev1)            # add on the CPU
        conv2d_3 = relay.nn.conv2d(_add, weight2, channels=64, kernel_size=(3, 3), padding=(1, 1))
        _conv2d_3 = relay.annotation.on_device(conv2d_3, dev2)  # third conv on the GPU

        func = relay.Function([data1, data2, weight1, weight2], _conv2d_3)
        print(func)
        # Turn the on_device annotations into device_copy ops; the argument is the
        # fallback device type for ops left unannotated (3, following the upstream test).
        func = run_opt_pass(func, transform.RewriteAnnotatedOps(tvm.context(3).device_type))
        return func

    def check_build():
        func = annotated()
        # RewriteAnnotatedOps was already applied inside annotated(), so this
        # second application should be a no-op; FuseOps(2) fuses ops at opt level 2.
        func = run_opt_pass(func, [transform.RewriteAnnotatedOps(3), transform.FuseOps(2)])
        mod = tvm.IRModule.from_expr(func)
        with tvm.transform.PassContext(opt_level=3):
            # Heterogeneous build: map each device name to its compilation target.
            graph, lib, params = relay.build_module.build(mod, target={"cpu": "llvm", "gpu": "cuda"})
        # Pass one context per device to the graph runtime.
        module = graph_runtime.create(graph, lib, [dev1, dev2])

        # Place each input on the device of the conv2d that consumes it.
        in_val1 = tvm.nd.array(np.random.random(dshape).astype("float32"), dev1)
        in_val2 = tvm.nd.array(np.random.random(dshape).astype("float32"), dev2)
        w_val1 = tvm.nd.array(np.random.random((64, 64, 3, 3)).astype("float32"), dev1)
        w_val2 = tvm.nd.array(np.random.random((64, 64, 3, 3)).astype("float32"), dev2)

        module.set_input("weight1", w_val1)
        module.set_input("weight2", w_val2)
        module.set_input("data1", in_val1)
        module.set_input("data2", in_val2)
        module.set_input(**params)
        module.run()

But an error occurred. I think the problem comes from the argument device types, but I don't know exactly what is wrong. :disappointed_relieved:

Traceback (most recent call last):
  File "../tests/python/relay/test_pass_annotation.py", line 682, in <module>
    test_conv_network()
  File "../tests/python/relay/test_pass_annotation.py", line 319, in test_conv_network
    test_manual_annotation()
  File "../tests/python/relay/test_pass_annotation.py", line 310, in test_manual_annotation
    check_build()
  File "../tests/python/relay/test_pass_annotation.py", line 302, in check_build
    module.run()
  File "/home/metaljsw2/tvm/python/tvm/contrib/graph_runtime.py", line 206, in run
    self._run()
  File "/home/metaljsw2/tvm/python/tvm/_ffi/_ctypes/packed_func.py", line 237, in __call__
    raise get_last_ffi_error()
tvm._ffi.base.TVMError: Traceback (most recent call last):
  [bt] (4) /home/metaljsw2/tvm/build/libtvm.so(TVMFuncCall+0x65) [0x7fa6bbc9eb55]
  [bt] (3) /home/metaljsw2/tvm/build/libtvm.so(tvm::runtime::GraphRuntime::Run()+0x37) [0x7fa6bbd2f2f7]
  [bt] (2) /home/metaljsw2/tvm/build/libtvm.so(+0x2c88277) [0x7fa6bbd2f277]
  [bt] (1) /home/metaljsw2/tvm/build/libtvm.so(+0x2c0c710) [0x7fa6bbcb3710]
  [bt] (0) /home/metaljsw2/tvm/build/libtvm.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x82) [0x7fa6bb139382]
  File "/home/metaljsw2/tvm/src/runtime/library_module.cc", line 78
TVMError: Check failed: ret == 0 (-1 vs. 0) : Assert fail: (1 == tir.tvm_struct_get(arg0, 0, 10)), Argument arg0.device_type has an unsatisfied constraint
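
For context on how I read this message: `tir.tvm_struct_get(arg0, 0, 10)` reads field 10 of the DLTensor struct, which is `device_type`, so a fused function compiled for the CPU (device_type 1) is being handed a tensor that lives on another device. Below is a minimal sketch that reproduces the same class of error by feeding a GPU array to a CPU-built kernel (the kernel is made up purely for illustration and assumes a CUDA-enabled build):

import numpy as np
import tvm
from tvm import te

# Trivial CPU (llvm) kernel; calling it with a GPU array trips the same
# "device_type has an unsatisfied constraint" check.
n = 16
A = te.placeholder((n,), name="A")
B = te.compute((n,), lambda i: A[i] + 1.0, name="B")
s = te.create_schedule(B.op)
f = tvm.build(s, [A, B], target="llvm")

a_gpu = tvm.nd.array(np.zeros(n, dtype="float32"), tvm.gpu(0))
b_cpu = tvm.nd.array(np.zeros(n, dtype="float32"), tvm.cpu(0))
f(a_gpu, b_cpu)  # TVMError: Argument arg0.device_type has an unsatisfied constraint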

Is this the right way to write code for heterogeneous execution? If anyone has a correct example of the above, please answer.
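
To check where each op actually landed, I think the per-node device assignment can be dumped from the generated graph JSON; a minimal sketch, assuming the heterogeneous graph stores it under the "device_index" attribute (the field the graph runtime reads), with `dump_device_placement` being a hypothetical helper name:

import json

def dump_device_placement(graph_json_str):
    # Hypothetical helper: "device_index" is serialized in "attrs" as a
    # ["list_int", [...]] pair, one entry per graph node.
    g = json.loads(graph_json_str)
    dev_index = g["attrs"]["device_index"][1]
    for node, dev in zip(g["nodes"], dev_index):
        print("%-40s device_type=%d" % (node["name"], dev))  # 1 = CPU, 2 = GPU

# e.g. dump_device_placement(graph) right after relay.build_module.build(...)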

Hi @PineApple777,

Have you found a solution, or does the problem still persist?