I’m new to TVM. I am trying to do heterogeneous execution on CPU and GPU, and I found some examples in `/tvm/tests/python/relay/test_pass_annotation.py`. I tried to build the annotated module and run it on CPU and GPU, but some strange errors occurred.
Here is my code. My TVM version is 0.8.
import tvm
from tvm import relay
from tvm.relay import transform
def run_opt_pass(expr, passes):
    """Wrap ``expr`` in an IRModule, run ``passes`` over it, return ``main``.

    ``passes`` may be a single pass or a list of passes; a single pass is
    wrapped in a one-element list before being handed to ``Sequential``.
    The passes run inside a ``PassContext`` with ``opt_level=3``.
    """
    if not isinstance(passes, list):
        passes = [passes]
    pipeline = tvm.transform.Sequential(passes)
    module = tvm.IRModule.from_expr(expr)
    with tvm.transform.PassContext(opt_level=3):
        optimized = pipeline(module)
    return optimized["main"]
def test_annotate_all():
    """Build a two-op graph with per-op device annotations and run it.

    ``add`` is annotated with ``ctx1`` and ``subtract`` with ``ctx2``. The
    rewritten function is built for the ``llvm`` and ``cuda`` targets,
    executed through the graph runtime, and the first output is printed.
    """
    ctx1 = tvm.context(1)  # cpu
    ctx2 = tvm.context(2)  # gpu

    x = relay.var("x", shape=(3,))
    y = relay.var("y", shape=(3,))
    z = relay.var("z", shape=(3,))

    target = {'cpu': 'llvm', 'cuda': 'cuda'}

    def annotated():
        # x + y is annotated with ctx1, (x + y) - z with ctx2; the
        # annotations are then rewritten with ctx1's device type passed
        # to RewriteAnnotatedOps.
        add = relay.add(x, y)
        _add = relay.annotation.on_device(add, ctx1)
        sub = relay.subtract(_add, z)
        _sub = relay.annotation.on_device(sub, ctx2)
        return run_opt_pass(
            relay.Function([x, y, z], _sub),
            transform.RewriteAnnotatedOps(ctx1.device_type),
        )

    def expected():
        # Same computation without annotations; defined but never called
        # in this function.
        add = relay.add(x, y)
        return relay.Function([x, y, z], relay.subtract(add, z))

    mod = tvm.IRModule.from_expr(annotated())

    # The values returned by relay.optimize are not used below.
    amod, _ = relay.optimize(mod, target=target)

    with relay.build_config(opt_level=1):
        graph, lib, params = relay.build(mod, target, params={})

    # One context per target: CPU first, then CUDA.
    contexts = [tvm.cpu(0), tvm.context("cuda")]
    mod = tvm.contrib.graph_runtime.create(graph, lib, contexts)

    # With these inputs (x + y) - z is [3, 5, 7].
    for name, values in (("x", [1, 2, 3]), ("y", [4, 5, 6]), ("z", [2, 2, 2])):
        mod.set_input(name, values)

    mod.run()
    print(mod.get_output(0).asnumpy())
# Run the reproduction as soon as the script is executed.
test_annotate_all()
If I place the two ops on the same device (both on CPU or both on GPU), the code produces the correct result `[3. 5. 7.]`.
If I place `add` on CPU and `subtract` on GPU as follows, a segmentation fault occurs:
_add = relay.annotation.on_device(add, ctx1)
_sub = relay.annotation.on_device(sub, ctx2)
If I place `add` on GPU and `subtract` on CPU instead, the program aborts. Here is the traceback:
Traceback (most recent call last):
File "annotated.py", line 75, in <module>
test_annotate_all()
File "annotated.py", line 68, in test_annotate_all
mod.run()
File "/opt/tvm/python/tvm/contrib/graph_runtime.py", line 207, in run
self._run()
File "/opt/tvm/python/tvm/_ffi/_ctypes/packed_func.py", line 237, in __call__
raise get_last_ffi_error()
tvm._ffi.base.TVMError: Traceback (most recent call last):
[bt] (6) /usr/lib64/libtvm.so(TVMFuncCall+0x48) [0x7f4030393298]
[bt] (5) /usr/lib64/libtvm.so(tvm::runtime::GraphRuntime::Run()+0x37) [0x7f403041c527]
[bt] (4) /usr/lib64/libtvm.so(+0x15b4ef5) [0x7f403041def5]
[bt] (3) /usr/lib64/libtvm.so(TVMArrayCopyFromTo+0xa) [0x7f40303b2e5a]
[bt] (2) /usr/lib64/libtvm.so(tvm::runtime::NDArray::CopyFromTo(DLTensor const*, DLTensor*, void*)+0x1f5) [0x7f40303b2d35]
[bt] (1) /usr/lib64/libtvm.so(tvm::runtime::CUDADeviceAPI::CopyDataFromTo(void const*, unsigned long, void*, unsigned long, unsigned long, DLContext, DLContext, DLDataType, void*)+0xa3) [0x7f4030440e73]
[bt] (0) /usr/lib64/libtvm.so(+0x15d6ac5) [0x7f403043fac5]
File "/sources/tvm/src/runtime/cuda/cuda_device_api.cc", line 226
File "/sources/tvm/src/runtime/graph/graph_runtime.cc", line 412
CUDA: Check failed: ret == 0 (-1 vs. 0) : Check failed: e == cudaSuccess || e == cudaErrorCudartUnloading: an illegal memory access was encountered
terminate called after throwing an instance of 'dmlc::Error'
what(): [22:20:56] /sources/tvm/src/runtime/cuda/cuda_module.cc:61: CUDAError: cuModuleUnload(module_[i]) failed with error: CUDA_ERROR_ILLEGAL_ADDRESS
Stack trace:
[bt] (0) /usr/lib64/libtvm.so(+0x15db32a) [0x7f403044432a]
[bt] (1) /usr/lib64/libtvm.so(tvm::runtime::CUDAModuleNode::~CUDAModuleNode()+0xd1) [0x7f4030446cd1]
[bt] (2) /usr/lib64/libtvm.so(tvm::runtime::SimpleObjAllocator::Handler<tvm::runtime::CUDAModuleNode>::Deleter_(tvm::runtime::Object*)+0x19) [0x7f4030446e79]
[bt] (3) /usr/lib64/libtvm.so(tvm::codegen::LLVMModuleNode::~LLVMModuleNode()+0x223) [0x7f4030374ef3]
[bt] (4) /usr/lib64/libtvm.so(tvm::runtime::SimpleObjAllocator::Handler<tvm::codegen::LLVMModuleNode>::Deleter_(tvm::runtime::Object*)+0x19) [0x7f4030374f69]
[bt] (5) /usr/lib64/libtvm.so(TVMObjectFree+0x20) [0x7f40303b5dd0]
[bt] (6) /usr/lib64/libffi.so.6(ffi_call_unix64+0x4c) [0x7f420eb72dcc]
[bt] (7) /usr/lib64/libffi.so.6(ffi_call+0x1f5) [0x7f420eb726f5]
[bt] (8) /usr/lib64/python3.6/lib-dynload/_ctypes.cpython-36m-x86_64-linux-gnu.so(_ctypes_callproc+0x2a0) [0x7f420ed85600]
Aborted (core dumped)
Is there something wrong with my code, or is this just a bug?