Hi,
I am trying to build an ARM library using ACL (Arm Compute Library) and a scripted PyTorch model. I can create the library without problems using the aarch64-linux-gnu-gcc cross-compiler, but when I try to run the library on the Raspberry Pi 4, I get a segmentation fault, and I don't understand what the problem is. Here is my code:
To create the library on my PC:
import tvm
from tvm import relay
import torch.nn as nn
import torch.nn.functional as F
import torch
class CovNet(nn.Module):
    """Small convolutional network: two conv/pool stages and three linear layers.

    The first linear layer expects 16 * 5 * 5 flattened features, which is
    what the two 5x5 convolutions and 2x2 poolings produce for a
    (N, 3, 32, 32) input. The network outputs 10 values per sample.
    """

    def __init__(self):
        """Create the convolution, pooling and fully connected layers."""
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on a batch ``x`` and return the raw ``fc3`` outputs."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
# Build script: trace CovNet, convert it to Relay, partition it for the Arm
# Compute Library (ACL) and cross-compile a shared library for AArch64.
pc = False  # NOTE(review): not read anywhere in the visible script
data_type = "float32"
data_shape = (1, 3, 32, 32)
input_name = "input"  # the input name can be arbitrary for PyTorch frontend.
input_shapes = [(input_name, data_shape)]

# Named `example_input` so the builtin `input()` is not shadowed.
example_input = torch.randn(data_shape, dtype=torch.float32)
net = CovNet()
script_module = torch.jit.trace(net, example_input).eval()
mod, params = relay.frontend.from_pytorch(script_module, input_shapes)

from tvm.relay.op.contrib.arm_compute_lib import partition_for_arm_compute_lib

# Pass `params` so the weights returned by the frontend are bound into the
# graph before partitioning; the original dropped them entirely.
module = partition_for_arm_compute_lib(mod, params)

target = "llvm -mtriple=aarch64-linux-gnu -mattr=+neon -mcpu=cortex-a72"
with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
    # `params` is also given to the build so the exported library carries the
    # model weights instead of expecting them to be set at run time.
    lib = relay.build(module, target=target, params=params)

lib_path = 'lib_acl_conv_net.so'
cross_compile = 'aarch64-linux-gnu-gcc'
lib.export_library(lib_path, cc=cross_compile)
To run the library on the RPi4:
import time
import tvm
from tvm.contrib import graph_runtime
import numpy as np
# Run script: load the compiled library, feed one random input and time
# 15 executions of the graph.
data_type = "float32"
data_shape = (1, 3, 32, 32)
# Must be the name the model was converted with ("input" in the build
# script). The original used 'data', which is not an input of this graph.
input_name = "input"

dev = tvm.cpu(0)
loaded_lib = tvm.runtime.load_module('lib_acl_conv_net.so')
gen_module = tvm.contrib.graph_executor.GraphModule(loaded_lib['default'](dev))

d_data = np.random.uniform(0, 1, data_shape).astype(data_type)
map_inputs = {input_name: d_data}
gen_module.set_input(**map_inputs)

timeList = []
for _ in range(15):
    # perf_counter is monotonic, so it is better suited to measuring
    # short intervals than the wall-clock time.time().
    start = time.perf_counter()
    gen_module.run()
    timeList.append(time.perf_counter() - start)

floats_array = np.array(timeList)
np.set_printoptions(precision=3)
print('Execution list:', floats_array)
Any help very much appreciated!!!