Hi,
I am using LLVM X86 intrinsics. The code is below
import tvm
import numpy as np
def test_int8():
n = 64
A = tvm.placeholder((n,), name='A', dtype='int8')
B = tvm.placeholder((n,), name='B', dtype='int8')
def extern_generator(ins, outs):
"""Manually write the IR for the extern function, add pipeline"""
ib = tvm.ir_builder.create()
vecA = ins[0].vload(0, "int8x64")
vecB = ins[1].vload(0, "int8x64")
out = tvm.call_llvm_intrin('int16x32', 'llvm.x86.avx512.pmaddubs.w.512', tvm.const(3, 'uint32'), vecA, vecB)
ib.emit(outs[0].vstore(0, out))
return ib.get()
C = tvm.extern((n/2,), [A, B], extern_generator, dtype='int16', name='C')
s = tvm.create_schedule(C.op)
print(tvm.lower(s, [A, B], simple_mode=True))
def check_target(target):
if not tvm.module.enabled(target):
return
f = tvm.build(s, [A, B, C], target)
f.save('temp.ll')
f.save('temp.s')
ctx = tvm.context(target, 0)
# launch the kernel.
a = tvm.nd.array(np.ones(n, dtype=A.dtype), ctx)
b = tvm.nd.array(np.ones(n, dtype=B.dtype), ctx)
c = tvm.nd.array(np.zeros(n/2, dtype=C.dtype), ctx)
print(a)
print(b)
print(c)
f(a, b, c)
check_target("llvm -mcpu=skylake-avx512")
if __name__ == "__main__":
test_int8()
This piece of code gives a segmentation fault. The fault occurs at check_target function last line.
Relevant line from LLVM IR
%4 = load <64 x i8>, <64 x i8>* %3, align 64, !tbaa !109
%5 = bitcast i8* %2 to <64 x i8>*
%6 = load <64 x i8>, <64 x i8>* %5, align 64, !tbaa !117
%7 = tail call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512.v32i16.v64i8.v64i8(<64 x i8> %4, <64 x i8> %6)
Relevant line from LLVM Assembly
vmovaps (%rsi), %zmm0
vmovaps (%rdx), %zmm1
callq llvm.x86.avx512.pmaddubs.w.512.v32i16.v64i8.v64i8@PLT
vmovaps %zmm0, (%rbx)
I was wondering if the issue is because the llvm intrinsics are not defined here and we need to perform some kind of linking to make this work. Thanks for helping