Hi there!
Recently, I’ve been working with the Hexagon backend codegen and encountered a few questions. Here’s a simple example related to VTCM buffer allocation:
def test_alloc_vtcm():
target = tvm.target.hexagon("v66")
buf_len = 2048
A = tvm.te.placeholder((buf_len,), name="A", dtype="int8")
B = tvm.te.placeholder((buf_len,), name="B", dtype="int8")
A_buf = tvm.te.compute((buf_len,), lambda *i: A(*i), "A_buf")
B_buf = tvm.te.compute((buf_len,), lambda *i: B(*i), "B_buf")
C = tvm.te.compute((buf_len,), lambda *i: A_buf(*i) + B_buf(*i), name="C")
s = tvm.te.create_schedule(C.op)
irmod = tvm.lower(s, [A, B, C])
# Use VTCM for each buffer.
s[A_buf].set_scope("local.vtcm")
s[B_buf].set_scope("local.vtcm")
config = {"tir.add_lower_pass": hexagon.ir_lower_vtcm_pass()}
with tvm.transform.PassContext(config=config):
irmod = tvm.lower(s, [A, B, C], name="alloc_vtcm")
# print(irmod)
func_tir = tvm.build(irmod, [A, B, C],target=get_hexagon_target("v68"))
m=func_tir.get_source("asm")
calls = re.findall("HexagonBackend[A-Za-z]*VTCM", str(irmod["alloc_vtcm"]))
and the lowerd main tir code:
@T.prim_func
def alloc_vtcm(A: T.Buffer((2048,), "int8"), B: T.Buffer((2048,), "int8"), C: T.Buffer((2048,), "int8")):
T.func_attr({"from_legacy_te_schedule": T.bool(True), "tir.noalias": T.bool(True)})
A_buf: T.handle("int8", "local.vtcm") = T.call_extern("handle", "HexagonBackendAllocateVTCM", 2048, 2048)
if T.isnullptr(A_buf):
T.tvm_throw_last_error()
else:
with T.LetStmt(T.call_extern("handle", "HexagonBackendAllocateVTCM", 2048, 2048), T.handle("int8", "local.vtcm")) as B_buf:
if T.isnullptr(B_buf):
T.tvm_throw_last_error()
else:
A_buf_1 = T.Buffer((2048,), "int8", data=A_buf, scope="local.vtcm", align=128)
for i0 in range(2048):
A_buf_1[i0] = A[i0]
B_buf_1 = T.Buffer((2048,), "int8", data=B_buf, scope="local.vtcm", align=128)
for i0 in range(2048):
B_buf_1[i0] = B[i0]
for i0 in range(2048):
C[i0] = A_buf_1[i0] + B_buf_1[i0]
T.call_extern("handle", "HexagonBackendFreeVTCM", B_buf)
T.call_extern("handle", "HexagonBackendFreeVTCM", A_buf)
So abviously there is a call_extern
to HexagonBackendFreeVTCM
/ HexagonBackendAllocateVTCM
, and the result asm also contain this call like this:
{
call HexagonBackendFreeVTCM
if (q1) vmem(r2+#0) = v31
}
I checked the entire compilation process(codegen_hexagon.cc)and the final obj file generate by LLVM, the call HexagonBackendFreeVTCM
/ HexagonBackendAllocateVTCM
is remain undefined,:
00000010 b .tvm_func.__tvm_set_device
U HexagonBackendAllocateVTCM
U HexagonBackendFreeVTCM
My current guess towards this: Hexagon Call is linked somewhere, and the relevant library code is located in src/runtime/hexagon,but now I still can’t find this linking period, However, I still haven’t pinpointed where this linking occurs.
Does anyone have insights on how this linking is handled? Any insights or suggestions would be greatly appreciated!