Assume we have a Relay expression constructed as follows:
# Integer bit-width used to build the dtype string ("int32") for every var.
w_bit = 32
# Runtime input tensor of shape (7,).
x = relay.var("input", shape=[7], dtype=f"int{w_bit}")
# The remaining vars play the role of weights. NOTE(review): w, e_scale and
# th are listed as function parameters below but are never used in the body.
w = relay.var("x2", shape=[2,], dtype=f"int{w_bit}")
zx = relay.var("x3", shape=[3, ], dtype=f"int{w_bit}")
zy = relay.var("x4", shape=[4, ], dtype=f"int{w_bit}")
e_scale = relay.var("x5", shape=[5,], dtype=f"int{w_bit}")
th = relay.var("x6", shape=[6, ], dtype=f"int{w_bit}")
# Concatenate (3,) and (4,) along the last axis -> shape (7,), matching x.
yy = relay.concatenate([zx, zy], axis=-1)
out = yy + x
# All six vars are declared as parameters, even the three unused ones.
fn = relay.Function([x, w, zx, zy, e_scale, th], out)
mod = tvm.IRModule.from_expr(fn)
mod = relay.transform.InferType()(mod)
mod['main']  # interactive display; pretty-printed IR transcribed below
'''
fn (%input: Tensor[(7), int32], %x2: Tensor[(2), int32], %x3: Tensor[(3), int32], %x4: Tensor[(4), int32], %x5: Tensor[(5), int32], %x6: Tensor[(6), int32]) -> Tensor[(7), int32] {
%0 = (%x3, %x4);
%1 = concatenate(%0, axis=-1) /* ty=Tensor[(7), int32] */;
add(%1, %input) /* ty=Tensor[(7), int32] */
}
'''
When loaded in the GraphExecutor, it is expected to be fed 6 tensors (one input and five weights). However, after compiling, lib.get_params()
returns only one parameter. Does any implicit folding happen during the build?
# Collect the free variables of main, then bind a dummy constant tensor to
# every non-input var so relay.build can treat them as fixed weights.
vs = relay.analysis.all_vars(mod["main"])
# NOTE(review): transcript below came from a different (conv-style) model run;
# with the module built above, the vars are input and x2..x6 instead.
# [Var(v1_input, ty=TensorType([1, 3, 80, 80], int32)), Var(v2_weight, ty=TensorType([16, 3, 3, 3], int32)), Var(v3_zero_x, ty=TensorType([1], int32)), Var(v4_zero_y, ty=TensorType([1], int32)), Var(v5_effective_scale, ty=TensorType([1, 16, 1, 1], int32)), Var(v6_truncate_threashold, ty=TensorType([1], int32))]
tp = {
    # Fill each dummy weight with its enumeration index so the arrays are
    # distinguishable; the index counts the skipped input var as well.
    str(v.name_hint): tvm.nd.array(
        np.full([int(d) for d in v.type_annotation.shape], i, dtype=np.int32)
    )
    for i, v in enumerate(vs)
    if "input" not in str(v.name_hint)
}
lib = relay.build(mod["main"], target="llvm", params=tp)
params = lib.get_params()
[(name, params[name].shape) for name in sorted(params)]  # interactive display
# [('p0', (7,))]