When I generate C code for a model, the loop bounds in the emitted code are constants. Is it possible to generate C code whose loop bounds are non-constant expressions (derived from the tensor shapes/strides) instead?
import onnx
import tvm
import tvm.relay as relay
from tvm.contrib.download import download_testdata
# Reproduction script: import an ONNX model into Relay, build it for the
# "c" target, and print the generated C source.

# The URL is assembled from its path components to keep each line short.
model_url = "".join(
    (
        "https://gist.github.com/zhreshold/",
        "bcda4716699ac97ea44f791c24310193/raw/",
        "93672b029103648953c4e5ad3ac3aadf346a4cdc/",
        "super_resolution_0.2.onnx",
    )
)
model_path = download_testdata(
    model_url,
    "super_resolution.onnx",
    module="onnx",
)
onnx_model = onnx.load(model_path)

target = "c"

# Convert the ONNX graph to a Relay module; freeze_params is explicitly
# disabled here.
mod, params = relay.frontend.from_onnx(onnx_model, freeze_params=False)

# Pass options used for the build: vectorization is turned off and
# trivial loops are kept in the emitted code.
pass_config = {
    "tir.disable_vectorize": True,
    "tir.debug_keep_trivial_loop": True,
}

with tvm.transform.PassContext(opt_level=3, config=pass_config):
    # No parameters are handed to the build (params=None).
    graph, lib, params = relay.build(mod, target=target, params=None)

# Dump the C source produced for the compiled module.
print(lib.get_source())
Trimmed Output:
/*
 * Generated kernel (quoted verbatim from the build output): copies float
 * elements from the input tensor `placeholder` to the output tensor
 * `T_layout_trans`.
 *
 * args          - packed argument array; entries 0 and 1 are read as handles
 *                 to DLTensor objects (input, then output).
 * arg_type_ids  - per-argument type codes; entries 0 and 1 are read.
 * Remaining parameters are not referenced in this listing.
 *
 * Returns 0.
 *
 * NOTE(review): the shape and strides pointers are fetched from both
 * DLTensors below, but the loop bounds (16, 1, 1, 4) are integer literals;
 * nothing in the visible loops reads the shape or strides.
 */
TVM_DLL int32_t tvmgen_default_fused_expand_dims_expand_dims_layout_transform(void* args, int32_t* arg_type_ids, int32_t num_args, void* out_ret_value, int32_t* out_ret_tcode, void* resource_handle) {
/* Unpack argument 0 (input tensor handle) and its type code. */
void* arg_placeholder = (((TVMValue*)args)[0].v_handle);
int32_t arg_placeholder_code = arg_type_ids[0];
/* Unpack argument 1 (output tensor handle) and its type code. */
void* arg_T_layout_trans = (((TVMValue*)args)[1].v_handle);
int32_t arg_T_layout_trans_code = arg_type_ids[1];
/* Input tensor: data pointer, shape array, strides array, device id. */
void* placeholder = (((DLTensor*)arg_placeholder)[0].data);
void* arg_placeholder_shape = (((DLTensor*)arg_placeholder)[0].shape);
void* arg_placeholder_strides = (((DLTensor*)arg_placeholder)[0].strides);
int32_t dev_id = (((DLTensor*)arg_placeholder)[0].device.device_id);
/* Output tensor: data pointer, shape array, strides array. */
void* T_layout_trans = (((DLTensor*)arg_T_layout_trans)[0].data);
void* arg_T_layout_trans_shape = (((DLTensor*)arg_T_layout_trans)[0].shape);
void* arg_T_layout_trans_strides = (((DLTensor*)arg_T_layout_trans)[0].strides);
/* Both strides checks have empty bodies in this listing (the output was
   trimmed, so any original contents are not shown here). */
if (!(arg_placeholder_strides == NULL)) {
}
if (!(arg_T_layout_trans_strides == NULL)) {
}
/* Four nested loops with constant trip counts 16 x 1 x 1 x 4. */
for (int32_t ax0_ax1_fused_ax2_fused = 0; ax0_ax1_fused_ax2_fused < 16; ++ax0_ax1_fused_ax2_fused) {
for (int32_t ax3 = 0; ax3 < 1; ++ax3) {
for (int32_t ax4_outer = 0; ax4_outer < 1; ++ax4_outer) {
for (int32_t ax4_inner = 0; ax4_inner < 4; ++ax4_inner) {
/* Flat element index; covers 0..63 over the whole loop nest. */
int32_t cse_var_1 = ((ax0_ax1_fused_ax2_fused * 4) + ax4_inner);
/* Same index on both sides: a straight element-for-element copy. */
((float*)T_layout_trans)[cse_var_1] = ((float*)placeholder)[cse_var_1];
}
}
}
}
return 0;
}