InternalError: Check failed: (op && result == CL_SUCCESS) is false: Pad Error:-30 running CLML model

@srkreddy1238 I'm trying to run a TFLite model that uses NHWC layout by converting it to the NCHW layout supported by CLML in TVM, compiling it to a tvm.so, and running it on a Qualcomm device (Galaxy S24 Ultra). I'm getting the following error:

InternalError: Check failed: (op && result == CL_SUCCESS) is false: Pad Error:-30

from this check in the CLML runtime (OpenCL error -30 is CL_INVALID_VALUE):

result = CLML_INTF->clCreateMLOpPadQCOM(CLML_CTX, nullptr, &pad_desc, input->tensor,
                                        output->tensor, &op, layer_.tuning_cache);
ICHECK(op && result == CL_SUCCESS) << "Pad Error:" << result;

If I offload all of the padding layers in the model to the CPU or OpenCL targets, it runs fine, but I get the above error as soon as the padding layers are included in the CLML partition. The padding layers used in the model are ZeroPadding2D.
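For reference, here is a minimal sketch of what a ZeroPadding2D layer lowers to after the NHWC-to-NCHW conversion (the shape and padding below are made-up placeholders); pushing this through the same partition/build flow should hit the same clCreateMLOpPadQCOM path if pad offload is the problem:

import tvm
from tvm import relay

data = relay.var("data", shape=(1, 32, 56, 56), dtype="float32")
# ZeroPadding2D((1, 1)) becomes an nn.pad over the two spatial axes in NCHW.
pad = relay.nn.pad(data, pad_width=((0, 0), (0, 0), (1, 1), (1, 1)))
mod = tvm.IRModule.from_expr(relay.Function([data], pad))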

Below is the code used to generate the tvm.so and run the model.

import tvm
from tvm import relay
from tvm.contrib import ndk
from tvm.relay.op.contrib import clml

# `tflite_model` is the tflite.Model parsed earlier from the .tflite file.
mod, params = relay.frontend.from_tflite(
    tflite_model, shape_dict={input_tensor: input_shape}, dtype_dict={input_tensor: input_dtype}
)
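# `seq` referenced below is not defined in this snippet; a sketch of the layout-conversion
# sequence it presumably refers to (assumption: NHWC to NCHW via ConvertLayout, as in the
# TVM Adreno/CLML examples):
desired_layouts = {"nn.conv2d": ["NCHW", "default"]}
seq = tvm.transform.Sequential(
    [relay.transform.ConvertLayout(desired_layouts), relay.transform.FoldConstant()]
)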
with tvm.transform.PassContext(opt_level=3):
#    mod = seq(mod)
    print("----------ir after layout change------------")
    print(mod)
    print("----------------------")

    if not local_demo and enable_clml:
        print("partition clml")
        print(clml.is_clml_runtime_enabled())
        # preprocess_module applies the CLML-specific layout transforms and
        # partition_for_clml carves out the subgraphs to be offloaded to CLML.
        mod = clml.preprocess_module(mod)
        mod = clml.partition_for_clml(mod, params)

    print("-------------------After PArtition-------------")
    print(mod)
    print("-----------------------")
    # `test_target` is the device target (e.g. OpenCL for Adreno/CLML) and `target` the
    # llvm host target for Android, both defined earlier.
    target = tvm.target.Target(test_target, host=target)

    #mod = seq(mod)
    lib = relay.build(mod, target=target, params=params)
lib_fname = "dummy_model.tvm.so"
print(ndk)
print(ndk.create_shared)
# ndk.create_shared cross-compiles the library with the compiler pointed to by the
# TVM_NDK_CC environment variable.
fcompile = ndk.create_shared if run_on_device else None
lib.export_library(lib_fname, fcompile)

import tvm
import numpy as np
from tvm.contrib import graph_executor as runtime
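
# How `remote` is obtained is not shown above; a sketch of the usual RPC-tracker setup
# (assumption: the tracker host/port and the "android" device key are placeholders).
from tvm import rpc

tracker = rpc.connect_tracker("127.0.0.1", 9190)
remote = tracker.request("android", priority=0, session_timeout=600)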

ctx = remote.cl(0)
#ctx = remote.cpu(0)

# Transfer the model lib to remote device
remote.upload(lib_fname)
# Load the remote module
rlib = remote.load_module(lib_fname)

# Create a runtime executor module
module = runtime.GraphModule(rlib["default"](ctx))
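
# Feed a dummy input so the run below exercises real data (assumption: `input_tensor`,
# `input_shape` and `input_dtype` are the same names used at model import time).
module.set_input(
    input_tensor, tvm.nd.array(np.random.uniform(size=input_shape).astype(input_dtype))
)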

# Run
module.run()

# Benchmark the performance

ftime = module.module.time_evaluator("run", ctx, number=1, repeat=10)
prof_res = np.array(ftime().results) * 1000
print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)))

Do you think you can share the CLML codegen output?

Check this reference about how to dump the CLML codegen.
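In case it helps while locating that reference, here is a rough sketch for peeking at the per-backend modules inside the built lib (whether the CLML module exposes its generated graph through get_source() may depend on the TVM version):

for m in lib.get_lib().imported_modules:
    print(m.type_key)
    try:
        # Some BYOC modules can print their generated source/graph this way.
        print(m.get_source())
    except Exception:
        pass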

Also, let me know whether disabling pad offload works fine. You may comment out the line below.
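If editing the TVM sources is not convenient, here is a sketch of one way to keep nn.pad out of the CLML partition from the user script instead, under the assumption that nn.pad is advertised to the partitioner through the "target.clml" op attribute (if it is matched through the CLML pattern table instead, removing the pad pattern there has the same effect):

import tvm

# Re-register the attribute at a higher level (11 > default 10) so nn.pad stays on the
# fallback target instead of being offloaded to CLML; apply before partition_for_clml.
@tvm.ir.register_op_attr("nn.pad", "target.clml", level=11)
def _pad_clml_not_supported(expr):
    return False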

Yes, disabling pad offload works fine.