[VTA] Minimal MNIST number example

Hey,

I’ve been trying to get a minimal solution or implementation for a MNIST-Number CNN to work on a PYNQ VTA instance. As for understanding the design flow with VTA I took the Resnet-18 example and adapted / trained my own model to fit the MNIST number dataset. I can run Inference perfectly fine on an x86 cpu and with a little bit less accuracy through tvm/rpc on the ARM cores of the PYNQ. However, as soon as I use VTA all my predictions are fixed to 8 or 3 and it no longer predicts correctly.

def buildExecutionGraphVTA(env, remote, ctx, target, args):
    """Compile the trained MNIST gluon model into a graph-executor module.

    Front-end compiles the model with Relay, quantizes and graph-packs it when
    targeting VTA, builds the library, ships it to the remote RPC server and
    creates the graph executor there.

    Returns:
        (m, params, graph, lib2): executor module, build params, serialized
        graph json, and the locally-built library.
    """
    print("Building Execution Graph")
    # Load pre-configured AutoTVM schedules
    with autotvm.tophub.context(target):
        # Shape and data type dictionary for the classifier input
        # (MNIST input laid out as batch x 28 x 28 x 1)
        dtype_dict = {"data": "float32"}
        shape_dict = {"data": (env.BATCH, 28, 28, 1)}

        # Load own trained gluon model
        gluon_model = vision.get_model(model, pretrained=False, classes=10)
        f_path = "/home/xxx/yyy/models/"
        model_f = "gluon_ownModelTrained_1_1_28_28"
        name = "model"
        f_name = f_path + model_f + "/" + name
        gluon_model.load_parameters(f_name)

        # Measure build start time
        build_start = time.time()

        # Start front end compilation
        mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)

        if target.device_name == "vta":
            # Perform quantization in Relay.
            # BUG FIX: opt_level=3 is required here so batch norm gets folded
            # before quantization — the original used a bare PassContext()
            # (default opt level) despite its own comment, leaving un-folded
            # batch norms in the quantized graph.
            with tvm.transform.PassContext(opt_level=3):
                # BUG FIX: the kwarg is `calibrate_mode`, not
                # `calibrations_mode` — qconfig silently ignores unknown keys,
                # so the misspelled argument had no effect.
                # skip_conv_layers=[] quantizes the first conv too, matching
                # the constraint that it must run on VTA.
                with relay.quantize.qconfig(
                    calibrate_mode="global_scale",
                    global_scale=8.0,
                    skip_conv_layers=[],
                ):
                    mod = relay.quantize.quantize(mod, params=params)
                # Perform graph packing and constant folding for VTA target
                assert env.BLOCK_IN == env.BLOCK_OUT
                # do device annotation if target is intelfocl or sim
                print(mod["main"])
                relay_prog = graph_pack(
                    mod["main"],
                    env.BATCH,
                    env.BLOCK_OUT,
                    env.WGT_WIDTH,
                    start_name="nn.conv2d",  # pack from the first conv onwards
                    stop_name=pack_dict[model][1],
                    # device_annot=True,
                )
        else:
            relay_prog = mod["main"]

        # Compile Relay program with AlterOpLayout disabled
        if target.device_name != "vta":
            with tvm.transform.PassContext(opt_level=3, disabled_pass={"AlterOpLayout"}):
                # BUG FIX: build into `lib2` (was `lib`) so the export below
                # does not raise NameError on the non-VTA path.
                graph, lib2, params = relay.build(
                    relay_prog, target=tvm.target.Target(target, host="llvm"), params=params
                )
        else:
            print(env.TARGET, env.target_host)
            if env.TARGET == "intelfocl":
                target = {"cpu": env.target_vta_cpu, "ext_dev": target}

            onlyCPU = args.cpu and args.vta
            print("VTA ONLY CPU: ", onlyCPU)
            print(type(target), target)

            # multiple targets to run both on cpu and vta
            if onlyCPU:
                target = {"cpu": env.target_vta_cpu}

            # NOTE(review): the VTA ResNet tutorial uses opt_level=3 here —
            # confirm whether opt_level=1 is intentional.
            with vta.build_config(
                opt_level=1, disabled_pass={"AlterOpLayout"}  # , "tir.CommonSubexprElimTIR"}
            ):
                graph, lib2, params = relay.build(
                    relay_prog, target=tvm.target.Target(target, host=env.target_host), params=params
                )

        # Measure Relay build time
        build_time = time.time() - build_start
        print(model + " inference graph built in {0:.2f}s!".format(build_time))

        # Send the inference library over to the remote RPC server
        temp = utils.tempdir()
        lib2.export_library(temp.relpath("graphlib.tar"))
        remote.upload(temp.relpath("graphlib.tar"))
        lib = remote.load_module("graphlib.tar")

        if env.TARGET == "intelfocl":
            ctxes = [remote.ext_dev(0), remote.cpu(0)]
            m = graph_executor.create(graph, lib, ctxes)
        else:
            # Graph runtime
            m = graph_executor.create(graph, lib, ctx)

        return m, params, graph, lib2

def runImageInferenceVTA(env, remote, ctx, m, params, test_loader, folder_path):
    """Run per-image inference through the graph module and print predictions.

    Iterates the test loader image by image, replicates each image across the
    VTA batch dimension, runs the module, and compares the argmax prediction
    of each batch entry against the ground-truth label.
    """
    print("Run Image Inference VTA")
    # Load image for eval
    for batch_idx, (features, targets) in enumerate(test_loader):
        for idx, (image, label) in enumerate(zip(features, targets)):
            image = image.numpy()

            # Append a trailing channel axis -> shape (1, 28, 28, 1)
            image = np.expand_dims(image, axis=3)

            # BUG FIX: tile along the batch axis up to env.BATCH — the
            # original tile of (1, 1, 1, 1) is a no-op and fails to "ensure
            # correct batch size" whenever env.BATCH > 1.
            image = np.tile(image, (env.BATCH, 1, 1, 1))

            # Set the network parameters and inputs
            m.set_input(**params)
            m.set_input("data", image)

            # Perform inference
            # (for timing, see :py:method:`tvm.runtime.Module.time_evaluator`)
            m.run()

            # Get classification results: one row of 10 class scores per batch entry
            tvm_output = m.get_output(0, tvm.nd.empty((env.BATCH, 10), "float32", remote.cpu(0)))

            # Argmax over the class axis, hoisted out of the per-batch loop
            # (the original recomputed the same value every iteration).
            pred = tvm_output.numpy().argmax(axis=1).astype("int").tolist()
            for b in range(env.BATCH):
                # BUG FIX: index with b (was always pred[0]) so every batch
                # entry is checked against the label.
                print("Prediction (PRED,LABEL,EQUAL): ", pred[b], label.item(), pred[b] == label.item())

I narrowed down the error to the actual execution of `graph_pack`. After running it, the predictions are off.

The only constraint I have is that the first convolution also needs to run on VTA.

I’d be glad if someone could hint me to either a minimal solution that wouldn’t require Resnet18 or an idea where I am going wrong with graph pack or my overall implementation.

Thanks for any help.

Kind regards