AutoTVM: MacOS and Raspberry Pi3 RPC timed out on WiFi

Hi there!

I am following the "Auto-tuning a Convolutional Network for ARM CPU" tutorial; however, AutoTVM fails with an RPC timeout error.

Steps I took:

  1. (host) Started RPC tracker:
$ python -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190 
2024-02-09 16:07:29.473 INFO bind to 0.0.0.0:9190
  2. (rpi) Connected to RPC tracker:
$ python -m tvm.exec.rpc_server --tracker=192.168.0.43:9190 --key=homePi --no-fork
2024-02-09 15:09:14.161 INFO bind to 0.0.0.0:9090
  3. (host) Check status:
$ python -m tvm.exec.query_rpc_tracker --host=0.0.0.0 --port=9190

Tracker address 0.0.0.0:9190

Server List
------------------------------
server-address           key
------------------------------
    192.168.0.99:9090    server:homePi # valid IP of the Raspberry
------------------------------

Queue Status
------------------------------
key      total  free  pending
------------------------------
homePi   1      1     0      
------------------------------
  4. (host) Start tuning with the given example code. The output is as follows:
2024-02-09 16:14:09.611 ERROR Socket error: timed out
Exception in thread Thread-2 (_check):
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/threading.py", line 1038, in _bootstrap_inner
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/threading.py", line 975, in run
    self._target(*self._args, **self._kwargs)
  File "<~>/tvm/python/tvm/autotvm/measure/measure_methods.py", line 801, in _check
    remote = request_remote(device_key, host, port, priority)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<~>/tvm/python/tvm/autotvm/measure/measure_methods.py", line 767, in request_remote
    tracker = _rpc.connect_tracker(host, port)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<~>/tvm/python/tvm/rpc/client.py", line 558, in connect_tracker
    return TrackerSession((url, port))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<~>/tvm/python/tvm/rpc/client.py", line 312, in __init__
    self._connect()
  File "<~>/tvm/python/tvm/rpc/client.py", line 318, in _connect
    self._sock = base.connect_with_retry(self._addr)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<~>/tvm/python/tvm/rpc/base.py", line 195, in connect_with_retry
    raise sock_err
  File "<~>/tvm/python/tvm/rpc/base.py", line 189, in connect_with_retry
    sock.connect(addr)
TimeoutError: timed out
  5. (rpi) Check open ports:
sudo ss -ltnp
State            Recv-Q            Send-Q                       Local Address:Port                       Peer Address:Port           Process                                              
LISTEN           0                 1                                  0.0.0.0:9090                            0.0.0.0:*               users:(("python",pid=1741,fd=3))                    

I am trying to tune a stock MobileNet model from the example code.

Could the problem be that the two devices are connected over WiFi (both on the same network)? Or did I miss some configuration step?

Hello,

How do you run the tuning command/script?

I’m following the example from the documentation. I’ve simplified the code so that it contains only the code paths that are actually reached. For the sake of completeness:

import os
import numpy as np
import tvm

import tvm.contrib.graph_executor as runtime
import tvm.relay.testing

from tvm import relay, autotvm
from tvm.autotvm.tuner import XGBTuner
from tvm.contrib.utils import tempdir

import logging
logging.getLogger('autotvm').setLevel(logging.DEBUG)

def get_network(batch_size):
    """Build the MobileNet test workload for the given batch size.

    Returns a tuple ``(mod, params, input_shape, output_shape)``: the module
    and parameters come from ``relay.testing.mobilenet.get_workload``, and the
    shapes are ``(batch_size, 3, 224, 224)`` and ``(batch_size, 1000)``.
    """
    mod, params = relay.testing.mobilenet.get_workload(batch_size=batch_size)

    # Shapes are fixed apart from the leading batch dimension.
    in_shape = (batch_size,) + (3, 224, 224)
    out_shape = (batch_size,) + (1000,)

    return mod, params, in_shape, out_shape


# Compilation target: LLVM with the arm_cpu device flag and an aarch64 Linux triple.
target = tvm.target.Target("llvm -device=arm_cpu -mtriple=aarch64-linux-gnu")
# Key used to request the device; matches the --key=homePi the RPC server registered with.
device_key = "homePi"

# Final tuning log, named after the device key.
log_file = f"{device_key}.log"
# Element type of the random input tensor built in tune_and_evaluate().
dtype = "float32"

# Keyword arguments unpacked into tune_tasks() by tune_and_evaluate().
tuning_option = {
    "log_filename": log_file,
    "n_trial": 1500,
    "early_stopping": 800,
    "measure_option": autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"),
        runner=autotvm.RPCRunner(
            device_key,
            # NOTE(review): host/port here look like they should be the RPC
            # tracker's address (started on port 9190 above); 9090 is the port
            # the device's RPC server bound to. Confirm before running.
            host="192.168.65.1",
            port=9090,
            number=5,
            timeout=10,
        ),
    ),
}

def tune_tasks(
    tasks,
    measure_option,
    n_trial=1000,
    early_stopping=None,
    log_filename="tuning.log",
    use_transfer_learning=True,
):
    """Tune each task with an XGBTuner and keep the best records.

    Tasks are visited in reverse order. Measurement records are logged to a
    scratch file (``log_filename + ".tmp"``) through
    ``autotvm.callback.log_to_file``; once all tasks are done the best records
    are picked into ``log_filename`` and the scratch file is removed.

    Parameters
    ----------
    tasks : sequence of tuning tasks (must support ``len`` and ``reversed``).
    measure_option : passed through to ``tuner.tune``.
    n_trial : upper bound on trials per task; capped by the task's
        ``config_space`` length.
    early_stopping : passed through to ``tuner.tune``.
    log_filename : destination file for the best records.
    use_transfer_learning : when true, each tuner is seeded with the records
        already present in the scratch file.
    """
    # Start from a clean scratch log so stale records are not reused.
    scratch_log = log_filename + ".tmp"
    if os.path.exists(scratch_log):
        os.remove(scratch_log)

    for position, task in enumerate(reversed(tasks), start=1):
        prefix = "[Task %2d/%2d] " % (position, len(tasks))

        tuner = XGBTuner(task, loss_type="reg")

        # Seed the tuner with records produced by previously tuned tasks.
        if use_transfer_learning and os.path.isfile(scratch_log):
            history = autotvm.record.load_from_file(scratch_log)
            tuner.load_history(history)

        # Never ask for more trials than the task has configurations.
        trial_budget = min(n_trial, len(task.config_space))
        progress_cb = autotvm.callback.progress_bar(trial_budget, prefix=prefix)
        logging_cb = autotvm.callback.log_to_file(scratch_log)
        tuner.tune(
            n_trial=trial_budget,
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=[progress_cb, logging_cb],
        )

    # Distill the scratch log into the final log, then discard the scratch file.
    autotvm.record.pick_best(scratch_log, log_filename)
    os.remove(scratch_log)

def tune_and_evaluate(tuning_opt):
    """Tune the network's conv2d tasks, then build and benchmark it remotely.

    ``tuning_opt`` is a dict of keyword arguments forwarded to ``tune_tasks``.
    After tuning, the model is compiled with the best records from
    ``log_file``, exported as ``net.tar``, uploaded to the device requested
    through the tracker, and benchmarked there with random input data.
    """
    # Extract the conv2d workloads from the relay program.
    print("Extract tasks...")
    mod, params, input_shape, _ = get_network(batch_size=1)
    main_func = mod["main"]
    conv2d_op = relay.op.get("nn.conv2d")
    tasks = autotvm.task.extract_from_program(
        main_func, target=target, params=params, ops=(conv2d_op,)
    )

    # Run the tuning loop over the extracted tasks.
    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # Everything below runs with the best tuning records applied.
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            compiled = relay.build_module.build(mod, target=target, params=params)

        # Export the compiled library into a temporary directory.
        tmp = tempdir()
        filename = "net.tar"
        local_path = tmp.relpath(filename)
        compiled.export_library(local_path)

        # Ship the library to the remote device and load it there.
        print(f"Upload... {tmp}")
        session = autotvm.measure.request_remote(device_key, "127.0.0.1", 9190, timeout=10000)
        session.upload(local_path)
        remote_lib = session.load_module(filename)

        # Create the graph executor on the remote device and feed random input.
        remote_dev = session.device(str(target), 0)
        executor = runtime.GraphModule(remote_lib["default"](remote_dev))
        random_input = (np.random.uniform(size=input_shape)).astype(dtype)
        executor.set_input("data", tvm.nd.array(random_input))

        # Measure inference time on the device.
        print("Evaluate inference time cost...")
        print(executor.benchmark(remote_dev, number=1, repeat=10))


# Script entry point: tune, compile and evaluate using the options defined above.
tune_and_evaluate(tuning_option)

The host and port passed to RPCRunner must be those of the tracker. In your case, I’m guessing the host should be “0.0.0.0” and the port should be 9190.

You don’t need --no-fork but that’s not relevant.

Thanks @mshr-h the host address was the problem. What a petty mistake :frowning_face:

When the homePi is registered, the following INFO message is given:

INFO:root:If you are running ROCM/Metal, fork will cause compiler internal error. Try to launch with arg --no-fork

So that’s why I added that option.

1 Like