I am using Windows 10 with an NVIDIA GT 1030 and following the tutorial "Tuning High Performance Convolution on NVIDIA GPUs" (Tutorials » Tuning High Performance Convolution on NVIDIA GPUs). When I run tune_conv2d_cuda.py, an error occurs during the tuning step; it looks RPC-related. If anybody can help, thanks a lot!
CODE:
import logging
import sys

import numpy as np

import tvm
import topi
from topi.testing import conv2d_nchw_python
from tvm import autotvm


@autotvm.template
def conv2d_no_batching(N, H, W, CO, CI, KH, KW, stride, padding):
    """Define a tunable CUDA schedule for an NCHW conv2d with batch size 1.

    Parameters
    ----------
    N : int
        Batch size; must be 1 (asserted below).
    H, W : int
        Spatial extents of the input placeholder, which has shape
        ``(N, CI, H, W)``.
    CO, CI : int
        Output and input channel counts; the kernel placeholder has shape
        ``(CO, CI, KH, KW)``.
    KH, KW : int
        Kernel spatial extents.
    stride, padding
        Forwarded unchanged to ``topi.nn.conv2d_nchw``.

    Returns
    -------
    (s, args)
        The schedule and the list ``[raw_data, kernel, conv]`` of tensors
        to pass to ``tvm.build``.
    """
    assert N == 1, "Only consider batch_size = 1 in this template"

    data = tvm.placeholder((N, CI, H, W), name='data')
    kernel = tvm.placeholder((CO, CI, KH, KW), name='kernel')
    conv = topi.nn.conv2d_nchw(data, kernel, stride, padding,
                               dilation=1, out_dtype='float32')
    s = tvm.create_schedule([conv.op])

    # ---- search-space definition ----
    n, f, y, x = s[conv].op.axis
    rc, ry, rx = s[conv].op.reduce_axis

    cfg = autotvm.get_config()
    cfg.define_split("tile_f", f, num_outputs=4)
    cfg.define_split("tile_y", y, num_outputs=4)
    cfg.define_split("tile_x", x, num_outputs=4)
    cfg.define_split("tile_rc", rc, num_outputs=3)
    cfg.define_split("tile_ry", ry, num_outputs=3)
    cfg.define_split("tile_rx", rx, num_outputs=3)
    cfg.define_knob("auto_unroll_max_step", [0, 512, 1500])
    cfg.define_knob("unroll_explicit", [0, 1])

    # The first input tensor of the conv op is inlined; from here on `data`
    # refers to that tensor and `raw_data` to the original placeholder.
    pad_data = s[conv].op.input_tensors[0]
    s[pad_data].compute_inline()
    data, raw_data = pad_data, data

    output = conv
    OL = s.cache_write(conv, 'local')

    # Cache stages: inputs are read through 'shared' and then 'local' scope.
    AA = s.cache_read(data, 'shared', [OL])
    WW = s.cache_read(kernel, 'shared', [OL])
    AL = s.cache_read(AA, 'local', [OL])
    WL = s.cache_read(WW, 'local', [OL])

    # Tile the spatial axes of the output and bind them to block / vthread /
    # thread indices.
    n, f, y, x = s[output].op.axis
    bf, vf, tf, fi = cfg["tile_f"].apply(s, output, f)
    by, vy, ty, yi = cfg["tile_y"].apply(s, output, y)
    bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x)
    kernel_scope = n  # this is the scope to attach global config inside this kernel

    s[output].bind(bf, tvm.thread_axis("blockIdx.z"))
    s[output].bind(by, tvm.thread_axis("blockIdx.y"))
    s[output].bind(bx, tvm.thread_axis("blockIdx.x"))
    s[output].bind(vf, tvm.thread_axis("vthread"))
    s[output].bind(vy, tvm.thread_axis("vthread"))
    s[output].bind(vx, tvm.thread_axis("vthread"))
    s[output].bind(tf, tvm.thread_axis("threadIdx.z"))
    s[output].bind(ty, tvm.thread_axis("threadIdx.y"))
    s[output].bind(tx, tvm.thread_axis("threadIdx.x"))
    s[output].reorder(n, bf, by, bx, vf, vy, vx, tf, ty, tx, fi, yi, xi)
    s[OL].compute_at(s[output], tx)

    # Tile the reduction axes of the local accumulation stage.
    n, f, y, x = s[OL].op.axis
    rc, ry, rx = s[OL].op.reduce_axis
    rco, rcm, rci = cfg['tile_rc'].apply(s, OL, rc)
    # Each split knob is applied to the axis it was defined on above
    # ("tile_ry" from ry, "tile_rx" from rx). The pasted code had the two
    # swapped, which only lines up when KH == KW.
    ryo, rym, ryi = cfg['tile_ry'].apply(s, OL, ry)
    rxo, rxm, rxi = cfg['tile_rx'].apply(s, OL, rx)
    s[OL].reorder(rco, ryo, rxo, rcm, rym, rxm, rci, ryi, rxi, n, f, y, x)

    s[AA].compute_at(s[OL], rxo)
    s[WW].compute_at(s[OL], rxo)
    s[AL].compute_at(s[OL], rxm)
    s[WL].compute_at(s[OL], rxm)

    # Fuse each shared-scope load and split it by the threadIdx extents
    # chosen for the output tile (index 2 of each spatial split), then bind
    # the resulting loops to the thread indices.
    for load in [AA, WW]:
        n, f, y, x = s[load].op.axis
        fused = s[load].fuse(n, f, y, x)
        tz, fused = s[load].split(fused, nparts=cfg["tile_f"].size[2])
        ty, fused = s[load].split(fused, nparts=cfg["tile_y"].size[2])
        tx, fused = s[load].split(fused, nparts=cfg["tile_x"].size[2])
        s[load].bind(tz, tvm.thread_axis("threadIdx.z"))
        s[load].bind(ty, tvm.thread_axis("threadIdx.y"))
        s[load].bind(tx, tvm.thread_axis("threadIdx.x"))

    # Unrolling behaviour is itself part of the tuned configuration.
    s[output].pragma(kernel_scope, 'auto_unroll_max_step',
                     cfg['auto_unroll_max_step'].val)
    s[output].pragma(kernel_scope, 'unroll_explicit',
                     cfg['unroll_explicit'].val)

    return s, [raw_data, kernel, conv]
# Send autotvm's debug log to stdout so tuning progress is visible.
logging.getLogger('autotvm').setLevel(logging.DEBUG)
logging.getLogger('autotvm').addHandler(logging.StreamHandler(sys.stdout))

# Workload passed to the template as
# (N, H, W, CO, CI, KH, KW, stride, padding).
N, H, W, CO, CI, KH, KW, strides, padding = 1, 7, 7, 512, 512, 3, 3, (1, 1), (1, 1)
task = autotvm.task.create(conv2d_no_batching,
                           args=(N, H, W, CO, CI, KH, KW, strides, padding),
                           target='cuda')
print(task.config_space)

# NOTE(review): the traceback posted with this script fails inside
# LocalRunner (get_build_kwargs -> request_remote -> connect_tracker) with
# WinError 10049 on Windows. Presumably the address LocalRunner uses for its
# tracker is not connectable on this platform -- confirm against the TVM
# version in use before relying on LocalRunner here.
measure_option = autotvm.measure_option(
    builder=autotvm.LocalBuilder(),
    runner=autotvm.LocalRunner(repeat=3, min_repeat_ms=100, timeout=4)
)

# Run 20 trials with the XGBoost tuner and append the results to conv2d.log.
tuner = autotvm.tuner.XGBTuner(task)
tuner.tune(n_trial=20,
           measure_option=measure_option,
           callbacks=[autotvm.callback.log_to_file('conv2d.log')])
# Look up the best configuration recorded in the tuning log for this task.
dispatch_context = autotvm.apply_history_best("conv2d.log")
best_config = dispatch_context.query(task.target, task.workload)
print("\nBest config:")
print(best_config)

# Instantiate the template under the logged history and the CUDA target,
# then build it.
with autotvm.apply_history_best('conv2d.log'):
    with tvm.target.create("cuda"):
        s, arg_bufs = conv2d_no_batching(N, H, W, CO, CI, KH, KW, strides, padding)
        func = tvm.build(s, arg_bufs)

# Check the built function against the NumPy reference implementation.
a_np = np.random.uniform(size=(N, CI, H, W)).astype(np.float32)
w_np = np.random.uniform(size=(CO, CI, KH, KW)).astype(np.float32)
c_np = conv2d_nchw_python(a_np, w_np, strides, padding)

ctx = tvm.gpu()
a_tvm = tvm.nd.array(a_np, ctx=ctx)
w_tvm = tvm.nd.array(w_np, ctx=ctx)
c_tvm = tvm.nd.empty(c_np.shape, ctx=ctx)
func(a_tvm, w_tvm, c_tvm)

tvm.testing.assert_allclose(c_np, c_tvm.asnumpy(), rtol=1e-2)

# Time the function; number=400 is passed to time_evaluator and the mean of
# the result is reported.
evaluator = func.time_evaluator(func.entry_name, ctx, number=400)
print('Time cost of this operator: %f' % evaluator(a_tvm, w_tvm, c_tvm).mean)
ERROR:
Exception in thread Thread-5: Traceback (most recent call last): File “D:\python3.7 (64-bit)\lib\threading.py”, line 926, in _bootstrap_inner self.run() File “D:\python3.7 (64-bit)\lib\threading.py”, line 870, in run self._target(*self._args, **self._kwargs) File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\autotvm\measure\measure_methods.py”, line 572, in _check remote = request_remote(device_key, host, port, priority) File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\autotvm\measure\measure_methods.py”, line 539, in request_remote tracker = _rpc.connect_tracker(host, port) File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\rpc\client.py”, line 430, in connect_tracker return TrackerSession((url, port)) File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\rpc\client.py”, line 221, in init self._connect() File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\rpc\client.py”, line 227, in _connect self._sock = base.connect_with_retry(self._addr) File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\rpc\base.py”, line 171, in connect_with_retry raise sock_err File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\rpc\base.py”, line 167, in connect_with_retry sock.connect(addr) OSError: [WinError 10049] The requested address is not valid in its context
Get devices for measurement successfully! Traceback (most recent call last): File “”, line 3, in File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\autotvm\tuner\xgboost_tuner.py”, line 86, in tune super(XGBTuner, self).tune(*args, **kwargs) File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\autotvm\tuner\tuner.py”, line 108, in tune measure_batch = create_measure_batch(self.task, measure_option) File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\autotvm\measure\measure.py”, line 256, in create_measure_batch build_kwargs = runner.get_build_kwargs() File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\autotvm\measure\measure_methods.py”, line 231, in get_build_kwargs remote = request_remote(self.key, self.host, self.port) File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\autotvm\measure\measure_methods.py”, line 539, in request_remote tracker = _rpc.connect_tracker(host, port) File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\rpc\client.py”, line 430, in connect_tracker return TrackerSession((url, port)) File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\rpc\client.py”, line 221, in init self._connect() File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\rpc\client.py”, line 227, in _connect self._sock = base.connect_with_retry(self._addr) File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\rpc\base.py”, line 171, in connect_with_retry raise sock_err File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\rpc\base.py”, line 
167, in connect_with_retry sock.connect(addr) OSError: [WinError 10049] The requested address is not valid in its context Exception ignored in: <function Server.del at 0x00000274F6AF4828> Traceback (most recent call last): File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\rpc\server.py”, line 408, in del self.terminate() File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\rpc\server.py”, line 400, in terminate os.killpg(self.proc.pid, signal.SIGTERM) AttributeError: module ‘os’ has no attribute ‘killpg’ Exception ignored in: <function Tracker.del at 0x00000274F8682678> Traceback (most recent call last): File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\rpc\tracker.py”, line 431, in del self.terminate() File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\rpc\tracker.py”, line 423, in terminate self._stop_tracker() File “C:\Users\ZhangHan\AppData\Roaming\Python\Python37\site-packages\tvm-0.6.dev0-py3.7-win-amd64.egg\tvm\rpc\tracker.py”, line 411, in _stop_tracker sock.connect((self.host, self.port)) OSError: [WinError 10049] The requested address is not valid in its context