Sure. I no longer have the log from the 15-hour run, but I can provide a log generated with "n_trial": 5
(for quick debugging; I’ll launch a new tuning run tonight with a larger n_trial).
Error log printed by my newly added debug output:
kernelName:fuse_conv2d_broadcast_add_relu_1_kernel0
0. global_work_size=112 local_work_size:112
1. global_work_size=28 local_work_size:2
2. global_work_size=32 local_work_size:1
I found a warning at the end of tuning. Could this be the root cause?
WARNING:autotvm:Cannot find config for target=opencl -device=adreno, workload=('depthwise_conv2d_nchw', (1, 32, 112, 112, 'float32'), (32, 1, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
The TVM tuning log_file:
{"i": ["opencl -device=adreno", "topi_nn_dense", [["TENSOR", [1, 1024], "float32"], ["TENSOR", [1000, 1024], "float32"], null], {}, null, {"i": 0, "t": "winograd", "c": null, "e": []}], "r": [[0.0015796354], 0, 0.6794140338897705, 1551918925.7954354], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_conv2d", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1024, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 7, 7, "float32"], [1024, 1024, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 7006, "t": "direct", "c": null, "e": [["tile_f", "sp", [4, 16, 4, 4]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 1, 7, 1]], ["tile_rc", "sp", [512, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.22823480200000001], 0, 65.10172700881958, 1551919113.8254087], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 1024, 7, 7, "float32"], [1024, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 26620, "t": "direct", "c": null, "e": [["tile_f", "sp", [128, 2, 4, 1]], ["tile_y", "sp", [1, 7, 1, 1]], ["tile_x", "sp", [1, 1, 1, 7]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.0233335468], 0, 18.761876344680786, 1551919133.5696566], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_conv2d", [["TENSOR", [1, 512, 7, 7], "float32"], ["TENSOR", [1024, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 7, 7, "float32"], [1024, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 237835, "t": "direct", "c": null, "e": [["tile_f", "sp", [16, 8, 1, 8]], ["tile_y", "sp", [1, 1, 1, 7]], ["tile_x", "sp", [1, 1, 1, 7]], ["tile_rc", "sp", [256, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 1]]}], "r": [[0.8051178643], 0, 31.408406734466553, 1551919165.967637], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 512, 14, 14, "float32"], [512, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 995, "t": "direct", "c": null, "e": [["tile_f", "sp", [32, 1, 4, 4]], ["tile_y", "sp", [7, 1, 1, 1]], ["tile_x", "sp", [1, 7, 1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.028221802], 0, 1.5789644718170166, 1551919169.0258257], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_conv2d", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 512, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 512, 14, 14, "float32"], [512, 512, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1147973, "t": "direct", "c": null, "e": [["tile_f", "sp", [32, 8, 2, 1]], ["tile_y", "sp", [2, 7, 1, 1]], ["tile_x", "sp", [2, 1, 7, 1]], ["tile_rc", "sp", [512, 1]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.2915327811], 0, 74.13661241531372, 1551919250.1008255], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 512, 14, 14], "float32"], ["TENSOR", [512, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 512, 14, 14, "float32"], [512, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 261820, "t": "direct", "c": null, "e": [["tile_f", "sp", [64, 2, 4, 1]], ["tile_y", "sp", [2, 1, 7, 1]], ["tile_x", "sp", [1, 7, 1, 2]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[0.0127529895], 0, 3.613858222961426, 1551919256.5462072], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_conv2d", [["TENSOR", [1, 256, 14, 14], "float32"], ["TENSOR", [512, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 14, 14, "float32"], [512, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 1377259, "t": "direct", "c": null, "e": [["tile_f", "sp", [16, 16, 1, 2]], ["tile_y", "sp", [7, 1, 2, 1]], ["tile_x", "sp", [1, 2, 7, 1]], ["tile_rc", "sp", [4, 64]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[0.1201739687], 0, 21.88442587852478, 1551919279.8842967], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [256, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 256, 28, 28, "float32"], [256, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 59948, "t": "direct", "c": null, "e": [["tile_f", "sp", [64, 1, 2, 2]], ["tile_y", "sp", [1, 1, 7, 2]], ["tile_x", "sp", [2, 1, 7, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 0]]}], "r": [[0.007445703200000001], 0, 0.7793385982513428, 1551919322.245936], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_conv2d", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [256, 256, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 256, 28, 28, "float32"], [256, 256, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 8375960, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 32, 4, 2]], ["tile_y", "sp", [4, 7, 1, 1]], ["tile_x", "sp", [1, 1, 7, 4]], ["tile_rc", "sp", [16, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.9702054840000001], 0, 31.55023217201233, 1551919354.8292158], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 256, 28, 28], "float32"], ["TENSOR", [256, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 256, 28, 28, "float32"], [256, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 1127824, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 16, 1, 2]], ["tile_y", "sp", [1, 1, 4, 7]], ["tile_x", "sp", [7, 1, 4, 1]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 1]]}], "r": [[0.0374450677], 0, 7.278401136398315, 1551919363.329563], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_conv2d", [["TENSOR", [1, 128, 28, 28], "float32"], ["TENSOR", [256, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 28, 28, "float32"], [256, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 217071, "t": "direct", "c": null, "e": [["tile_f", "sp", [4, 4, 4, 4]], ["tile_y", "sp", [1, 1, 4, 7]], ["tile_x", "sp", [1, 4, 1, 7]], ["tile_rc", "sp", [128, 1]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.349284354], 0, 11.665324687957764, 1551919376.1536279], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [128, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 128, 56, 56, "float32"], [128, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 33828, "t": "direct", "c": null, "e": [["tile_f", "sp", [1, 2, 4, 16]], ["tile_y", "sp", [14, 2, 1, 1]], ["tile_x", "sp", [7, 2, 2, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.051596276100000005], 0, 2.215299606323242, 1551919396.7941692], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_conv2d", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [128, 128, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 128, 56, 56, "float32"], [128, 128, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 3150382, "t": "direct", "c": null, "e": [["tile_f", "sp", [8, 2, 8, 1]], ["tile_y", "sp", [1, 28, 2, 1]], ["tile_x", "sp", [28, 1, 2, 1]], ["tile_rc", "sp", [8, 16]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 0]]}], "r": [[0.1357756562], 0, 4.70797061920166, 1551919406.3744864], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 128, 56, 56], "float32"], ["TENSOR", [128, 1, 3, 3], "float32"], [1, 1], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 128, 56, 56, "float32"], [128, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], {"i": 1377910, "t": "direct", "c": null, "e": [["tile_f", "sp", [16, 1, 2, 4]], ["tile_y", "sp", [4, 1, 7, 2]], ["tile_x", "sp", [1, 4, 2, 7]], ["auto_unroll_max_step", "ot", 256], ["unroll_explicit", "ot", 0]]}], "r": [[0.0433779843], 0, 2.1917519569396973, 1551919466.5734634], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_conv2d", [["TENSOR", [1, 64, 56, 56], "float32"], ["TENSOR", [128, 64, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 64, 56, 56, "float32"], [128, 64, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 7564055, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 2, 4, 8]], ["tile_y", "sp", [2, 1, 2, 14]], ["tile_x", "sp", [7, 1, 1, 8]], ["tile_rc", "sp", [16, 4]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.102741901], 0, 60.07958197593689, 1551919527.946826], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 64, 112, 112], "float32"], ["TENSOR", [64, 1, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 64, 112, 112, "float32"], [64, 1, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "float32"], {"i": 1819778, "t": "direct", "c": null, "e": [["tile_f", "sp", [16, 4, 1, 1]], ["tile_y", "sp", [2, 1, 4, 7]], ["tile_x", "sp", [28, 1, 1, 2]], ["auto_unroll_max_step", "ot", 0], ["unroll_explicit", "ot", 1]]}], "r": [[0.1589000572], 0, 5.527170419692993, 1551919548.8157806], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_conv2d", [["TENSOR", [1, 32, 112, 112], "float32"], ["TENSOR", [64, 32, 1, 1], "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 32, 112, 112, "float32"], [64, 32, 1, 1, "float32"], [1, 1], [0, 0], [1, 1], "NCHW", "float32"], {"i": 21574337, "t": "direct", "c": null, "e": [["tile_f", "sp", [16, 2, 1, 2]], ["tile_y", "sp", [7, 4, 1, 4]], ["tile_x", "sp", [7, 8, 2, 1]], ["tile_rc", "sp", [16, 2]], ["tile_ry", "sp", [1, 1]], ["tile_rx", "sp", [1, 1]], ["auto_unroll_max_step", "ot", 1500], ["unroll_explicit", "ot", 0]]}], "r": [[1.04930626], 0, 33.70065188407898, 1551919583.686777], "v": 0.1}
{"i": ["opencl -device=adreno", "topi_nn_conv2d", [["TENSOR", [1, 3, 224, 224], "float32"], ["TENSOR", [32, 3, 3, 3], "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {}, ["conv2d", [1, 3, 224, 224, "float32"], [32, 3, 3, 3, "float32"], [2, 2], [1, 1], [1, 1], "NCHW", "float32"], {"i": 15002129, "t": "direct", "c": null, "e": [["tile_f", "sp", [2, 8, 2, 1]], ["tile_y", "sp", [28, 1, 1, 4]], ["tile_x", "sp", [16, 1, 1, 7]], ["tile_rc", "sp", [1, 3]], ["tile_ry", "sp", [3, 1]], ["tile_rx", "sp", [1, 3]], ["auto_unroll_max_step", "ot", 512], ["unroll_explicit", "ot", 0]]}], "r": [[0.7321428955], 0, 23.286165952682495, 1551919609.88036], "v": 0.1}