Currently, I am trying to learn and understand TVM. I have an MXNet model and I want to run AutoTVM on it, so I am using autotvm.task.extract_from_program. The code is similar to this:
target = tvm.target.cuda()
tasks = autotvm.task.extract_from_program(
mod["main"], target=target, target_host=None, params=param
)
When I print the tasks, I get the following:
Task(func_name=dense_nopack.x86, args=(('TENSOR', (106, 64), 'float32'), ('TENSOR', (64, 64), 'float32'), None, 'float32'), kwargs={}, workload=('dense_nopack.x86', ('TENSOR', (106, 64), 'float32'), ('TENSOR', (64, 64), 'float32'), None, 'float32'))
Task(func_name=dense_pack.x86, args=(('TENSOR', (106, 64), 'float32'), ('TENSOR', (64, 64), 'float32'), None, 'float32'), kwargs={}, workload=('dense_pack.x86', ('TENSOR', (106, 64), 'float32'), ('TENSOR', (64, 64), 'float32'), None, 'float32'))
Task(func_name=batch_matmul.x86, args=(('TENSOR', (8, 106, 8), 'float32'), ('TENSOR', (8, 106, 8), 'float32')), kwargs={}, workload=('batch_matmul.x86', ('TENSOR', (8, 106, 8), 'float32'), ('TENSOR', (8, 106, 8), 'float32')))
Task(func_name=batch_matmul.x86, args=(('TENSOR', (8, 106, 106), 'float32'), ('TENSOR', (8, 8, 106), 'float32')), kwargs={}, workload=('batch_matmul.x86', ('TENSOR', (8, 106, 106), 'float32'), ('TENSOR', (8, 8, 106), 'float32')))
Task(func_name=dense_nopack.x86, args=(('TENSOR', (36, 64), 'float32'), ('TENSOR', (64, 64), 'float32'), None, 'float32'), kwargs={}, workload=('dense_nopack.x86', ('TENSOR', (36, 64), 'float32'), ('TENSOR', (64, 64), 'float32'), None, 'float32'))
Task(func_name=dense_pack.x86, args=(('TENSOR', (36, 64), 'float32'), ('TENSOR', (64, 64), 'float32'), None, 'float32'), kwargs={}, workload=('dense_pack.x86', ('TENSOR', (36, 64), 'float32'), ('TENSOR', (64, 64), 'float32'), None, 'float32'))
Task(func_name=dense_small_batch.cuda, args=(('TENSOR', (106, 512), 'float32'), ('TENSOR', (2, 512), 'float32'), None, 'float32'), kwargs={}, workload=('dense_small_batch.cuda', ('TENSOR', (106, 512), 'float32'), ('TENSOR', (2, 512), 'float32'), None, 'float32'))
Task(func_name=dense_large_batch.cuda, args=(('TENSOR', (106, 512), 'float32'), ('TENSOR', (2, 512), 'float32'), None, 'float32'), kwargs={}, workload=('dense_large_batch.cuda', ('TENSOR', (106, 512), 'float32'), ('TENSOR', (2, 512), 'float32'), None, 'float32'))
Task(func_name=dense_small_batch.cuda, args=(('TENSOR', (106, 512), 'float32'), ('TENSOR', (512, 512), 'float32'), None, 'float32'), kwargs={}, workload=('dense_small_batch.cuda', ('TENSOR', (106, 512), 'float32'), ('TENSOR', (512, 512), 'float32'), None, 'float32'))
Task(func_name=dense_large_batch.cuda, args=(('TENSOR', (106, 512), 'float32'), ('TENSOR', (512, 512), 'float32'), None, 'float32'), kwargs={}, workload=('dense_large_batch.cuda', ('TENSOR', (106, 512), 'float32'), ('TENSOR', (512, 512), 'float32'), None, 'float32'))
Task(func_name=dense_small_batch.cuda, args=(('TENSOR', (106, 64), 'float32'), ('TENSOR', (512, 64), 'float32'), None, 'float32'), kwargs={}, workload=('dense_small_batch.cuda', ('TENSOR', (106, 64), 'float32'), ('TENSOR', (512, 64), 'float32'), None, 'float32'))
Task(func_name=dense_large_batch.cuda, args=(('TENSOR', (106, 64), 'float32'), ('TENSOR', (512, 64), 'float32'), None, 'float32'), kwargs={}, workload=('dense_large_batch.cuda', ('TENSOR', (106, 64), 'float32'), ('TENSOR', (512, 64), 'float32'), None, 'float32'))
Task(func_name=dense_small_batch.cuda, args=(('TENSOR', (106, 128), 'float32'), ('TENSOR', (64, 128), 'float32'), None, 'float32'), kwargs={}, workload=('dense_small_batch.cuda', ('TENSOR', (106, 128), 'float32'), ('TENSOR', (64, 128), 'float32'), None, 'float32'))
Task(func_name=dense_large_batch.cuda, args=(('TENSOR', (106, 128), 'float32'), ('TENSOR', (64, 128), 'float32'), None, 'float32'), kwargs={}, workload=('dense_large_batch.cuda', ('TENSOR', (106, 128), 'float32'), ('TENSOR', (64, 128), 'float32'), None, 'float32'))
Task(func_name=dense_small_batch.cuda, args=(('TENSOR', (106, 64), 'float32'), ('TENSOR', (128, 64), 'float32'), None, 'float32'), kwargs={}, workload=('dense_small_batch.cuda', ('TENSOR', (106, 64), 'float32'), ('TENSOR', (128, 64), 'float32'), None, 'float32'))
Task(func_name=dense_large_batch.cuda, args=(('TENSOR', (106, 64), 'float32'), ('TENSOR', (128, 64), 'float32'), None, 'float32'), kwargs={}, workload=('dense_large_batch.cuda', ('TENSOR', (106, 64), 'float32'), ('TENSOR', (128, 64), 'float32'), None, 'float32'))
Task(func_name=dense_small_batch.cuda, args=(('TENSOR', (106, 64), 'float32'), ('TENSOR', (64, 64), 'float32'), None, 'float32'), kwargs={}, workload=('dense_small_batch.cuda', ('TENSOR', (106, 64), 'float32'), ('TENSOR', (64, 64), 'float32'), None, 'float32'))
Task(func_name=dense_large_batch.cuda, args=(('TENSOR', (106, 64), 'float32'), ('TENSOR', (64, 64), 'float32'), None, 'float32'), kwargs={}, workload=('dense_large_batch.cuda', ('TENSOR', (106, 64), 'float32'), ('TENSOR', (64, 64), 'float32'), None, 'float32'))
Task(func_name=batch_matmul.cuda, args=(('TENSOR', (8, 106, 36), 'float32'), ('TENSOR', (8, 8, 36), 'float32')), kwargs={}, workload=('batch_matmul.cuda', ('TENSOR', (8, 106, 36), 'float32'), ('TENSOR', (8, 8, 36), 'float32')))
Task(func_name=batch_matmul.cuda, args=(('TENSOR', (8, 106, 8), 'float32'), ('TENSOR', (8, 36, 8), 'float32')), kwargs={}, workload=('batch_matmul.cuda', ('TENSOR', (8, 106, 8), 'float32'), ('TENSOR', (8, 36, 8), 'float32')))
Task(func_name=batch_matmul.cuda, args=(('TENSOR', (8, 106, 106), 'float32'), ('TENSOR', (8, 8, 106), 'float32')), kwargs={}, workload=('batch_matmul.cuda', ('TENSOR', (8, 106, 106), 'float32'), ('TENSOR', (8, 8, 106), 'float32')))
Task(func_name=batch_matmul.cuda, args=(('TENSOR', (8, 106, 8), 'float32'), ('TENSOR', (8, 106, 8), 'float32')), kwargs={}, workload=('batch_matmul.cuda', ('TENSOR', (8, 106, 8), 'float32'), ('TENSOR', (8, 106, 8), 'float32')))
Task(func_name=dense_small_batch.cuda, args=(('TENSOR', (36, 64), 'float32'), ('TENSOR', (64, 64), 'float32'), None, 'float32'), kwargs={}, workload=('dense_small_batch.cuda', ('TENSOR', (36, 64), 'float32'), ('TENSOR', (64, 64), 'float32'), None, 'float32'))
Task(func_name=dense_large_batch.cuda, args=(('TENSOR', (36, 64), 'float32'), ('TENSOR', (64, 64), 'float32'), None, 'float32'), kwargs={}, workload=('dense_large_batch.cuda', ('TENSOR', (36, 64), 'float32'), ('TENSOR', (64, 64), 'float32'), None, 'float32'))
Task(func_name=dense_small_batch.cuda, args=(('TENSOR', (36, 128), 'float32'), ('TENSOR', (64, 128), 'float32'), None, 'float32'), kwargs={}, workload=('dense_small_batch.cuda', ('TENSOR', (36, 128), 'float32'), ('TENSOR', (64, 128), 'float32'), None, 'float32'))
Task(func_name=dense_large_batch.cuda, args=(('TENSOR', (36, 128), 'float32'), ('TENSOR', (64, 128), 'float32'), None, 'float32'), kwargs={}, workload=('dense_large_batch.cuda', ('TENSOR', (36, 128), 'float32'), ('TENSOR', (64, 128), 'float32'), None, 'float32'))
Task(func_name=dense_small_batch.cuda, args=(('TENSOR', (36, 64), 'float32'), ('TENSOR', (128, 64), 'float32'), None, 'float32'), kwargs={}, workload=('dense_small_batch.cuda', ('TENSOR', (36, 64), 'float32'), ('TENSOR', (128, 64), 'float32'), None, 'float32'))
Task(func_name=dense_large_batch.cuda, args=(('TENSOR', (36, 64), 'float32'), ('TENSOR', (128, 64), 'float32'), None, 'float32'), kwargs={}, workload=('dense_large_batch.cuda', ('TENSOR', (36, 64), 'float32'), ('TENSOR', (128, 64), 'float32'), None, 'float32'))
Task(func_name=batch_matmul.cuda, args=(('TENSOR', (8, 36, 36), 'float32'), ('TENSOR', (8, 8, 36), 'float32')), kwargs={}, workload=('batch_matmul.cuda', ('TENSOR', (8, 36, 36), 'float32'), ('TENSOR', (8, 8, 36), 'float32')))
Task(func_name=batch_matmul.cuda, args=(('TENSOR', (8, 36, 8), 'float32'), ('TENSOR', (8, 36, 8), 'float32')), kwargs={}, workload=('batch_matmul.cuda', ('TENSOR', (8, 36, 8), 'float32'), ('TENSOR', (8, 36, 8), 'float32')))
...
My question is: why do I get x86 tasks even though I specified the target as cuda?
Another issue is that when I try to run AutoTVM with these tasks, I get the warning below, and GFLOPS always shows 0 for the first x86 tasks:
Variable `placeholder` is directly accessed by host memory (it is not contained in a thread environment or in the function arguments.
Thus, my workaround is to skip tuning the x86 tasks. Maybe I am doing something wrong here. Does anyone have a clue about this warning?
Thank you.