Phasing out Legacy Components

However, it looks like c_host currently only provides code generation and lacks runtime support. To reproduce:

import tvm
from tvm.script import ir as I
from tvm.script import tir as T

# Minimal TVMScript module used for the reproduction: one prim_func, annotated
# with a CUDA target, that copies float16 elements of Q into a buffer with
# scope "shared.dyn".
@I.ir_module
class Module:
    @T.prim_func
    def main(Q: T.Buffer((1, 4096, 32, 128), "float16")):
        # Function-level target attribute: kind "cuda" (arch sm_89) with an
        # "llvm" x86_64 CPU host.
        T.func_attr({"target": T.target({"arch": "sm_89", "host": {"keys": ["cpu"], "kind": "llvm", "mtriple": "x86_64-unknown-linux-gnu", "tag": ""}, "keys": ["cuda", "gpu"], "kind": "cuda", "max_num_threads": 1024, "tag": "", "thread_warp_size": 32})})
        # Launch dimensions: blockIdx.y has extent 32, threadIdx.x has extent 256.
        by = T.launch_thread("blockIdx.y", 32)
        v = T.launch_thread("threadIdx.x", 256)
        # Vectorized loop of extent 8; v * 8 + i therefore spans 0..2047.
        for i in T.vectorized(8):
            # NOTE(review): the "shared.dyn" allocation and both flat buffer
            # views are declared inside the vectorized loop body - presumably
            # this is the exact shape the reproduction needs; confirm before
            # hoisting them.
            Q_shared = T.allocate([16384], "float16", "shared.dyn")
            Q_shared_1 = T.Buffer((16384,), "float16", data=Q_shared, scope="shared.dyn")
            # Flat 1-D view over Q's data (1 * 4096 * 32 * 128 = 16777216 elements).
            Q_1 = T.Buffer((16777216,), "float16", data=Q.data)
            # One element per (by, v, i): read offset by * 128 + v * 8 + i from
            # Q, write offset v * 8 + i in the shared buffer.
            Q_shared_1[v * 8 + i] = Q_1[by * 128 + v * 8 + i]

# Use the TVMScript module defined above as the build input.
mod = Module

# Build for the "cuda" target with "c" passed as the target host; this
# target/host combination is what the reproduction is about.
rt_mod = tvm.build(mod, target="cuda", target_host="c")

# Print the source held by the built module and by its first imported module
# (presumably the host-side C code and the device-side CUDA code - confirm).
print(rt_mod.get_source())
print(rt_mod.imported_modules[0].get_source())

import numpy as np
# Random float16 input with the same shape as main's Q buffer, created on the
# CUDA device.
Q = tvm.nd.array(np.random.randn(1, 4096, 32, 128).astype("float16"), device=tvm.cuda())

# Call the built module directly. Per the traceback below, this is the call
# that fails while looking up the module's entry function.
rt_mod(Q)

# Traceback produced by the call above, pasted verbatim as a bare string literal.
'''output
Traceback (most recent call last):
  File "/root/tilelang/debug/unit_vectorize_test.py", line 28, in <module>
    rt_mod(Q)
  File "/usr/local/lib/python3.10/dist-packages/tilelang/3rdparty/tvm/python/tvm/runtime/module.py", line 201, in __call__
    return self.entry_func(*args)
  File "/usr/local/lib/python3.10/dist-packages/tilelang/3rdparty/tvm/python/tvm/runtime/module.py", line 128, in entry_func
    self._entry = self.get_function(self.entry_name)
  File "/usr/local/lib/python3.10/dist-packages/tilelang/3rdparty/tvm/python/tvm/runtime/module.py", line 176, in get_function
    raise AttributeError(f"Module has no function '{name}'")
AttributeError: Module has no function '__tvm_main__'
'''