Profile mlc_chat_cli on an Android phone

I tried to do per-op profiling of mlc-chat on an Android phone. I added the following code:

```cpp
// Set up the OpenCL device.
tvm::Device gpu;
gpu.device_type = kDLOpenCL;
int device_type = gpu.device_type;
gpu.device_id = device_id;

// Read the graph JSON from disk.
std::ifstream loaded_json(model.config);
std::string json_data((std::istreambuf_iterator<char>(loaded_json)),
                      std::istreambuf_iterator<char>());
loaded_json.close();

std::string lib_path_string = model.lib.parent_path().string();
std::string model_path_string = model.config.string();
std::cout << "model_path_string " << model_path_string << std::endl;

// Load the compiled model library (lib_path: path to the model .so,
// defined elsewhere) and create a debug graph executor from it.
tvm::runtime::Module loaded_lib = tvm::runtime::Module::LoadFromFile(lib_path);
tvm::runtime::Module executor =
    (*tvm::runtime::Registry::Get("tvm.graph_executor_debug.create"))(
        json_data, loaded_lib, device_type, gpu.device_id);
tvm::runtime::PackedFunc profile = executor.GetFunction("profile");
```
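For reference, a hypothetical continuation of the snippet above, assuming the debug executor was created successfully. The PackedFunc names here (`set_input`, `run_individual`) are the ones graph_executor_debug exposes, but their exact signatures vary across TVM versions, and `input_ndarray` is a placeholder for an input you would prepare yourself:

```cpp
// Hypothetical sketch; PackedFunc signatures vary across TVM versions.
tvm::runtime::PackedFunc set_input = executor.GetFunction("set_input");
set_input("data", input_ndarray);  // input_ndarray: a tvm::runtime::NDArray (assumed prepared)

// graph_executor_debug also exposes run_individual for per-op timing;
// older versions take (number, repeat, min_repeat_ms).
tvm::runtime::PackedFunc run_individual = executor.GetFunction("run_individual");
run_individual(10, 1, 100);
```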

But I don’t know where to get the model JSON file. Initially I thought it was mlc-chat-config.json, but then I realized it means the network (graph) JSON. Could you tell me how to dump that JSON file? This is my first time with TVM, so this might be a very trivial question. Thanks.

MLC uses the VM executor (not the graph executor).

Both of them end up in the OpenCL runtime, and the OpenCL kernel launch/enqueue happens at:

You may try OpenCL profiling using events here.
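For context, event-based timing with the plain OpenCL C API looks roughly like this. This is a minimal sketch, not MLC/TVM code; it assumes `queue` was created with profiling enabled (see the next post) and that `kernel` and `global_size` are set up elsewhere:

```cpp
#include <CL/cl.h>
#include <cstdio>

// Minimal sketch of event-based kernel timing with the standard OpenCL API.
// Assumes `queue` was created with CL_QUEUE_PROFILING_ENABLE and that
// `kernel` and `global_size` were set up elsewhere.
void time_kernel(cl_command_queue queue, cl_kernel kernel, size_t global_size) {
  cl_event ev;
  clEnqueueNDRangeKernel(queue, kernel, /*work_dim=*/1, nullptr,
                         &global_size, nullptr, 0, nullptr, &ev);
  clWaitForEvents(1, &ev);

  cl_ulong start = 0, end = 0;  // device timestamps, in nanoseconds
  clGetEventProfilingInfo(ev, CL_PROFILING_COMMAND_START, sizeof(start), &start, nullptr);
  clGetEventProfilingInfo(ev, CL_PROFILING_COMMAND_END, sizeof(end), &end, nullptr);
  std::printf("kernel time: %.3f us\n", (end - start) / 1e3);
  clReleaseEvent(ev);
}
```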

Also, you might need to enable OpenCL profiling while creating the command queue at:
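For reference, enabling profiling at queue-creation time with the standard OpenCL API looks like the sketch below, assuming `context` and `device` were already obtained via the usual platform queries; inside TVM this would go wherever the runtime creates its command queue:

```cpp
#include <CL/cl.h>

// Sketch: create a command queue with profiling enabled (OpenCL 2.0+ API).
cl_command_queue make_profiling_queue(cl_context context, cl_device_id device) {
  cl_queue_properties props[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
  cl_int err = CL_SUCCESS;
  cl_command_queue queue =
      clCreateCommandQueueWithProperties(context, device, props, &err);
  // On pre-2.0 devices (common with older Android drivers), the deprecated
  // clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err)
  // is the equivalent call.
  return queue;
}
```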

@srkreddy1238 Really appreciate your reply. I want to profile it on a per-op basis. Any feedback on how to do that?