mirror of
https://github.com/deepseek-ai/DeepEP
synced 2025-06-26 18:28:11 +00:00
Add automatic warp count control for low-latency kernels (#213)
* Add automatic warp count control for low-latency dispatch
* Add automatic warp count control for low-latency combine
* More assertions
This commit is contained in:
@@ -41,6 +41,7 @@ private:
|
||||
|
||||
// Device info and communication
|
||||
int device_id;
|
||||
int num_device_sms;
|
||||
int rank, rdma_rank, nvl_rank;
|
||||
int num_ranks, num_rdma_ranks, num_nvl_ranks;
|
||||
cudaIpcMemHandle_t ipc_handles[NUM_MAX_NVL_PEERS];
|
||||
|
||||
Reference in New Issue
Block a user