mirror of
https://github.com/deepseek-ai/DeepEP
synced 2025-06-26 18:28:11 +00:00
Merge pull request #153 from wangfakang/opt-shuffled_dst
Shuffling the starting index of target rank for different ranks and channels
This commit is contained in:
commit
f0a9f10629
@ -703,7 +703,8 @@ dispatch(int4* recv_x, float* recv_x_scales, int64_t* recv_topk_idx, float* recv
|
|||||||
int last_issued_tail = 0;
|
int last_issued_tail = 0;
|
||||||
while (__any_sync(0xffffffff, num_tokens_to_send > 0)) {
|
while (__any_sync(0xffffffff, num_tokens_to_send > 0)) {
|
||||||
for (int i = 0, synced_num_tokens_to_send; i < kNumRDMARanks; ++ i) {
|
for (int i = 0, synced_num_tokens_to_send; i < kNumRDMARanks; ++ i) {
|
||||||
int dst_rdma_rank = (i + channel_id) % kNumRDMARanks;
|
// To mitigate incast congestion, stagger the starting target rank by both the channel index and the sender's RDMA rank, so that different ranks and channels do not all begin with the same destination
|
||||||
|
int dst_rdma_rank = (i + channel_id + rdma_rank) % kNumRDMARanks;
|
||||||
synced_num_tokens_to_send = __shfl_sync(0xffffffff, num_tokens_to_send, dst_rdma_rank);
|
synced_num_tokens_to_send = __shfl_sync(0xffffffff, num_tokens_to_send, dst_rdma_rank);
|
||||||
if (synced_num_tokens_to_send == 0)
|
if (synced_num_tokens_to_send == 0)
|
||||||
continue;
|
continue;
|
||||||
|
Loading…
Reference in New Issue
Block a user