mirror of
https://github.com/deepseek-ai/DeepEP
synced 2025-06-26 18:28:11 +00:00
Fix notify_dispatch: using warp 0 to issue send
Signed-off-by: wzc.wuzhicheng <wzc.wuzhicheng@linux.alibaba.com>
This commit is contained in:
parent
9fe9021f29
commit
d0225df27d
@@ -254,10 +254,12 @@ notify_dispatch(const int* num_tokens_per_rank, int* moe_recv_counter_mapped, in
|
||||
// TODO: overlap EP barrier and NVL cleaning
|
||||
for (int i = 0; i < kNumRDMARanks; ++i) {
|
||||
if (i != rdma_rank) {
|
||||
nvshmemi_ibgda_put_nbi_warp<true>(reinterpret_cast<uint64_t>(rdma_recv_num_tokens_mixed.recv_buffer(rdma_rank)),
|
||||
reinterpret_cast<uint64_t>(rdma_recv_num_tokens_mixed.send_buffer(i)),
|
||||
(NUM_MAX_NVL_PEERS + num_rdma_experts + 1) * sizeof(int),
|
||||
translate_dst_rdma_rank<kLowLatencyMode>(i, nvl_rank), 0, lane_id, 0);
|
||||
if (warp_id == 0) {
|
||||
nvshmemi_ibgda_put_nbi_warp<true>(reinterpret_cast<uint64_t>(rdma_recv_num_tokens_mixed.recv_buffer(rdma_rank)),
|
||||
reinterpret_cast<uint64_t>(rdma_recv_num_tokens_mixed.send_buffer(i)),
|
||||
(NUM_MAX_NVL_PEERS + num_rdma_experts + 1) * sizeof(int),
|
||||
translate_dst_rdma_rank<kLowLatencyMode>(i, nvl_rank), 0, lane_id, 0);
|
||||
}
|
||||
} else {
|
||||
UNROLLED_WARP_COPY(1, lane_id, NUM_MAX_NVL_PEERS + num_rdma_experts + 1,
|
||||
rdma_recv_num_tokens_mixed.recv_buffer(rdma_rank),
|
||||
|
Loading…
Reference in New Issue
Block a user