mirror of
https://github.com/deepseek-ai/DeepEP
synced 2025-06-26 18:28:11 +00:00
Fix bugs
This commit is contained in:
@@ -793,7 +793,7 @@ dispatch(int4* recv_x, float* recv_x_scales, int64_t* recv_topk_idx, float* recv
|
||||
if (lane_id == 0) {
|
||||
nvshmemi_ibgda_amo_nonfetch_add(rdma_channel_head.buffer(rdma_rank),
|
||||
cached_rdma_channel_tail - cached_rdma_channel_head,
|
||||
translate_dst_rdma_rank<kLowLatencyMode>(lane_id, nvl_rank),
|
||||
translate_dst_rdma_rank<kLowLatencyMode>(src_rdma_rank, nvl_rank),
|
||||
channel_id + num_channels, src_rdma_rank == rdma_rank);
|
||||
cached_rdma_channel_head = cached_rdma_channel_tail;
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ def test_main(num_tokens: int, hidden: int, num_experts: int, num_topk: int,
|
||||
assert num_experts % num_ranks == 0
|
||||
num_local_experts = num_experts // num_ranks
|
||||
|
||||
# NOTES: the integers greater than 256 exceeds the BF16 precision limit
|
||||
# NOTES: the integers greater than 256 exceed the BF16 precision limit
|
||||
rank_offset = 128
|
||||
assert num_ranks - rank_offset < 257, 'Too many ranks (exceeding test precision limit)'
|
||||
|
||||
|
||||
Reference in New Issue
Block a user