This commit is contained in:
Chenggang Zhao
2025-06-20 13:44:49 +08:00
parent 8da790e3f3
commit 371df2da52
2 changed files with 2 additions and 2 deletions

View File

@@ -793,7 +793,7 @@ dispatch(int4* recv_x, float* recv_x_scales, int64_t* recv_topk_idx, float* recv
if (lane_id == 0) {
nvshmemi_ibgda_amo_nonfetch_add(rdma_channel_head.buffer(rdma_rank),
cached_rdma_channel_tail - cached_rdma_channel_head,
-translate_dst_rdma_rank<kLowLatencyMode>(lane_id, nvl_rank),
+translate_dst_rdma_rank<kLowLatencyMode>(src_rdma_rank, nvl_rank),
channel_id + num_channels, src_rdma_rank == rdma_rank);
cached_rdma_channel_head = cached_rdma_channel_tail;
}

View File

@@ -15,7 +15,7 @@ def test_main(num_tokens: int, hidden: int, num_experts: int, num_topk: int,
assert num_experts % num_ranks == 0
num_local_experts = num_experts // num_ranks
-# NOTES: the integers greater than 256 exceeds the BF16 precision limit
+# NOTES: the integers greater than 256 exceed the BF16 precision limit
rank_offset = 128
assert num_ranks - rank_offset < 257, 'Too many ranks (exceeding test precision limit)'