From 371df2da52c815c80518741250ba667dc0f770a8 Mon Sep 17 00:00:00 2001 From: Chenggang Zhao Date: Fri, 20 Jun 2025 13:44:49 +0800 Subject: [PATCH] Fix destination RDMA rank in internode dispatch head update and a test comment typo --- csrc/kernels/internode.cu | 2 +- tests/test_low_latency.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/csrc/kernels/internode.cu b/csrc/kernels/internode.cu index 13f2824..339c2a9 100644 --- a/csrc/kernels/internode.cu +++ b/csrc/kernels/internode.cu @@ -793,7 +793,7 @@ dispatch(int4* recv_x, float* recv_x_scales, int64_t* recv_topk_idx, float* recv if (lane_id == 0) { nvshmemi_ibgda_amo_nonfetch_add(rdma_channel_head.buffer(rdma_rank), cached_rdma_channel_tail - cached_rdma_channel_head, - translate_dst_rdma_rank(lane_id, nvl_rank), + translate_dst_rdma_rank(src_rdma_rank, nvl_rank), channel_id + num_channels, src_rdma_rank == rdma_rank); cached_rdma_channel_head = cached_rdma_channel_tail; } diff --git a/tests/test_low_latency.py b/tests/test_low_latency.py index b39be53..9c35980 100644 --- a/tests/test_low_latency.py +++ b/tests/test_low_latency.py @@ -15,7 +15,7 @@ def test_main(num_tokens: int, hidden: int, num_experts: int, num_topk: int, assert num_experts % num_ranks == 0 num_local_experts = num_experts // num_ranks - # NOTES: the integers greater than 256 exceeds the BF16 precision limit + # NOTES: the integers greater than 256 exceed the BF16 precision limit rank_offset = 128 assert num_ranks - rank_offset < 257, 'Too many ranks (exceeding test precision limit)'