From 8da790e3f3232f48838a47c5a69ecd8eaddad110 Mon Sep 17 00:00:00 2001 From: Chenggang Zhao Date: Fri, 20 Jun 2025 11:31:57 +0800 Subject: [PATCH] Fix the shifted buffer pointer --- csrc/kernels/internode.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/csrc/kernels/internode.cu b/csrc/kernels/internode.cu index 9a98887..13f2824 100644 --- a/csrc/kernels/internode.cu +++ b/csrc/kernels/internode.cu @@ -743,6 +743,8 @@ dispatch(int4* recv_x, float* recv_x_scales, int64_t* recv_topk_idx, float* recv dst_slot_idx = __shfl_sync(0xffffffff, dst_slot_idx % num_max_nvl_chunked_recv_tokens, 0); // Copy data + // Restore `shifted` to the start of slot `rdma_slot_idx` in the RDMA receive buffer before copying + shifted = rdma_channel_data.recv_buffer(src_rdma_rank) + rdma_slot_idx * num_bytes_per_rdma_token; UNROLLED_WARP_COPY(5, lane_id, hidden_int4, nvl_channel_x.buffer() + dst_slot_idx * hidden_int4, reinterpret_cast<int4*>(shifted),