From dd133d39bce06469292311a4accf0ae79dcb45fa Mon Sep 17 00:00:00 2001 From: Shangyan Zhou Date: Mon, 16 Jun 2025 17:05:11 +0800 Subject: [PATCH] Fix warp synchronization. (#215) * Fix warp synchronization. * Another fix. --- csrc/kernels/internode.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/csrc/kernels/internode.cu b/csrc/kernels/internode.cu index 65858d4..4a33f17 100644 --- a/csrc/kernels/internode.cu +++ b/csrc/kernels/internode.cu @@ -567,6 +567,7 @@ dispatch(int4* recv_x, float* recv_x_scales, int64_t* recv_topk_idx, float* recv // Update last token tail if (last_rdma_tail_idx >= 0) st_release_cta(const_cast(rdma_send_channel_tail + lane_id), last_rdma_tail_idx + 1); + __syncwarp(); // Release sequential lock lane_id == 0 ? (rdma_send_next_token_idx += 1) : 0; @@ -633,6 +634,7 @@ dispatch(int4* recv_x, float* recv_x_scales, int64_t* recv_topk_idx, float* recv nvshmemi_ibgda_amo_nonfetch_add(rdma_channel_tail.buffer(rdma_rank), num_tokens_to_issue, translate_dst_rdma_rank(dst_rdma_rank, nvl_rank), channel_id, dst_rdma_rank == rdma_rank); } + __syncwarp(); } } } else if (warp_role == WarpRole::kRDMAAndNVLForwarder) {