This commit is contained in:
Chenggang Zhao 2025-06-23 17:58:44 +08:00
parent 901cdf79be
commit fdb41efbd3

View File

@@ -470,7 +470,7 @@ dispatch(int4* recv_x, float* recv_x_scales, int64_t* recv_topk_idx, float* recv
// Read RDMA rank existence // Read RDMA rank existence
uint64_t is_token_in_rank_uint64 = 0; uint64_t is_token_in_rank_uint64 = 0;
if (lane_id < kNumRDMARanks) { if (lane_id < kNumRDMARanks) {
is_token_in_rank_uint64 = *reinterpret_cast<const uint64_t*>(is_token_in_rank + token_idx * num_ranks + lane_id * NUM_MAX_NVL_PEERS); is_token_in_rank_uint64 = __ldg(reinterpret_cast<const uint64_t*>(is_token_in_rank + token_idx * num_ranks + lane_id * NUM_MAX_NVL_PEERS));
global_rdma_tail_idx += (is_token_in_rank_uint64 != 0); global_rdma_tail_idx += (is_token_in_rank_uint64 != 0);
} }
__syncwarp(); __syncwarp();