Mirror of https://github.com/deepseek-ai/DeepEP (synced 2025-06-26 18:28:11 +00:00)
More assertions
This commit is contained in:
parent
72beb15827
commit
b09308b731
@@ -275,7 +275,7 @@ dispatch(void* packed_recv_x, void* packed_recv_x_scales,
|
|||||||
// Wait tokens to arrive
|
// Wait tokens to arrive
|
||||||
// NOTES: using sub-warp 1 to overlap with sub-warp 0
|
// NOTES: using sub-warp 1 to overlap with sub-warp 0
|
||||||
int num_recv_tokens, recv_token_begin_idx;
|
int num_recv_tokens, recv_token_begin_idx;
|
||||||
EP_DEVICE_ASSERT(num_warps_per_group > 1);
|
EP_DEVICE_ASSERT(num_warps_per_group > 1 and num_warp_groups < 15);
|
||||||
if (sub_warp_id == 1 and lane_id == 0) {
|
if (sub_warp_id == 1 and lane_id == 0) {
|
||||||
while ((num_recv_tokens = ld_acquire_sys_global(rdma_recv_count + local_expert_idx * num_ranks + src_rank)) == 0);
|
while ((num_recv_tokens = ld_acquire_sys_global(rdma_recv_count + local_expert_idx * num_ranks + src_rank)) == 0);
|
||||||
num_recv_tokens = -num_recv_tokens - 1;
|
num_recv_tokens = -num_recv_tokens - 1;
|
||||||
@@ -476,7 +476,7 @@ combine(void* combined_x,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Put the finishing flag
|
// Put the finishing flag
|
||||||
EP_DEVICE_ASSERT(num_warps_per_group > 1);
|
EP_DEVICE_ASSERT(num_warps_per_group > 1 and num_warp_groups < 16);
|
||||||
asm volatile("bar.sync %0, %1;" :: "r"(warp_group_id + 1), "r"(num_warps_per_group * 32));
|
asm volatile("bar.sync %0, %1;" :: "r"(warp_group_id + 1), "r"(num_warps_per_group * 32));
|
||||||
if (sub_warp_id == 1 and lane_id == 0) {
|
if (sub_warp_id == 1 and lane_id == 0) {
|
||||||
while (ld_acquire_global(atomic_clean_flag) == 0);
|
while (ld_acquire_global(atomic_clean_flag) == 0);
|
||||||
|
Loading…
Reference in New Issue
Block a user