Remove the low-latency usage flag (#214)

This commit is contained in:
Chenggang Zhao
2025-06-16 13:30:14 +08:00
committed by GitHub
parent 1b92be8a71
commit 8aaddf76ae
6 changed files with 15 additions and 69 deletions

View File

@@ -147,9 +147,8 @@ void dispatch(void* packed_recv_x, void* packed_recv_x_scales,
int num_tokens, int hidden, int num_max_dispatch_tokens_per_rank,
int num_topk, int num_experts, int rank, int num_ranks,
bool use_fp8, bool round_scale, bool use_ue8m0,
void* workspace, int* usage_flag,
int num_device_sms, cudaStream_t stream,
int phases);
void* workspace, int num_device_sms,
cudaStream_t stream, int phases);
void combine(void* combined_x,
void* rdma_recv_x, int* rdma_recv_flag, void* rdma_send_x,
@@ -158,9 +157,8 @@ void combine(void* combined_x,
int* next_clean, int num_next_clean_int,
int num_combined_tokens, int hidden, int num_max_dispatch_tokens_per_rank,
int num_topk, int num_experts, int rank, int num_ranks,
void* workspace, int* usage_flag,
int num_device_sms, cudaStream_t stream,
int phases, bool zero_copy);
void* workspace, int num_device_sms,
cudaStream_t stream, int phases, bool zero_copy);
} // namespace internode_ll