Remove the low-latency usage flag (#214)

2025-06-26 18:28:11 +00:00 · 2025-06-16 13:30:14 +08:00
parent 1b92be8a71
commit 8aaddf76ae
6 changed files with 15 additions and 69 deletions
--- a/csrc/kernels/api.cuh
+++ b/csrc/kernels/api.cuh
@@ -147,9 +147,8 @@ void dispatch(void* packed_recv_x, void* packed_recv_x_scales,
              int num_tokens, int hidden, int num_max_dispatch_tokens_per_rank,
              int num_topk, int num_experts, int rank, int num_ranks,
              bool use_fp8, bool round_scale, bool use_ue8m0,
-              void* workspace, int* usage_flag,
-              int num_device_sms, cudaStream_t stream,
-              int phases);
+              void* workspace, int num_device_sms,
+              cudaStream_t stream, int phases);

 void combine(void* combined_x,
             void* rdma_recv_x, int* rdma_recv_flag, void* rdma_send_x,
@@ -158,9 +157,8 @@ void combine(void* combined_x,
             int* next_clean, int num_next_clean_int,
             int num_combined_tokens, int hidden, int num_max_dispatch_tokens_per_rank,
             int num_topk, int num_experts, int rank, int num_ranks,
-             void* workspace, int* usage_flag,
-             int num_device_sms, cudaStream_t stream,
-             int phases, bool zero_copy);
+             void* workspace, int num_device_sms,
+             cudaStream_t stream, int phases, bool zero_copy);

 } // namespace internode_ll