Support CUDA graph for intranode normal kernels (#203)

2025-06-26 18:28:11 +00:00 · 2025-06-11 11:08:54 +08:00
parent 8da2d7b38d
commit a8299ca7c2
7 changed files with 86 additions and 38 deletions
--- a/csrc/kernels/api.cuh
+++ b/csrc/kernels/api.cuh
@@ -45,7 +45,7 @@ void cached_notify_dispatch(const int* rank_prefix_matrix, int num_memset_int,
 void dispatch(void* recv_x, float* recv_x_scales, int* recv_src_idx, int64_t* recv_topk_idx, float* recv_topk_weights, int* recv_channel_offset,
              int* send_head, const void* x, const float* x_scales, const int64_t* topk_idx, const float* topk_weights,
              const bool* is_token_in_rank, const int* channel_prefix_matrix,
-              int num_tokens, int hidden_int4, int num_topk, int num_experts, int num_scales,
+              int num_tokens, int num_worst_tokens, int hidden_int4, int num_topk, int num_experts, int num_scales,
              void** buffer_ptrs, int rank, int num_ranks,
              cudaStream_t stream, int num_sms,
              int num_max_send_tokens, int num_recv_buffer_tokens);