Support zero-copy for low-latency combine

2025-06-26 18:28:11 +00:00 · 2025-03-18 15:41:50 +08:00
parent 82dcf48fd3
commit dcaf73e5ff
7 changed files with 80 additions and 28 deletions
--- a/csrc/deep_ep.hpp
+++ b/csrc/deep_ep.hpp
@@ -143,7 +143,11 @@ public:
    low_latency_combine(const torch::Tensor& x, const torch::Tensor& topk_idx, const torch::Tensor& topk_weights,
                        const torch::Tensor& src_info, const torch::Tensor& layout_range,
                        int num_max_dispatch_tokens_per_rank, int num_experts,
-                        bool async, bool return_recv_hook, std::optional<torch::Tensor> out = std::nullopt);
+                        bool zero_copy, bool async, bool return_recv_hook,
+                        const std::optional<torch::Tensor>& out = std::nullopt);
+
+    torch::Tensor
+    get_next_low_latency_combine_buffer(int num_max_dispatch_tokens_per_rank, int hidden, int num_experts);
 };

 } // namespace deep_ep