Support zero-copy for low-latency combine

This commit is contained in:
Chenggang Zhao
2025-03-18 15:41:50 +08:00
parent 82dcf48fd3
commit dcaf73e5ff
7 changed files with 80 additions and 28 deletions

View File

@@ -143,7 +143,11 @@ public:
low_latency_combine(const torch::Tensor& x, const torch::Tensor& topk_idx, const torch::Tensor& topk_weights,
const torch::Tensor& src_info, const torch::Tensor& layout_range,
int num_max_dispatch_tokens_per_rank, int num_experts,
bool async, bool return_recv_hook, std::optional<torch::Tensor> out = std::nullopt);
bool zero_copy, bool async, bool return_recv_hook,
const std::optional<torch::Tensor>& out = std::nullopt);
torch::Tensor
get_next_low_latency_combine_buffer(int num_max_dispatch_tokens_per_rank, int hidden, int num_experts);
};
} // namespace deep_ep