mirror of
https://github.com/deepseek-ai/DeepEP
synced 2025-06-26 18:28:11 +00:00
Support zero-copy for low-latency combine
This commit is contained in:
@@ -143,7 +143,11 @@ public:
|
||||
low_latency_combine(const torch::Tensor& x, const torch::Tensor& topk_idx, const torch::Tensor& topk_weights,
|
||||
const torch::Tensor& src_info, const torch::Tensor& layout_range,
|
||||
int num_max_dispatch_tokens_per_rank, int num_experts,
|
||||
bool async, bool return_recv_hook, std::optional<torch::Tensor> out = std::nullopt);
|
||||
bool zero_copy, bool async, bool return_recv_hook,
|
||||
const std::optional<torch::Tensor>& out = std::nullopt);
|
||||
|
||||
// Member declaration added alongside the `zero_copy` flag of low_latency_combine.
// Takes the maximum number of dispatch tokens per rank, the hidden size, and the
// number of experts (all `int`), and returns a torch::Tensor.
// NOTE(review): presumably the returned tensor is the buffer a caller fills in place
// before invoking low_latency_combine with zero_copy = true — confirm against the
// implementation; shape, dtype and lifetime are not visible from this declaration.
torch::Tensor
|
||||
get_next_low_latency_combine_buffer(int num_max_dispatch_tokens_per_rank, int hidden, int num_experts);
|
||||
};
|
||||
|
||||
} // namespace deep_ep
|
||||
|
||||
Reference in New Issue
Block a user