mirror of
https://github.com/deepseek-ai/DeepEP
synced 2025-06-26 18:28:11 +00:00
Remove the low-latency usage flag (#214)
This commit is contained in:
@@ -71,10 +71,6 @@ private:
|
||||
// Pointer to an int counter, accessed through a volatile-qualified pointer; null until assigned.
// NOTE(review): presumably counts tokens received over RDMA for MoE dispatch and is polled
// by the host — confirm at the allocation site.
volatile int* moe_recv_rdma_counter = nullptr;
|
||||
// Plain (non-volatile) int pointer declared alongside `moe_recv_rdma_counter`; null until assigned.
// NOTE(review): the `_mapped` suffix suggests a device-visible mapping of the same counter — confirm.
int* moe_recv_rdma_counter_mapped = nullptr;
|
||||
|
||||
// Host-side usage flag for the low-latency kernels
|
||||
// Pointer to an int flag, accessed through a volatile-qualified pointer; null until assigned.
// NOTE(review): presumably records whether the low-latency kernels are in use — confirm
// against the code that writes it.
volatile int* low_latency_usage_flag = nullptr;
|
||||
// Plain (non-volatile) int pointer declared alongside `low_latency_usage_flag`; null until assigned.
// NOTE(review): the `_mapped` suffix suggests a device-visible mapping of the same flag — confirm.
int* low_latency_usage_flag_mapped = nullptr;
|
||||
|
||||
public:
|
||||
// Constructs a Buffer from five settings: two ints (`rank`, `num_ranks`), two 64-bit byte
// counts (`num_nvl_bytes`, `num_rdma_bytes`) and a `low_latency_mode` switch.
// NOTE(review): presumably `rank` is this process's index within `num_ranks`, and the byte
// counts size the NVLink and RDMA buffers respectively — confirm in the definition.
Buffer(int rank, int num_ranks, int64_t num_nvl_bytes, int64_t num_rdma_bytes, bool low_latency_mode);
|
||||
|
||||
@@ -134,8 +130,6 @@ public:
|
||||
const torch::Tensor& combined_rdma_head, const torch::Tensor& combined_nvl_head,
|
||||
const Config& config, std::optional<EventHandle>& previous_event, bool async, bool allocate_on_comm_stream);
|
||||
|
||||
// Const accessor that takes no arguments and returns a 64-bit unsigned value.
// NOTE(review): the uint64_t return type (versus the `int*` flag members) suggests it returns
// a pointer value as an integer rather than the flag's contents — confirm in the definition.
uint64_t get_low_latency_usage_flag() const;
|
||||
|
||||
// Takes three int sizing parameters and returns nothing.
// NOTE(review): presumably resets the low-latency buffer for the given maximum dispatch
// tokens per rank, hidden size and expert count — confirm in the definition, including
// whether the values must match those used by later low-latency calls.
void clean_low_latency_buffer(int num_max_dispatch_tokens_per_rank, int hidden, int num_experts);
|
||||
|
||||
std::tuple<torch::Tensor, std::optional<torch::Tensor>, torch::Tensor, torch::Tensor, torch::Tensor, std::optional<EventHandle>, std::optional<std::function<void()>>>
|
||||
|
||||
Reference in New Issue
Block a user