Remove the low-latency usage flag (#214)

2025-06-26 18:28:11 +00:00 · 2025-06-16 13:30:14 +08:00
parent 1b92be8a71
commit 8aaddf76ae
6 changed files with 15 additions and 69 deletions
--- a/csrc/deep_ep.hpp
+++ b/csrc/deep_ep.hpp
@@ -71,10 +71,6 @@ private:
    volatile int* moe_recv_rdma_counter = nullptr;
    int* moe_recv_rdma_counter_mapped = nullptr;

-    // Host-side low-latency kernels' usages
-    volatile int* low_latency_usage_flag = nullptr;
-    int* low_latency_usage_flag_mapped = nullptr;
-
 public:
    Buffer(int rank, int num_ranks, int64_t num_nvl_bytes, int64_t num_rdma_bytes, bool low_latency_mode);

@@ -134,8 +130,6 @@ public:
                      const torch::Tensor& combined_rdma_head, const torch::Tensor& combined_nvl_head,
                      const Config& config, std::optional<EventHandle>& previous_event, bool async, bool allocate_on_comm_stream);

-    uint64_t get_low_latency_usage_flag() const;
-
    void clean_low_latency_buffer(int num_max_dispatch_tokens_per_rank, int hidden, int num_experts);

    std::tuple<torch::Tensor, std::optional<torch::Tensor>, torch::Tensor, torch::Tensor, torch::Tensor, std::optional<EventHandle>, std::optional<std::function<void()>>>