Remove the low-latency usage flag (#214)

This commit is contained in:
Chenggang Zhao
2025-06-16 13:30:14 +08:00
committed by GitHub
parent 1b92be8a71
commit 8aaddf76ae
6 changed files with 15 additions and 69 deletions

View File

@@ -71,10 +71,6 @@ private:
// Pair of pointers for the MoE receive RDMA counter; both start out null.
// The first is volatile-qualified, so each read through it is a real memory load
// rather than a value the compiler may have cached.
// NOTE(review): presumably `_mapped` is a device-visible alias of the same host
// allocation — confirm against the constructor/definition.
volatile int* moe_recv_rdma_counter = nullptr;
int* moe_recv_rdma_counter_mapped = nullptr;
// Host-side usage flag for the low-latency kernels; both pointers start out null.
// The first is volatile-qualified, so each read through it is a real memory load.
// NOTE(review): presumably `_mapped` is a device-visible alias of the same host
// allocation — confirm against the constructor/definition.
volatile int* low_latency_usage_flag = nullptr;
int* low_latency_usage_flag_mapped = nullptr;
public:
// Constructs a Buffer from a rank index, the total number of ranks, the NVL and RDMA
// buffer sizes in bytes, and a flag selecting low-latency mode.
// NOTE(review): what is allocated or validated here is not visible from the
// declaration — see the definition.
Buffer(int rank, int num_ranks, int64_t num_nvl_bytes, int64_t num_rdma_bytes, bool low_latency_mode);
@@ -134,8 +130,6 @@ public:
const torch::Tensor& combined_rdma_head, const torch::Tensor& combined_nvl_head,
const Config& config, std::optional<EventHandle>& previous_event, bool async, bool allocate_on_comm_stream);
// Returns a 64-bit unsigned value for the low-latency usage flag; declared const,
// so it can be called on a const Buffer.
// NOTE(review): whether the result is the flag's value or an address encoded as an
// integer is not visible from the declaration — confirm against the definition.
uint64_t get_low_latency_usage_flag() const;
// Cleans the low-latency buffer, taking the maximum number of dispatch tokens per
// rank, `hidden`, and the number of experts as sizing inputs. Returns nothing.
// NOTE(review): presumably these must match the values used by later low-latency
// dispatch/combine calls — confirm against the definition and callers.
void clean_low_latency_buffer(int num_max_dispatch_tokens_per_rank, int hidden, int num_experts);
std::tuple<torch::Tensor, std::optional<torch::Tensor>, torch::Tensor, torch::Tensor, torch::Tensor, std::optional<EventHandle>, std::optional<std::function<void()>>>