From adc6e24cb053947101f24cf048961eab22ddfc5f Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Thu, 8 May 2025 16:01:47 +0800 Subject: [PATCH] Update deep_ep.cpp --- csrc/deep_ep.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/csrc/deep_ep.cpp b/csrc/deep_ep.cpp index fd7a6ba..b2d5024 100644 --- a/csrc/deep_ep.cpp +++ b/csrc/deep_ep.cpp @@ -614,6 +614,9 @@ Buffer::internode_dispatch(const torch::Tensor& x, const std::optional& cached_rdma_channel_prefix_matrix, const std::optional& cached_recv_rdma_rank_prefix_sum, const std::optional& cached_gbl_channel_prefix_matrix, const std::optional& cached_recv_gbl_rank_prefix_sum, int expert_alignment, const Config& config, std::optional& previous_event, bool async, bool allocate_on_comm_stream) { + // In dispatch, the CPU busy-waits until the GPU receives tensor size metadata from other ranks, which can take a long time. + // If users of DeepEP need to execute other Python code on other threads, such as KV transfer, that code will be blocked by the GIL + // unless we release the GIL here. pybind11::gil_scoped_release release; const int num_channels = config.num_sms / 2;