mirror of
https://github.com/deepseek-ai/DeepEP
synced 2025-05-31 18:38:18 +00:00
Support zero-copy for low-latency combine
This commit is contained in:
parent
dcaf73e5ff
commit
66465476ae
@ -103,7 +103,7 @@ struct LowLatencyBuffer {
|
||||
int* combine_rdma_recv_flag_buffer = nullptr;
|
||||
|
||||
void* combine_rdma_send_buffer_data_start = nullptr;
|
||||
int num_bytes_per_combine_msg = 0;
|
||||
size_t num_bytes_per_combine_msg = 0;
|
||||
|
||||
std::pair<int*, int> clean_meta() {
|
||||
EP_HOST_ASSERT(dispatch_rdma_recv_count_buffer == combine_rdma_recv_flag_buffer);
|
||||
@ -168,7 +168,7 @@ struct LowLatencyLayout {
|
||||
advance(rdma_buffer, send_buffer_bytes * 2 + recv_buffer_bytes * i),
|
||||
advance<int*>(rdma_buffer, send_buffer_bytes * 2 + recv_buffer_bytes * 2 + signaling_buffer_bytes * i),
|
||||
advance(rdma_buffer, send_buffer_bytes * i + sizeof(int4)),
|
||||
static_cast<int>(num_bytes_per_combine_msg)
|
||||
num_bytes_per_combine_msg
|
||||
};
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user