Mirror of https://github.com/deepseek-ai/DeepEP (synced 2025-06-26 18:28:11 +00:00)
Support zero-copy for low-latency combine
This commit is contained in:
Parent: dcaf73e5ff
Commit: 66465476ae
@@ -103,7 +103,7 @@ struct LowLatencyBuffer {
|
|||||||
int* combine_rdma_recv_flag_buffer = nullptr;
|
int* combine_rdma_recv_flag_buffer = nullptr;
|
||||||
|
|
||||||
void* combine_rdma_send_buffer_data_start = nullptr;
|
void* combine_rdma_send_buffer_data_start = nullptr;
|
||||||
int num_bytes_per_combine_msg = 0;
|
size_t num_bytes_per_combine_msg = 0;
|
||||||
|
|
||||||
std::pair<int*, int> clean_meta() {
|
std::pair<int*, int> clean_meta() {
|
||||||
EP_HOST_ASSERT(dispatch_rdma_recv_count_buffer == combine_rdma_recv_flag_buffer);
|
EP_HOST_ASSERT(dispatch_rdma_recv_count_buffer == combine_rdma_recv_flag_buffer);
|
||||||
@@ -168,7 +168,7 @@ struct LowLatencyLayout {
|
|||||||
advance(rdma_buffer, send_buffer_bytes * 2 + recv_buffer_bytes * i),
|
advance(rdma_buffer, send_buffer_bytes * 2 + recv_buffer_bytes * i),
|
||||||
advance<int*>(rdma_buffer, send_buffer_bytes * 2 + recv_buffer_bytes * 2 + signaling_buffer_bytes * i),
|
advance<int*>(rdma_buffer, send_buffer_bytes * 2 + recv_buffer_bytes * 2 + signaling_buffer_bytes * i),
|
||||||
advance(rdma_buffer, send_buffer_bytes * i + sizeof(int4)),
|
advance(rdma_buffer, send_buffer_bytes * i + sizeof(int4)),
|
||||||
static_cast<int>(num_bytes_per_combine_msg)
|
num_bytes_per_combine_msg
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user