Mirror of https://github.com/deepseek-ai/DeepEP, synced 2025-05-04 04:03:56 +00:00
From 9e6cc27cceb3130784e4ea7b61ea3171156365fd Mon Sep 17 00:00:00 2001
From: Shangyan Zhou <sy.zhou@deepseek.com>
Date: Fri, 20 Dec 2024 10:57:12 +0800
Subject: [PATCH 1/4] Change QP creation order.

---
 src/modules/transport/ibgda/ibgda.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/modules/transport/ibgda/ibgda.cpp b/src/modules/transport/ibgda/ibgda.cpp
index ef325cd..286132e 100644
--- a/src/modules/transport/ibgda/ibgda.cpp
+++ b/src/modules/transport/ibgda/ibgda.cpp
@@ -2936,17 +2936,20 @@ int nvshmemt_ibgda_connect_endpoints(nvshmem_transport_t t, int *selected_dev_id
     INFO(ibgda_state->log_level, "Creating %d RC QPs", device->rc.num_eps_per_pe);
     for (int i = 0; i < num_rc_eps; ++i) {
         // Do not create loopback to self
-        if (i / device->rc.num_eps_per_pe == mype) {
+        int dst_pe = (i + 1 + mype) % n_pes;
+        int offset = i / n_pes;
+        int mapped_i = dst_pe * device->rc.num_eps_per_pe + offset;
+        if (dst_pe == mype) {
             continue;
         }
-        status = ibgda_create_qp(&device->rc.eps[i], device, portid, i,
+        status = ibgda_create_qp(&device->rc.eps[mapped_i], device, portid, mapped_i,
                                  NVSHMEMI_IBGDA_DEVICE_QP_TYPE_RC);
         NVSHMEMI_NZ_ERROR_JMP(status, NVSHMEMX_ERROR_INTERNAL, out,
-                              "ibgda_create_dci failed on RC #%d.", i);
+                              "ibgda_create_dci failed on RC #%d.", mapped_i);

-        status = ibgda_get_rc_handle(&local_rc_handles[i], device->rc.eps[i], device);
+        status = ibgda_get_rc_handle(&local_rc_handles[mapped_i], device->rc.eps[mapped_i], device);
         NVSHMEMI_NZ_ERROR_JMP(status, NVSHMEMX_ERROR_INTERNAL, out,
-                              "ibgda_get_rc_handle failed on RC #%d.", i);
+                              "ibgda_get_rc_handle failed on RC #%d.", mapped_i);
     }

     if (num_rc_eps) {
--
2.25.1
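
Patch 1 rotates the order in which each PE creates its RC QPs. Previously the loop walked the eps array in destination-PE-major order, so every rank created its QPs toward PE 0 first; now iteration i targets dst_pe = (i + 1 + mype) % n_pes and maps that back to the canonical slot dst_pe * num_eps_per_pe + offset, leaving the eps[] layout unchanged while spreading QP creation round-robin across destination PEs, each rank starting at its successor. A minimal standalone sketch of the mapping (illustrative only; rc_creation_order is not a DeepEP/NVSHMEM function):

#include <cstdio>
#include <vector>

// Order in which PE `mype` now creates its RC QP slots: round-robin over
// destination PEs starting at mype + 1, skipping itself.
std::vector<int> rc_creation_order(int mype, int n_pes, int num_eps_per_pe) {
    std::vector<int> order;
    const int num_rc_eps = n_pes * num_eps_per_pe;
    for (int i = 0; i < num_rc_eps; ++i) {
        int dst_pe = (i + 1 + mype) % n_pes;  // rotated destination
        int offset = i / n_pes;               // which QP of that destination
        if (dst_pe == mype) continue;         // still no loopback QP
        order.push_back(dst_pe * num_eps_per_pe + offset);
    }
    return order;
}

int main() {
    // PE 1 of 4 PEs, 2 RC QPs per peer: prints "4 6 0 5 7 1", i.e.
    // destination PEs 2, 3, 0, 2, 3, 0 instead of the old 0, 0, 2, 2, 3, 3.
    for (int slot : rc_creation_order(1, 4, 2)) std::printf("%d ", slot);
    std::printf("\n");
    return 0;
}
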
From b11d41e4f3727f2f6ccc00a8c852e59e2ee33c8a Mon Sep 17 00:00:00 2001
From: Shangyan Zhou <sy.zhou@deepseek.com>
Date: Fri, 10 Jan 2025 11:53:38 +0800
Subject: [PATCH 2/4] Add recv queue and recv cq for rc qps.

Let the ibgda rc qps use a regular recv queue.

Add recv queue to ibgda dev qp.

IBGDA: create recv cq.

Set up recv cq.

Fix recv queue.

Remove some unused idx.

Longer recv queue.
---
 .../nvshmem_common_ibgda.h            | 19 +++++-
 src/modules/transport/ibgda/ibgda.cpp | 65 ++++++++++++++++---
 2 files changed, 71 insertions(+), 13 deletions(-)

diff --git a/src/include/device_host_transport/nvshmem_common_ibgda.h b/src/include/device_host_transport/nvshmem_common_ibgda.h
index 8b8a263..1be3dec 100644
--- a/src/include/device_host_transport/nvshmem_common_ibgda.h
+++ b/src/include/device_host_transport/nvshmem_common_ibgda.h
@@ -168,14 +168,17 @@ typedef struct {
         uint64_t get_head;  // last wqe idx + 1 with a "fetch" operation (g, get, amo_fetch)
         uint64_t get_tail;  // last wqe idx + 1 polled with cst; get_tail > get_head is possible
     } tx_wq;
+    struct {
+        uint64_t resv_head;  // last reserved wqe idx + 1
+    } rx_wq;
     struct {
         uint64_t head;
         uint64_t tail;
     } ibuf;
     char padding[NVSHMEMI_IBGDA_QP_MANAGEMENT_PADDING];
 } __attribute__((__aligned__(8))) nvshmemi_ibgda_device_qp_management_v1;
-static_assert(sizeof(nvshmemi_ibgda_device_qp_management_v1) == 96,
-              "ibgda_device_qp_management_v1 must be 96 bytes.");
+static_assert(sizeof(nvshmemi_ibgda_device_qp_management_v1) == 104,
+              "ibgda_device_qp_management_v1 must be 104 bytes.");

 typedef nvshmemi_ibgda_device_qp_management_v1 nvshmemi_ibgda_device_qp_management_t;

@@ -199,9 +202,19 @@ typedef struct nvshmemi_ibgda_device_qp {
         // May point to mvars.prod_idx or internal prod_idx
         uint64_t *prod_idx;
     } tx_wq;
+    struct {
+        uint16_t nwqes;
+        uint64_t tail;
+        void *wqe;
+        __be32 *dbrec;
+        void *bf;
+        nvshmemi_ibgda_device_cq_t *cq;
+        // May point to mvars.prod_idx or internal prod_idx
+        uint64_t *prod_idx;
+    } rx_wq;
     nvshmemi_ibgda_device_qp_management_v1 mvars;  // management variables
 } nvshmemi_ibgda_device_qp_v1;
-static_assert(sizeof(nvshmemi_ibgda_device_qp_v1) == 184, "ibgda_device_qp_v1 must be 184 bytes.");
+static_assert(sizeof(nvshmemi_ibgda_device_qp_v1) == 248, "ibgda_device_qp_v1 must be 248 bytes.");

 typedef nvshmemi_ibgda_device_qp_v1 nvshmemi_ibgda_device_qp_t;

diff --git a/src/modules/transport/ibgda/ibgda.cpp b/src/modules/transport/ibgda/ibgda.cpp
index 286132e..e0b2d5c 100644
--- a/src/modules/transport/ibgda/ibgda.cpp
+++ b/src/modules/transport/ibgda/ibgda.cpp
@@ -198,6 +198,7 @@ struct ibgda_ep {
     off_t dbr_offset;

     struct ibgda_cq *send_cq;
+    struct ibgda_cq *recv_cq;
     struct ibv_ah *ah;

     uint32_t user_index;
@@ -1538,7 +1539,8 @@ static int ibgda_create_cq_shared_objects(nvshmemt_ibgda_state_t *ibgda_state,

     struct ibv_context *context = device->context;

-    unsigned int num_cqs = device->dci.num_eps + device->rc.num_eps_per_pe * n_pes;
+    // Each RC qp has one send CQ and one recv CQ.
+    unsigned int num_cqs = device->dci.num_eps + device->rc.num_eps_per_pe * n_pes * 2;

     assert(ibgda_qp_depth > 0);
     size_t num_cqe = IBGDA_ROUND_UP_POW2_OR_0(ibgda_qp_depth);
@@ -1701,7 +1703,8 @@ static int ibgda_create_qp_shared_objects(nvshmemt_ibgda_state_t *ibgda_state,
     }

     // Allocate and map WQ buffer for all QPs.
-    wq_buf_size_per_qp = num_wqebb * MLX5_SEND_WQE_BB;  // num_wqebb is always a power of 2
+    // Todo: reduce the size of wq buffer.
+    wq_buf_size_per_qp = num_wqebb * MLX5_SEND_WQE_BB * 2;  // num_wqebb is always a power of 2
     wq_buf_size = wq_buf_size_per_qp * num_eps;
     status = ibgda_nic_control_alloc(&wq_mobject, wq_buf_size, IBGDA_GPAGE_SIZE);
     NVSHMEMI_NZ_ERROR_JMP(status, NVSHMEMX_ERROR_INTERNAL, out, "cannot allocate wq buf.\n");
@@ -1882,8 +1885,11 @@ static int ibgda_create_qp(struct ibgda_ep **ep_ptr, struct ibgda_device *device
     int cqe_version = 0;

     struct ibgda_cq *send_cq = NULL;
+    struct ibgda_cq *recv_cq = NULL;

     size_t num_wqebb = IBGDA_ROUND_UP_POW2_OR_0(ibgda_qp_depth);
+    size_t num_recv_wqe = ibgda_qp_depth;
+    size_t recv_wqe_size = 16;

     int status = 0;

@@ -1911,6 +1917,11 @@ static int ibgda_create_qp(struct ibgda_ep **ep_ptr, struct ibgda_device *device
     status = ibgda_create_cq(&send_cq, device);
     NVSHMEMI_NZ_ERROR_JMP(status, NVSHMEMX_ERROR_INTERNAL, out, "ibgda_create_cq failed.\n");

+    if (qp_type == NVSHMEMI_IBGDA_DEVICE_QP_TYPE_RC) {
+        status = ibgda_create_cq(&recv_cq, device);
+        NVSHMEMI_NZ_ERROR_JMP(status, NVSHMEMX_ERROR_INTERNAL, out, "ibgda_create_cq failed.\n");
+    }
+
     ep = (struct ibgda_ep *)calloc(1, sizeof(struct ibgda_ep));
     NVSHMEMI_NULL_ERROR_JMP(ep, status, NVSHMEMX_ERROR_OUT_OF_MEMORY, out,
                             "Unable to allocate mem for ep.\n");
@@ -1939,12 +1950,9 @@ static int ibgda_create_qp(struct ibgda_ep **ep_ptr, struct ibgda_device *device
     DEVX_SET(qpc, qp_context, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
     DEVX_SET(qpc, qp_context, pd, device->qp_shared_object.pdn);
     DEVX_SET(qpc, qp_context, uar_page, uar_mobject->uar->page_id);  // BF register
-    DEVX_SET(qpc, qp_context, rq_type, IBGDA_SRQ_TYPE_VALUE);  // Shared Receive Queue
-    DEVX_SET(qpc, qp_context, srqn_rmpn_xrqn, device->qp_shared_object.srqn);
     DEVX_SET(qpc, qp_context, cqn_snd, send_cq->cqn);
-    DEVX_SET(qpc, qp_context, cqn_rcv, device->qp_shared_object.rcqn);
+    DEVX_SET(qpc, qp_context, cqn_rcv, qp_type == NVSHMEMI_IBGDA_DEVICE_QP_TYPE_RC ? recv_cq->cqn : device->qp_shared_object.rcqn);
     DEVX_SET(qpc, qp_context, log_sq_size, IBGDA_ILOG2_OR0(num_wqebb));
-    DEVX_SET(qpc, qp_context, log_rq_size, 0);
     DEVX_SET(qpc, qp_context, cs_req, 0);  // Disable CS Request
     DEVX_SET(qpc, qp_context, cs_res, 0);  // Disable CS Response
     DEVX_SET(qpc, qp_context, dbr_umem_valid, IBGDA_MLX5_UMEM_VALID_ENABLE);  // Enable dbr_umem_id
@@ -1953,6 +1961,15 @@ static int ibgda_create_qp(struct ibgda_ep **ep_ptr, struct ibgda_device *device
     DEVX_SET(qpc, qp_context, dbr_umem_id, dbr_umem->umem_id);  // DBR buffer
     DEVX_SET(qpc, qp_context, user_index, qp_idx);
     DEVX_SET(qpc, qp_context, page_offset, 0);
+    if (qp_type == NVSHMEMI_IBGDA_DEVICE_QP_TYPE_RC) {
+        DEVX_SET(qpc, qp_context, rq_type, 0);  // Regular recv queue
+        DEVX_SET(qpc, qp_context, log_rq_size, IBGDA_ILOG2(num_recv_wqe));  // 4 wqe
+        DEVX_SET(qpc, qp_context, log_rq_stride, IBGDA_ILOG2(recv_wqe_size) - 4);  // max recv wqe size = 16B
+    } else {
+        DEVX_SET(qpc, qp_context, rq_type, IBGDA_SRQ_TYPE_VALUE);  // Shared Receive Queue, DC must use this.
+        DEVX_SET(qpc, qp_context, srqn_rmpn_xrqn, device->qp_shared_object.srqn);
+        DEVX_SET(qpc, qp_context, log_rq_size, 0);
+    }

     ep->devx_qp = mlx5dv_devx_obj_create(context, cmd_in, sizeof(cmd_in), cmd_out, sizeof(cmd_out));
     NVSHMEMI_NULL_ERROR_JMP(ep->devx_qp, status, NVSHMEMX_ERROR_INTERNAL, out,
@@ -1962,9 +1979,9 @@ static int ibgda_create_qp(struct ibgda_ep **ep_ptr, struct ibgda_device *device
     ep->portid = portid;

     ep->sq_cnt = num_wqebb;
-    ep->sq_buf_offset = 0;
+    ep->sq_buf_offset = num_recv_wqe * recv_wqe_size;

-    ep->rq_cnt = 0;
+    ep->rq_cnt = num_recv_wqe;
     ep->rq_buf_offset = 0;

     ep->wq_mobject = device->qp_shared_object.wq_mobject;
@@ -1978,6 +1995,7 @@ static int ibgda_create_qp(struct ibgda_ep **ep_ptr, struct ibgda_device *device
     ep->uar_mobject = uar_mobject;

     ep->send_cq = send_cq;
+    ep->recv_cq = recv_cq;

     ep->qp_type = qp_type;

@@ -1989,6 +2007,7 @@ out:
     if (status) {
         if (uar_mobject) ibgda_unmap_and_free_qp_uar(uar_mobject);
         if (send_cq) ibgda_destroy_cq(send_cq);
+        if (recv_cq) ibgda_destroy_cq(recv_cq);
         if (ep) free(ep);
     }

@@ -2287,6 +2306,10 @@ static int ibgda_destroy_ep(struct ibgda_ep *ep) {
         ibgda_destroy_cq(ep->send_cq);
     }

+    if (ep->recv_cq) {
+        ibgda_destroy_cq(ep->recv_cq);
+    }
+
     if (ep->ah) {
         ftable.destroy_ah(ep->ah);
     }
@@ -2318,7 +2341,7 @@ static void ibgda_get_device_qp(nvshmemi_ibgda_device_qp_t *dev_qp, struct ibgda
     dev_qp->qpn = ep->qpn;

     assert(ep->wq_mobject->has_gpu_mapping);
-    dev_qp->tx_wq.wqe = (void *)((uintptr_t)ep->wq_mobject->aligned.gpu_ptr + ep->wq_offset);
+    dev_qp->tx_wq.wqe = (void *)((uintptr_t)ep->wq_mobject->aligned.gpu_ptr + ep->wq_offset + ep->sq_buf_offset);

     if (ibgda_nic_handler == IBGDA_NIC_HANDLER_GPU) {
         assert(ep->dbr_mobject->has_gpu_mapping);
@@ -2330,6 +2353,12 @@ static void ibgda_get_device_qp(nvshmemi_ibgda_device_qp_t *dev_qp, struct ibgda
     }

     dev_qp->tx_wq.nwqes = ep->sq_cnt;
+    if (ep->qp_type == NVSHMEMI_IBGDA_DEVICE_QP_TYPE_RC) {
+        dev_qp->rx_wq.nwqes = ep->rq_cnt;
+        dev_qp->rx_wq.wqe = (void *)((uintptr_t)ep->wq_mobject->aligned.gpu_ptr + ep->wq_offset + ep->rq_buf_offset);
+        dev_qp->rx_wq.dbrec = (__be32 *)((uintptr_t)ep->dbr_mobject->aligned.gpu_ptr + ep->dbr_offset);
+        dev_qp->rx_wq.bf = (void *)ep->uar_mobject->aligned.gpu_ptr;
+    }

     ibuf_dci_start = (uintptr_t)device->qp_shared_object.internal_buf.mem_object->aligned.gpu_ptr;
     ibuf_rc_start = ibuf_dci_start + (size_per_dci * device->dci.num_eps);
@@ -2379,6 +2408,9 @@ static int ibgda_setup_gpu_state(nvshmem_transport_t t) {
     nvshmemi_ibgda_device_cq_t *cq_d = NULL;
     nvshmemi_ibgda_device_cq_t *cq_h = NULL;

+    nvshmemi_ibgda_device_cq_t *recv_cq_d = NULL;
+    nvshmemi_ibgda_device_cq_t *recv_cq_h = NULL;
+
     uint8_t *qp_group_switches_d = NULL;

     const size_t mvars_offset = offsetof(nvshmemi_ibgda_device_qp_t, mvars);
@@ -2386,6 +2418,7 @@ static int ibgda_setup_gpu_state(nvshmem_transport_t t) {
     const size_t cons_t_offset = offsetof(nvshmemi_ibgda_device_qp_management_t, tx_wq.cons_idx);
     const size_t wqe_h_offset = offsetof(nvshmemi_ibgda_device_qp_management_t, tx_wq.resv_head);
     const size_t wqe_t_offset = offsetof(nvshmemi_ibgda_device_qp_management_t, tx_wq.ready_head);
+    const size_t rx_resv_head_offset = offsetof(nvshmemi_ibgda_device_qp_management_t, rx_wq.resv_head);

     nvshmemi_ibgda_device_qp_map_type_t rc_map_type = NVSHMEMI_IBGDA_DEVICE_QP_MAP_TYPE_INVALID;
     nvshmemi_ibgda_device_qp_map_type_t dc_map_type = NVSHMEMI_IBGDA_DEVICE_QP_MAP_TYPE_INVALID;
@@ -2421,7 +2454,7 @@ static int ibgda_setup_gpu_state(nvshmem_transport_t t) {
         num_dct_handles += device->dct.num_eps * n_pes;
         num_dci_handles += device->dci.num_eps;
         num_rc_handles += device->rc.num_eps_per_pe * n_pes;
-        num_cq_handles += device->dci.num_eps + (device->rc.num_eps_per_pe * (n_pes - 1));
+        num_cq_handles += device->dci.num_eps + (device->rc.num_eps_per_pe * (n_pes - 1) * 2);
         num_shared_dci_handles += device->dci.num_shared_eps;
     }
     assert(num_dci_handles - num_shared_dci_handles >= 0);
@@ -2456,6 +2489,10 @@ static int ibgda_setup_gpu_state(nvshmem_transport_t t) {
     for (int i = 0; i < num_cq_handles; i++) {
         nvshmemi_init_ibgda_device_cq(cq_h[i]);
     }
+
+    recv_cq_h = (nvshmemi_ibgda_device_cq_t *)calloc(1, sizeof(*recv_cq_h));
+    NVSHMEMI_NULL_ERROR_JMP(recv_cq_h, status, NVSHMEMX_ERROR_OUT_OF_MEMORY, out, "recv_cq calloc err.");
+    nvshmemi_init_ibgda_device_cq(recv_cq_h[0]);
     /* allocate host memory for dct, rc, cq, dci end */

     /* allocate device memory for dct, rc, cq, dci start */
@@ -2559,6 +2596,14 @@ static int ibgda_setup_gpu_state(nvshmem_transport_t t) {
             }

             ++cq_idx;
+
+            rc_h[arr_idx].rx_wq.cq = &cq_d[cq_idx];
+
+            ibgda_get_device_cq(&cq_h[cq_idx], device->rc.eps[i]->recv_cq);
+            cq_h[cq_idx].resv_head = (uint64_t *)(base_mvars_d_addr + rx_resv_head_offset);
+            cq_h[cq_idx].qpn = rc_h[arr_idx].qpn;
+            cq_h[cq_idx].qp_type = rc_h[arr_idx].qp_type;
+            ++cq_idx;
         }
     }
 }
--
2.25.1
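
Patch 2 is the core of the series: RC QPs get a real (non-shared) receive queue and a dedicated receive CQ, while the DC paths keep the shared SRQ and shared recv CQ ("DC must use this"). The per-QP WQ buffer is doubled, the RQ sits at the front of it (rq_buf_offset = 0), and the SQ is shifted up by num_recv_wqe * recv_wqe_size bytes; the device-visible tx_wq.wqe pointer is bumped by the same sq_buf_offset, and the new rx_wq block in nvshmemi_ibgda_device_qp_t exposes the RQ ring, its doorbell record, and its CQ to GPU code. With recv_wqe_size = 16, each recv WQE holds exactly one scatter entry (struct mlx5_wqe_data_seg is 16 bytes), which is why log_rq_stride is set to ilog2(16) - 4 = 0. A sketch of the resulting per-QP layout, assuming MLX5_SEND_WQE_BB = 64 and with names (wq_layout, rc_wq_layout) invented for illustration:

#include <cstddef>

struct wq_layout {
    size_t rq_buf_offset;  // ep->rq_buf_offset: RQ at the start of the buffer
    size_t sq_buf_offset;  // ep->sq_buf_offset: SQ immediately after the RQ
    size_t size_per_qp;    // wq_buf_size_per_qp
};

wq_layout rc_wq_layout(size_t num_wqebb, size_t qp_depth) {
    constexpr size_t kSendWqeBB = 64;    // MLX5_SEND_WQE_BB (64-byte basic block)
    constexpr size_t kRecvWqeSize = 16;  // one mlx5_wqe_data_seg per recv WQE
    wq_layout l;
    l.rq_buf_offset = 0;
    l.sq_buf_offset = qp_depth * kRecvWqeSize;  // num_recv_wqe * recv_wqe_size
    // The patch simply doubles the old SQ-only allocation ("Todo: reduce the
    // size of wq buffer."), which is more than enough to hold RQ + SQ.
    l.size_per_qp = num_wqebb * kSendWqeBB * 2;
    return l;
}
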
From af479f9f23103d4a1579fae38676d6b3022df887 Mon Sep 17 00:00:00 2001
From: Shangyan Zhou <sy.zhou@deepseek.com>
Date: Sat, 8 Feb 2025 18:02:39 +0800
Subject: [PATCH 3/4] Maintain recv queue's cons_idx.

---
 src/include/device_host_transport/nvshmem_common_ibgda.h | 5 +++--
 src/modules/transport/ibgda/ibgda.cpp                    | 6 ++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/include/device_host_transport/nvshmem_common_ibgda.h b/src/include/device_host_transport/nvshmem_common_ibgda.h
index 1be3dec..ea1e284 100644
--- a/src/include/device_host_transport/nvshmem_common_ibgda.h
+++ b/src/include/device_host_transport/nvshmem_common_ibgda.h
@@ -170,6 +170,7 @@ typedef struct {
     } tx_wq;
     struct {
         uint64_t resv_head;  // last reserved wqe idx + 1
+        uint64_t cons_idx;   // polled wqe idx + 1 (consumer index + 1)
     } rx_wq;
     struct {
         uint64_t head;
@@ -177,7 +178,7 @@ typedef struct {
     } ibuf;
     char padding[NVSHMEMI_IBGDA_QP_MANAGEMENT_PADDING];
 } __attribute__((__aligned__(8))) nvshmemi_ibgda_device_qp_management_v1;
-static_assert(sizeof(nvshmemi_ibgda_device_qp_management_v1) == 104,
+static_assert(sizeof(nvshmemi_ibgda_device_qp_management_v1) == 112,
               "ibgda_device_qp_management_v1 must be 104 bytes.");

 typedef nvshmemi_ibgda_device_qp_management_v1 nvshmemi_ibgda_device_qp_management_t;

@@ -214,7 +215,7 @@ typedef struct nvshmemi_ibgda_device_qp {
     } rx_wq;
     nvshmemi_ibgda_device_qp_management_v1 mvars;  // management variables
 } nvshmemi_ibgda_device_qp_v1;
-static_assert(sizeof(nvshmemi_ibgda_device_qp_v1) == 248, "ibgda_device_qp_v1 must be 248 bytes.");
+static_assert(sizeof(nvshmemi_ibgda_device_qp_v1) == 256, "ibgda_device_qp_v1 must be 248 bytes.");

 typedef nvshmemi_ibgda_device_qp_v1 nvshmemi_ibgda_device_qp_t;

diff --git a/src/modules/transport/ibgda/ibgda.cpp b/src/modules/transport/ibgda/ibgda.cpp
index e0b2d5c..bc339c5 100644
--- a/src/modules/transport/ibgda/ibgda.cpp
+++ b/src/modules/transport/ibgda/ibgda.cpp
@@ -1067,7 +1067,7 @@ static inline void ibgda_nic_control_free(struct ibgda_mem_object *mobject) {
     ibgda_host_mem_free(mobject);
 }

-static int ibgda_create_cq(struct ibgda_cq **pgcq, struct ibgda_device *device) {
+static int ibgda_create_cq(struct ibgda_cq **pgcq, struct ibgda_device *device, int cc = 1) {
     int status = 0;

     struct ibgda_cq *gcq = NULL;
@@ -1118,7 +1118,7 @@ static int ibgda_create_cq(struct ibgda_cq **pgcq, struct ibgda_device *device)
     cq_context = DEVX_ADDR_OF(create_cq_in, cmd_in, cq_context);
     DEVX_SET(cqc, cq_context, dbr_umem_valid, IBGDA_MLX5_UMEM_VALID_ENABLE);
     DEVX_SET(cqc, cq_context, cqe_sz, MLX5_CQE_SIZE_64B);
-    DEVX_SET(cqc, cq_context, cc, 0x1);  // Use collapsed CQ
+    DEVX_SET(cqc, cq_context, cc, cc);  // Use collapsed CQ
     DEVX_SET(cqc, cq_context, oi, 0x1);  // Allow overrun
     DEVX_SET(cqc, cq_context, dbr_umem_id, dbr_umem->umem_id);
     DEVX_SET(cqc, cq_context, log_cq_size, IBGDA_ILOG2_OR0(num_cqe));
@@ -2419,6 +2419,7 @@ static int ibgda_setup_gpu_state(nvshmem_transport_t t) {
     const size_t wqe_h_offset = offsetof(nvshmemi_ibgda_device_qp_management_t, tx_wq.resv_head);
     const size_t wqe_t_offset = offsetof(nvshmemi_ibgda_device_qp_management_t, tx_wq.ready_head);
     const size_t rx_resv_head_offset = offsetof(nvshmemi_ibgda_device_qp_management_t, rx_wq.resv_head);
+    const size_t rx_cons_offset = offsetof(nvshmemi_ibgda_device_qp_management_t, rx_wq.cons_idx);

     nvshmemi_ibgda_device_qp_map_type_t rc_map_type = NVSHMEMI_IBGDA_DEVICE_QP_MAP_TYPE_INVALID;
     nvshmemi_ibgda_device_qp_map_type_t dc_map_type = NVSHMEMI_IBGDA_DEVICE_QP_MAP_TYPE_INVALID;
@@ -2601,6 +2602,7 @@ static int ibgda_setup_gpu_state(nvshmem_transport_t t) {

             ibgda_get_device_cq(&cq_h[cq_idx], device->rc.eps[i]->recv_cq);
             cq_h[cq_idx].resv_head = (uint64_t *)(base_mvars_d_addr + rx_resv_head_offset);
+            cq_h[cq_idx].cons_idx = (uint64_t *)(base_mvars_d_addr + rx_cons_offset);
             cq_h[cq_idx].qpn = rc_h[arr_idx].qpn;
             cq_h[cq_idx].qp_type = rc_h[arr_idx].qp_type;
             ++cq_idx;
--
2.25.1
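
Patch 3 adds the consumer side of the new receive ring: rx_wq.cons_idx records the last polled recv WQE index plus one, complementing the resv_head producer counter from patch 2, and ibgda_setup_gpu_state wires both device addresses into the recv CQ handle. It also gives ibgda_create_cq a defaulted collapsed-CQ flag (int cc = 1), so callers could create non-collapsed CQs. (Note that the static_assert message strings in the header now trail the asserted sizes: 112 and 256 bytes are asserted while the strings still say 104 and 248.) A minimal sketch, not NVSHMEM code, of how the two counters bound the ring:

#include <cassert>
#include <cstdint>

// Both counters increase monotonically; their difference is the number of
// outstanding receives, which must never exceed the ring capacity. Field
// names mirror rx_wq in nvshmemi_ibgda_device_qp_management_v1; the struct
// itself is illustrative.
struct rx_ring {
    uint64_t resv_head = 0;  // last reserved recv wqe idx + 1 (producer)
    uint64_t cons_idx = 0;   // last polled recv wqe idx + 1 (consumer)
    uint16_t nwqes = 0;      // ring capacity (rx_wq.nwqes)

    bool can_reserve() const { return resv_head - cons_idx < nwqes; }
    void reserve() { assert(can_reserve()); ++resv_head; }        // post a recv WQE
    void consume() { assert(cons_idx < resv_head); ++cons_idx; }  // poll a CQE
};
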
From e0ba3fa21b4b633b481c6684c3ad04f2670c8df4 Mon Sep 17 00:00:00 2001
From: Shangyan Zhou <sy.zhou@deepseek.com>
Date: Tue, 11 Feb 2025 11:00:57 +0800
Subject: [PATCH 4/4] Init rx_wq counters.

---
 src/include/device_host_transport/nvshmem_common_ibgda.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/include/device_host_transport/nvshmem_common_ibgda.h b/src/include/device_host_transport/nvshmem_common_ibgda.h
index ea1e284..e6640d6 100644
--- a/src/include/device_host_transport/nvshmem_common_ibgda.h
+++ b/src/include/device_host_transport/nvshmem_common_ibgda.h
@@ -46,6 +46,8 @@
         qp_man.tx_wq.cons_idx = NVSHMEMI_IBGDA_ULSCALAR_INVALID;  \
         qp_man.tx_wq.get_head = NVSHMEMI_IBGDA_ULSCALAR_INVALID;  \
         qp_man.tx_wq.get_tail = NVSHMEMI_IBGDA_ULSCALAR_INVALID;  \
+        qp_man.rx_wq.resv_head = NVSHMEMI_IBGDA_ULSCALAR_INVALID; \
+        qp_man.rx_wq.cons_idx = NVSHMEMI_IBGDA_ULSCALAR_INVALID;  \
         qp_man.ibuf.head = NVSHMEMI_IBGDA_ULSCALAR_INVALID;       \
         qp_man.ibuf.tail = NVSHMEMI_IBGDA_ULSCALAR_INVALID;       \
     } while (0);
--
2.25.1
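
Patch 4 closes the loop by initializing the two new rx_wq counters in the qp_man reset macro with the same NVSHMEMI_IBGDA_ULSCALAR_INVALID sentinel already used for the tx_wq and ibuf counters, so an uninitialized ring is distinguishable from one that is legitimately at zero. A hedged illustration (the sentinel's value is assumed here, not taken from the header):

#include <cstdint>

// Assumption: NVSHMEMI_IBGDA_ULSCALAR_INVALID is an all-ones sentinel.
constexpr uint64_t kUlscalarInvalid = UINT64_MAX;

struct rx_wq_counters {
    uint64_t resv_head = kUlscalarInvalid;
    uint64_t cons_idx = kUlscalarInvalid;
    bool initialized() const {
        return resv_head != kUlscalarInvalid && cons_idx != kUlscalarInvalid;
    }
};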