Normal kernels always use IBGDA mode.

Shangyan Zhou 2025-04-22 10:36:24 +08:00
parent 20b2aaaf9e
commit 3e54b78fd7
2 changed files with 15 additions and 20 deletions
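
For context, IBGDA (GPU-initiated InfiniBand communication) is selected in NVSHMEM through environment variables that must be exported before NVSHMEM initializes. Below is a minimal sketch of that setup, using only the variables that appear in the diff; the helper name is illustrative and not part of the codebase:

import os

def enable_ibgda(num_qps_per_rank: int) -> None:
    # Illustrative helper: mirrors the environment setup in the Python hunk below.
    # Both variables must be set before NVSHMEM is initialized.
    assert num_qps_per_rank > 0
    os.environ['NVSHMEM_DISABLE_P2P'] = '1'      # route traffic over RDMA instead of NVLink P2P
    os.environ['NVSHMEM_IB_ENABLE_IBGDA'] = '1'  # enable GPU-initiated (IBGDA) transfers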

@@ -58,15 +58,12 @@ int init(const std::vector<uint8_t> &root_unique_id_val, int rank, int num_ranks
        EP_HOST_ASSERT(cpu_rdma_team != NVSHMEM_TEAM_INVALID);
    }
    // Normal operations use IBRC, while low-latency operations use IBGDA
    bool internode_use_ibgda = true;
    if (low_latency_mode or internode_use_ibgda) {
        // TODO: we still use `nvshmem_barrier` under IBRC mode, which should be switch to IBGDA mode later
        nvshmemi_device_host_state_t* dev_state_ptr = nullptr;
        CUDA_CHECK(cudaGetSymbolAddress(reinterpret_cast<void**>(&dev_state_ptr), nvshmemi_device_state_d));
        bool ibgda_is_initialized = false;
        CUDA_CHECK(cudaMemcpy(&dev_state_ptr->ibgda_is_initialized, &ibgda_is_initialized, sizeof(bool), cudaMemcpyHostToDevice));
    }
    nvshmem_barrier_all();
    return nvshmem_my_pe();
}

@@ -65,10 +65,8 @@ class Buffer:
        # Synchronize NVSHMEM unique IDs
        root_unique_id = None
        internode_use_ibgda = True
        if self.runtime.get_num_rdma_ranks() > 1 or low_latency_mode:
        # Enable IBGDA for the low latency mode, which refers to "no package forwarding between NVLink and RDMA"
        if low_latency_mode or internode_use_ibgda:
            # Enable IBGDA
            assert num_qps_per_rank > 0
            os.environ['NVSHMEM_DISABLE_P2P'] = '1'
            os.environ['NVSHMEM_IB_ENABLE_IBGDA'] = '1'