mirror of
https://github.com/deepseek-ai/DeepEP
synced 2025-06-26 18:28:11 +00:00
Support Ampere architecture (#204)
* Update README
* Update `setup.py`
* Fix headers
* Add `DISABLE_NVSHMEM` for APIs
* Fix launch
* Fix TMA settings
* Fix TMA usages
* Fix dlink
* Separate layout kernels
* Update version
* Add `is_sm90_compiled`
* Fix tests
* Add NVLink connection checks
* Update README
* Fix tests
* Add some comments
* Minor fix
* Minor fix
* Fix bugs
This commit is contained in:
@@ -5,7 +5,10 @@
|
||||
#include "exception.cuh"
|
||||
#include "launch.cuh"
|
||||
#include "utils.cuh"
|
||||
|
||||
#ifndef DISABLE_NVSHMEM
|
||||
#include "ibgda_device.cuh"
|
||||
#endif
|
||||
|
||||
namespace deep_ep {
|
||||
|
||||
@@ -30,6 +33,7 @@ void barrier(int** barrier_signal_ptrs, int rank, int num_ranks, cudaStream_t st
|
||||
|
||||
namespace internode {
|
||||
|
||||
#ifndef DISABLE_NVSHMEM
|
||||
nvshmem_team_t cpu_rdma_team = NVSHMEM_TEAM_INVALID;
|
||||
nvshmem_team_config_t cpu_rdma_team_config;
|
||||
|
||||
@@ -81,6 +85,7 @@ void finalize() {
|
||||
}
|
||||
nvshmem_finalize();
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace internode
|
||||
|
||||
|
||||
Reference in New Issue
Block a user