mirror of
https://github.com/deepseek-ai/DeepEP
synced 2025-06-26 18:28:11 +00:00
Support Ampere architecture (#204)
* Update README
* Update `setup.py`
* Fix headers
* Add `DISABLE_NVSHMEM` for APIs
* Fix launch
* Fix TMA settings
* Fix TMA usages
* Fix dlink
* Separate layout kernels
* Update version
* Add `is_sm90_compiled`
* Fix tests
* Add NVLink connection checks
* Update README
* Fix tests
* Add some comments
* Minor fix
* Minor fix
* Fix bugs
This commit is contained in:
@@ -7,7 +7,7 @@ from typing import Callable, List, Tuple, Optional, Union
|
||||
import deep_ep_cpp
|
||||
# noinspection PyUnresolvedReferences
|
||||
from deep_ep_cpp import Config, EventHandle
|
||||
from .utils import EventOverlap
|
||||
from .utils import EventOverlap, check_nvlink_connections
|
||||
|
||||
|
||||
class Buffer:
|
||||
@@ -50,6 +50,7 @@ class Buffer:
|
||||
please make sure all connections are via NVLink.
|
||||
allow_mnnvl: whether to allow MNNVL
|
||||
"""
|
||||
check_nvlink_connections(group)
|
||||
|
||||
# Initialize the CPP runtime
|
||||
self.rank = group.rank()
|
||||
@@ -105,6 +106,10 @@ class Buffer:
|
||||
self.runtime.sync(device_ids, ipc_handles, root_unique_id)
|
||||
assert self.runtime.is_available()
|
||||
|
||||
@staticmethod
def is_sm90_compiled():
    """
    Query the compiled extension for its SM90 build flag.

    Returns:
        the value returned by `deep_ep_cpp.is_sm90_compiled()`, passed through unchanged
            (presumably a boolean telling whether the extension was built with SM90 support;
            confirm against the C++ binding).
    """
    # Pure delegation: the answer lives in the C++ extension module
    sm90_flag = deep_ep_cpp.is_sm90_compiled()
    return sm90_flag
|
||||
|
||||
@staticmethod
|
||||
def set_num_sms(new_num_sms: int) -> None:
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user