Support Ampere architecture (#204)

* Update README

* Update `setup.py`

* Fix headers

* Add `DISABLE_NVSHMEM` for APIs

* Fix launch

* Fix TMA settings

* Fix TMA usages

* Fix dlink

* Separate layout kernels

* Update version

* Add `is_sm90_compiled`

* Fix tests

* Add NVLink connection checks

* Update README

* Fix tests

* Add some comments

* Minor fix

* Minor fix

* Fix bugs
This commit is contained in:
Chenggang Zhao
2025-06-11 15:48:18 +08:00
committed by GitHub
parent dd13c7145c
commit b8d90fb753
16 changed files with 413 additions and 174 deletions

View File

@@ -7,7 +7,7 @@ from typing import Callable, List, Tuple, Optional, Union
import deep_ep_cpp
# noinspection PyUnresolvedReferences
from deep_ep_cpp import Config, EventHandle
from .utils import EventOverlap
from .utils import EventOverlap, check_nvlink_connections
class Buffer:
@@ -50,6 +50,7 @@ class Buffer:
please make sure all connections are via NVLink.
allow_mnnvl: whether to allow MNNVL
"""
check_nvlink_connections(group)
# Initialize the CPP runtime
self.rank = group.rank()
@@ -105,6 +106,10 @@ class Buffer:
self.runtime.sync(device_ids, ipc_handles, root_unique_id)
assert self.runtime.is_available()
@staticmethod
def is_sm90_compiled():
    """
    Query the compiled extension for its SM90 build status.

    Thin pass-through to ``deep_ep_cpp.is_sm90_compiled()``; no instance
    state is read, so it can be called on the class directly.

    Returns:
        whatever ``deep_ep_cpp.is_sm90_compiled()`` returns, unmodified.
        NOTE(review): presumably a boolean telling whether the extension
        was built with SM90 kernels - confirm against the C++ binding.
    """
    result = deep_ep_cpp.is_sm90_compiled()
    return result
@staticmethod
def set_num_sms(new_num_sms: int) -> None:
"""