Add the transaction window data structure for RDMA senders (#245)

* Add draft

* Add fast-debugging flags

* Fix several bugs

* Add sender timeout checks

* Fix stuck

* Fix bugs

* Fix bugs
This commit is contained in:
Chenggang Zhao
2025-06-24 09:12:40 +08:00
committed by GitHub
parent 9eb2f84b3e
commit bc118b248a
6 changed files with 101 additions and 54 deletions

View File

@@ -220,7 +220,7 @@ def test_main(num_sms: int, local_rank: int, num_local_ranks: int, num_ranks: in
def test_loop(local_rank: int, num_local_ranks: int):
num_nodes = int(os.getenv('WORLD_SIZE', 1))
rank, num_ranks, group = init_dist(local_rank, num_local_ranks)
test_ll_compatibility = True
test_ll_compatibility = os.getenv('EP_TEST_LL_COMPATIBILITY', False)
if test_ll_compatibility:
ll_num_tokens, ll_hidden, ll_num_experts, ll_num_topk = 16, 5120, 256, 9