Add low-latency kernel PCIe usage flag (#195)

* Add low-latency kernel usage flag

* Update comments
This commit is contained in:
Chenggang Zhao
2025-06-09 14:37:13 +08:00
committed by GitHub
parent 564e375234
commit 0d1a855d81
6 changed files with 57 additions and 13 deletions

View File

@@ -155,6 +155,7 @@ def test_loop(local_rank: int, num_local_ranks: int):
print(f'Allocating buffer size: {num_rdma_bytes / 1e6} MB ...', flush=True)
buffer = deep_ep.Buffer(group, num_rdma_bytes=num_rdma_bytes, low_latency_mode=True,
num_qps_per_rank=num_experts // num_ranks)
buffer.get_low_latency_usage_flag()
test_main(num_tokens, hidden, num_experts, num_topk, rank, num_ranks, group, buffer, seed=1)
do_pressure_test = False