mirror of
https://github.com/deepseek-ai/DeepEP
synced 2025-06-26 18:28:11 +00:00
Improve EP2/4 performance
This commit is contained in:
@@ -160,12 +160,11 @@ class Buffer:
|
||||
Returns:
|
||||
config: the recommended config.
|
||||
"""
|
||||
# Intranode
|
||||
if num_ranks <= 8:
|
||||
return Config(Buffer.num_sms, 6, 256, 6, 128)
|
||||
|
||||
# Internode
|
||||
config_map = {
|
||||
2: Config(Buffer.num_sms, 16, 256, 6, 128),
|
||||
4: Config(Buffer.num_sms, 16, 256, 6, 128),
|
||||
8: Config(Buffer.num_sms, 6, 256, 6, 128),
|
||||
16: Config(Buffer.num_sms, 16, 288, 20, 128),
|
||||
24: Config(Buffer.num_sms, 8, 288, 32, 128),
|
||||
32: Config(Buffer.num_sms, 8, 288, 32, 128),
|
||||
@@ -188,12 +187,11 @@ class Buffer:
|
||||
Returns:
|
||||
config: the recommended config.
|
||||
"""
|
||||
# Intranode
|
||||
if num_ranks <= 8:
|
||||
return Config(Buffer.num_sms, 6, 256, 6, 128)
|
||||
|
||||
# Internode
|
||||
config_map = {
|
||||
2: Config(Buffer.num_sms, 6, 256, 6, 128),
|
||||
4: Config(Buffer.num_sms, 6, 256, 6, 128),
|
||||
8: Config(Buffer.num_sms, 6, 256, 6, 128),
|
||||
16: Config(Buffer.num_sms, 2, 288, 28, 128),
|
||||
24: Config(Buffer.num_sms, 1, 288, 20, 128),
|
||||
32: Config(Buffer.num_sms, 1, 288, 20, 128),
|
||||
|
||||
Reference in New Issue
Block a user