mirror of
https://github.com/deepseek-ai/DeepGEMM
synced 2025-04-06 19:14:56 +00:00
Update get_best_configs
a better strategy to choose config
This commit is contained in:
parent
461427ecd0
commit
fbec9e5eee
@ -79,10 +79,12 @@ def get_best_configs(m: int, n: int, k: int, num_groups: int, num_sms: int,
|
||||
elif num_waves < best_num_waves:
|
||||
success = True
|
||||
elif num_waves == best_num_waves:
|
||||
div_n = bool(128 % block_n)
|
||||
best_div_n = bool(128 % best_block_n)
|
||||
# Check last wave utilization
|
||||
util = get_last_wave_util(block_m, block_n)
|
||||
best_util = get_last_wave_util(best_block_m, best_block_n)
|
||||
success = util > best_util or (util == best_util and (block_n >= best_block_n and block_m <= best_block_m))
|
||||
success = util > best_util or (util == best_util and (block_m > best_block_m or block_m == best_block_m and (div_n < best_div_n or div_n == best_div_n and block_n < best_block_n)))
|
||||
best_block_m, best_block_n = (block_m, block_n) if success else (best_block_m, best_block_n)
|
||||
assert best_block_m is not None and best_block_n is not None
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user