mirror of
https://github.com/deepseek-ai/DeepGEMM
synced 2025-05-31 12:58:04 +00:00
Merge pull request #42 from sazczmh/main
Performance: reducing the percentage of FFMA interleaving yields a slight performance gain, roughly 0.5%
This commit is contained in:
commit
9d3222a93e
@ -73,7 +73,7 @@ def parse_registers(line):
|
||||
|
||||
|
||||
def modify_segment(m, name, ffma_lines):
|
||||
num_lines = len(ffma_lines)
|
||||
num_lines = (len(ffma_lines) * 9 // 16) // 2 * 2
|
||||
assert num_lines % 2 == 0
|
||||
|
||||
le_bytes, new_le_bytes = [], []
|
||||
|
Loading…
Reference in New Issue
Block a user