mirror of
https://github.com/deepseek-ai/DeepGEMM
synced 2025-06-03 15:56:12 +00:00
Merge pull request #42 from sazczmh/main
Performance: reducing the percentage of FFMA interleaving yields a slight performance gain, roughly 0.5%
This commit is contained in:
commit
9d3222a93e
@ -73,7 +73,7 @@ def parse_registers(line):
|
|||||||
|
|
||||||
|
|
||||||
def modify_segment(m, name, ffma_lines):
|
def modify_segment(m, name, ffma_lines):
|
||||||
num_lines = len(ffma_lines)
|
num_lines = (len(ffma_lines) * 9 // 16) // 2 * 2
|
||||||
assert num_lines % 2 == 0
|
assert num_lines % 2 == 0
|
||||||
|
|
||||||
le_bytes, new_le_bytes = [], []
|
le_bytes, new_le_bytes = [], []
|
||||||
|
Loading…
Reference in New Issue
Block a user