From fcd1dcd99d00106c8ca818948c4b1676df6d91bc Mon Sep 17 00:00:00 2001
From: sazc
Date: Wed, 5 Mar 2025 17:50:22 +0800
Subject: [PATCH] Performance: reducing the percentage of FFMA interleaving yields a slight performance gain, roughly 0.5%

---
 deep_gemm/jit/interleave_ffma.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deep_gemm/jit/interleave_ffma.py b/deep_gemm/jit/interleave_ffma.py
index 74e8358..fcb377e 100644
--- a/deep_gemm/jit/interleave_ffma.py
+++ b/deep_gemm/jit/interleave_ffma.py
@@ -73,7 +73,7 @@ def parse_registers(line):
 
 
 def modify_segment(m, name, ffma_lines):
-    num_lines = len(ffma_lines)
+    num_lines = (len(ffma_lines) * 9 // 16) // 2 * 2
     assert num_lines % 2 == 0
 
     le_bytes, new_le_bytes = [], []