add lru-cache to avoid repeated calculation

Author: Yi Zhang, 2025-04-04 12:44:26 +08:00 (committed by GitHub)
Parent: c187c23ba8
Commit: 776bd0cccc


@@ -1,5 +1,6 @@
 import math
 import torch
+from functools import lru_cache
 from typing import Tuple

 from .tuner import jit_tuner
@@ -66,7 +67,7 @@ def get_smem_size(num_stages: int, k: int, block_m: int, block_n: int, block_k:
     smem_size += smem_barrier
     return smem_size
+@lru_cache(maxsize=None)
 def get_best_configs(m: int, n: int, k: int, num_groups: int, num_sms: int,
                      is_grouped_contiguous: bool = False) -> Tuple[int, int, int, int, Tuple[int, bool], int]:
     if not is_grouped_contiguous:
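
For context, a minimal sketch (not part of the commit) of what functools.lru_cache buys here, assuming get_best_configs is a pure function of its arguments: repeated calls with the same GEMM shape return the memoized result instead of re-running the config search. The names get_best_configs_sketch and _search_configs below are hypothetical stand-ins.

from functools import lru_cache
from typing import Tuple


# Hypothetical stand-in for the candidate search done inside get_best_configs;
# it depends only on its arguments, so caching by argument tuple is safe.
def _search_configs(m: int, n: int, k: int) -> Tuple[int, int]:
    return min(m, 128), min(n, 128)


@lru_cache(maxsize=None)  # memoize results, keyed by the (hashable) argument tuple
def get_best_configs_sketch(m: int, n: int, k: int) -> Tuple[int, int]:
    return _search_configs(m, n, k)


# The first call computes; the second call with the same shape is served from the cache.
print(get_best_configs_sketch(4096, 7168, 2048))
print(get_best_configs_sketch(4096, 7168, 2048))
print(get_best_configs_sketch.cache_info())  # CacheInfo(hits=1, misses=1, ...)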