Maximum representable value in FP8 E4M3 format

Replace Hardcoded 448.0 with Global Constant FP8_E4M3_MAX for FP8 E4M3 Format
2025-06-26 23:15:49 +00:00 · 2025-03-07 19:58:02 +04:00 · 2025-03-07 19:58:02 +04:00 · 629857685e
commit 629857685e
parent 9d3222a93e
1 changed files with 7 additions and 4 deletions
--- a/tests/test_core.py
+++ b/tests/test_core.py
@ -5,14 +5,17 @@ from typing import Tuple
 import deep_gemm
 from deep_gemm import bench_kineto, calc_diff, ceil_div, get_col_major_tma_aligned_tensor

+FP8_E4M3_MAX = 448.0  # Maximum representable value in FP8 E4M3 format

 def per_token_cast_to_fp8(x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    assert x.dim() == 2 and x.size(1) % 128 == 0
    m, n = x.shape
    x_view = x.view(m, -1, 128)
    x_amax = x_view.abs().float().amax(dim=2).view(m, -1).clamp(1e-4)
-    return (x_view * (448.0 / x_amax.unsqueeze(2))).to(torch.float8_e4m3fn).view(m, n), (x_amax / 448.0).view(m, -1)
-
+    return (
+        (x_view * (FP8_E4M3_MAX / x_amax.unsqueeze(2))).to(torch.float8_e4m3fn).view(m, n),
+        (x_amax / FP8_E4M3_MAX).view(m, -1)
+    )

 def per_block_cast_to_fp8(x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    assert x.dim() == 2
@ -21,8 +24,8 @@ def per_block_cast_to_fp8(x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    x_padded[:m, :n] = x
    x_view = x_padded.view(-1, 128, x_padded.size(1) // 128, 128)
    x_amax = x_view.abs().float().amax(dim=(1, 3), keepdim=True).clamp(1e-4)
-    x_scaled = (x_view * (448.0 / x_amax)).to(torch.float8_e4m3fn)
-    return x_scaled.view_as(x_padded)[:m, :n].contiguous(), (x_amax / 448.0).view(x_view.size(0), x_view.size(2))
+    x_scaled = (x_view * (FP8_E4M3_MAX / x_amax)).to(torch.float8_e4m3fn)
+    return x_scaled.view_as(x_padded)[:m, :n].contiguous(), (x_amax / FP8_E4M3_MAX).view(x_view.size(0), x_view.size(2))


 def construct(m: int, k: int, n: int) -> \