mirror of
https://github.com/deepseek-ai/DeepGEMM
synced 2025-05-08 19:59:21 +00:00
Add some notes for promotion
This commit is contained in:
parent
ded740f736
commit
9b0dad8640
@ -283,6 +283,7 @@ fp8_gemm_kernel(__nv_bfloat16* gmem_d, float* scales_b, int* grouped_layout,
|
|||||||
empty_barrier_arrive(s);
|
empty_barrier_arrive(s);
|
||||||
|
|
||||||
// Promote with scales
|
// Promote with scales
|
||||||
|
// NOTES: expressing this as predicates, rather than as two separate loops, is very important for performance
|
||||||
float scale_0_0 = scale_a_0 * scale_b_0, scale_1_0 = scale_a_1 * scale_b_0;
|
float scale_0_0 = scale_a_0 * scale_b_0, scale_1_0 = scale_a_1 * scale_b_0;
|
||||||
float scale_0_1, scale_1_1;
|
float scale_0_1, scale_1_1;
|
||||||
if constexpr (not kMustUseUniformedScaleB)
|
if constexpr (not kMustUseUniformedScaleB)
|
||||||
|
Loading…
Reference in New Issue
Block a user