mirror of
https://github.com/deepseek-ai/DeepGEMM
synced 2025-04-06 10:04:54 +00:00
15 lines
380 B
Python
15 lines
380 B
Python
import torch
|
|
|
|
from . import jit
|
|
from .jit_kernels import (
|
|
gemm_fp8_fp8_bf16_nt,
|
|
gemm_fp8_fp8_bf16_bw_nt,
|
|
m_grouped_gemm_fp8_fp8_bf16_nt_contiguous,
|
|
m_grouped_gemm_fp8_fp8_bf16_nt_masked,
|
|
ceil_div,
|
|
set_num_sms, get_num_sms,
|
|
get_col_major_tma_aligned_tensor,
|
|
get_m_alignment_for_contiguous_layout
|
|
)
|
|
from .utils import bench, bench_kineto, calc_diff
|