DeepGEMM/deep_gemm/__init__.py
2025-03-12 13:48:02 +00:00

15 lines
380 B
Python

import torch
from . import jit
from .jit_kernels import (
gemm_fp8_fp8_bf16_nt,
gemm_fp8_fp8_bf16_bw_nt,
m_grouped_gemm_fp8_fp8_bf16_nt_contiguous,
m_grouped_gemm_fp8_fp8_bf16_nt_masked,
ceil_div,
set_num_sms, get_num_sms,
get_col_major_tma_aligned_tensor,
get_m_alignment_for_contiguous_layout
)
from .utils import bench, bench_kineto, calc_diff