mirror of
https://github.com/deepseek-ai/FlashMLA
synced 2025-06-26 18:15:54 +00:00
init fp8
This commit is contained in:
parent
bcb90f2afd
commit
dae0690055
3
csrc/flash_fwd_mla_fp8_sm90.cu
Normal file
3
csrc/flash_fwd_mla_fp8_sm90.cu
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
#include "flash_fwd_mla_kernel.h"
|
||||||
|
|
||||||
|
template void run_mha_fwd_splitkv_mla<cutlass::float_e4m3_t, 576>(Flash_fwd_mla_params &params, cudaStream_t stream);
|
1
setup.py
1
setup.py
@@ -37,6 +37,7 @@ ext_modules.append(
|
|||||||
sources=[
|
sources=[
|
||||||
"csrc/flash_api.cpp",
|
"csrc/flash_api.cpp",
|
||||||
"csrc/flash_fwd_mla_bf16_sm90.cu",
|
"csrc/flash_fwd_mla_bf16_sm90.cu",
|
||||||
|
"csrc/flash_fwd_mla_fp8_sm90.cu",
|
||||||
],
|
],
|
||||||
extra_compile_args={
|
extra_compile_args={
|
||||||
"cxx": cxx_args,
|
"cxx": cxx_args,
|
||||||
|
Loading…
Reference in New Issue
Block a user