mirror of
https://github.com/deepseek-ai/FlashMLA
synced 2025-06-26 18:15:54 +00:00
enable fp8 compile
This commit is contained in:
5
setup.py
5
setup.py
@@ -37,7 +37,7 @@ ext_modules.append(
     sources=[
         "csrc/flash_api.cpp",
         "csrc/flash_fwd_mla_bf16_sm90.cu",
-        #"csrc/flash_fwd_mla_fp8_sm90.cu",
+        "csrc/flash_fwd_mla_fp8_sm90.cu",
     ],
     extra_compile_args={
         "cxx": cxx_args,
@@ -55,7 +55,8 @@ ext_modules.append(
         "--expt-relaxed-constexpr",
         "--expt-extended-lambda",
         "--use_fast_math",
-        "--ptxas-options=-v,--register-usage-level=10"
+        "--ptxas-options=-v,--register-usage-level=10",
+        "--ftemplate-backtrace-limit=0"
     ]
     + cc_flag
 ),
Reference in New Issue
Block a user