|
cutlass@afa1772203
|
Initial commit
|
2025-02-24 09:20:23 +08:00 |
|
flash_api.cpp
|
Merge branch 'main' into will_fp8_mr
|
2025-02-28 22:15:46 +08:00 |
|
flash_fwd_mla_bf16_sm90.cu
|
update gmem
|
2025-02-25 09:45:19 +08:00 |
|
flash_fwd_mla_fp8_sm90.cu
|
update gmem
|
2025-02-25 09:45:19 +08:00 |
|
flash_fwd_mla_kernel.h
|
enable scale
|
2025-02-28 20:07:32 +08:00 |
|
flash_fwd_mla_metadata.cu
|
support fp16
|
2025-02-24 01:58:53 -08:00 |
|
flash_mla.h
|
update fp8 api
|
2025-02-26 08:33:25 +08:00 |
|
fp8_transpose_v.h
|
update desc
|
2025-03-01 07:54:39 +08:00 |
|
named_barrier.h
|
add transv barrier
|
2025-02-26 17:57:00 +08:00 |
|
softmax.h
|
enable scale
|
2025-02-28 20:07:32 +08:00 |
|
static_switch.h
|
Initial commit
|
2025-02-24 09:20:23 +08:00 |
|
utils.h
|
use mm1's Aregs instead of mma0's Cregs
|
2025-02-27 11:59:17 +08:00 |