mirror of
https://github.com/deepseek-ai/FlashMLA
synced 2025-06-26 18:15:54 +00:00
Performance optimization for compute-bound cases
This commit is contained in:
@@ -55,7 +55,6 @@ def flash_mla_with_kvcache(
|
||||
out, softmax_lse = flash_mla_cuda.fwd_kvcache_mla(
|
||||
q,
|
||||
k_cache,
|
||||
None,
|
||||
head_dim_v,
|
||||
cache_seqlens,
|
||||
block_table,
|
||||
|
||||
Reference in New Issue
Block a user