mirror of
https://github.com/deepseek-ai/FlashMLA
synced 2025-05-15 09:05:54 +00:00
* Fix benchmark script * Performance optimization for compute-bound cases * Add new testcase (s_k = 16384) * Update README.md * Update comment * Update README.md * Add the deep-dive blog * Add background color for MLA Kernel Sched.drawio.svg * Use relative path for the schedule image * Move flash_mla.h to kernels/params.h
14 lines
252 B
C++
14 lines
252 B
C++
#pragma once
// Compile-time integer constants grouped under the Config namespace.
// NOTE(review): the per-constant descriptions below are inferred from the
// identifiers alone; confirm them against the code that consumes these values.
namespace Config {

// Presumably the tile extent along the M dimension -- TODO confirm.
static constexpr int BLOCK_SIZE_M = 64;
// Presumably the number of entries held by one page block -- TODO confirm.
static constexpr int PAGE_BLOCK_SIZE = 64;

// Presumably the per-head dimension of K -- TODO confirm.
static constexpr int HEAD_DIM_K = 576;
// Presumably the per-head dimension of V -- TODO confirm.
static constexpr int HEAD_DIM_V = 512;

// Presumably a fixed cost, expressed in blocks, used by scheduling logic
// elsewhere -- TODO confirm where and how it is applied.
static constexpr int FIXED_OVERHEAD_NUM_BLOCKS = 5;

}  // namespace Config