mirror of
https://github.com/deepseek-ai/DeepEP
synced 2025-06-26 18:28:11 +00:00
Add automatic warp count control for low-latency kernels (#213)
* Add automatic warp count control for low-latency dispatch
* Add automatic warp count control for low-latency combine
* More assertions
This commit is contained in:
@@ -6,13 +6,13 @@
|
||||
namespace deep_ep {
|
||||
|
||||
template <typename dtype_t>
|
||||
dtype_t cell_div(dtype_t a, dtype_t b) {
|
||||
dtype_t ceil_div(dtype_t a, dtype_t b) {
|
||||
return (a + b - 1) / b;
|
||||
}
|
||||
|
||||
template <typename dtype_t>
|
||||
dtype_t align(dtype_t a, dtype_t b) {
|
||||
return cell_div<dtype_t>(a, b) * b;
|
||||
return ceil_div<dtype_t>(a, b) * b;
|
||||
}
|
||||
|
||||
struct Config {
|
||||
|
||||
Reference in New Issue
Block a user