chore(setup): properly package the repository as a Python package

2025-06-26 18:15:54 +00:00 · 2025-02-24 18:18:38 +08:00
parent 18e32770cc
commit 26d3077949
8 changed files with 108 additions and 8 deletions
--- a/flash_mla/__init__.py
+++ b/flash_mla/__init__.py
@@ -1,6 +1,15 @@
-__version__ = "1.0.0"
+"""FlashMLA: An efficient MLA decoding kernel for Hopper GPUs."""

 from flash_mla.flash_mla_interface import (
    get_mla_metadata,
    flash_mla_with_kvcache,
 )
+
+
+__all__ = [
+    "get_mla_metadata",
+    "flash_mla_with_kvcache",
+]
+
+
+__version__ = "1.0.0"
--- a/flash_mla/flash_mla_cuda.pyi
+++ b/flash_mla/flash_mla_cuda.pyi
@@ -0,0 +1,19 @@
+import torch
+
+def get_mla_metadata(
+    cache_seqlens: torch.Tensor,
+    num_heads_per_head_k: int,
+    num_heads_k: int,
+) -> tuple[torch.Tensor, torch.Tensor]: ...
+def fwd_kvcache_mla(
+    q: torch.Tensor,
+    k_cache: torch.Tensor,
+    v_cache: torch.Tensor | None,
+    head_dim_v: int,
+    cache_seqlens: torch.Tensor,
+    block_table: torch.Tensor,
+    softmax_scale: float,
+    causal: bool,
+    tile_scheduler_metadata: torch.Tensor,
+    num_splits: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor]: ...
--- a/flash_mla/flash_mla_interface.py
+++ b/flash_mla/flash_mla_interface.py
@@ -2,7 +2,7 @@ from typing import Optional, Tuple

 import torch

-import flash_mla_cuda
+from flash_mla import flash_mla_cuda


 def get_mla_metadata(
--- a/flash_mla/py.typed
+++ b/flash_mla/py.typed