Support UE8M0 data format. (#206)

2025-06-26 18:28:11 +00:00 · 2025-06-12 09:38:19 +08:00
parent 9ec061204e
commit 21efbe9b48
14 changed files with 255 additions and 115 deletions
--- a/csrc/deep_ep.hpp
+++ b/csrc/deep_ep.hpp
@@ -141,7 +141,8 @@ public:
    low_latency_dispatch(const torch::Tensor& x, const torch::Tensor& topk_idx,
                         const std::optional<torch::Tensor>& cumulative_local_expert_recv_stats,
                         int num_max_dispatch_tokens_per_rank, int num_experts,
-                         bool use_fp8, bool async, bool return_recv_hook);
+                         bool use_fp8, bool round_scale, bool use_ue8m0,
+                         bool async, bool return_recv_hook);

    std::tuple<torch::Tensor, std::optional<EventHandle>, std::optional<std::function<void()>>>
    low_latency_combine(const torch::Tensor& x, const torch::Tensor& topk_idx, const torch::Tensor& topk_weights,