From c56dbe19cf53b49e42533918178cb78e3a36bf32 Mon Sep 17 00:00:00 2001
From: Phil Szalay
Date: Mon, 24 Mar 2025 12:25:09 +0100
Subject: [PATCH] Track token usage for LLM streaming responses

---
 backend/beyond_the_loop/routers/openai.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/backend/beyond_the_loop/routers/openai.py b/backend/beyond_the_loop/routers/openai.py
index 5dedda5cf..a9a3fa12e 100644
--- a/backend/beyond_the_loop/routers/openai.py
+++ b/backend/beyond_the_loop/routers/openai.py
@@ -690,6 +690,10 @@ async def generate_chat_completion(
     if "max_tokens" in payload and "max_completion_tokens" in payload:
         del payload["max_tokens"]
 
+    # Add stream_options to include usage information in streaming responses
+    if "stream" in payload and payload["stream"]:
+        payload["stream_options"] = {"include_usage": True}
+
     # Convert the modified body back to JSON
     payload = json.dumps(payload)
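
Note: with OpenAI-compatible backends, stream_options={"include_usage": true}
makes the server emit one final streaming chunk (with an empty choices list)
that carries the token usage counts before [DONE]. The sketch below shows how
a client could consume that extra chunk; it is illustrative only — the model
name and client setup are assumptions, not part of this patch, which operates
server-side on the raw request payload.

    from openai import OpenAI

    client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

    stream = client.chat.completions.create(
        model="gpt-4o-mini",  # illustrative model name
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
        stream_options={"include_usage": True},  # same option the patch injects
    )

    usage = None
    for chunk in stream:
        # Regular chunks carry content deltas; the final usage-only chunk
        # arrives with an empty choices list.
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")
        if chunk.usage is not None:
            usage = chunk.usage

    if usage is not None:
        print(f"\nprompt={usage.prompt_tokens} "
              f"completion={usage.completion_tokens} "
              f"total={usage.total_tokens}")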