# Provider-specific key names, checked in priority order. The first key whose
# value is a usable non-zero count wins.
_INPUT_TOKEN_KEYS = (
    "input_tokens",  # Already standard
    "prompt_tokens",  # OpenAI
    "prompt_eval_count",  # Ollama
    "prompt_n",  # llama.cpp
)
_OUTPUT_TOKEN_KEYS = (
    "output_tokens",  # Already standard
    "completion_tokens",  # OpenAI
    "eval_count",  # Ollama
    "predicted_n",  # llama.cpp
)


def _coerce_token_count(value) -> int:
    """Return ``value`` as an int, or 0 when it is not a finite number.

    Accepts ints, floats and numeric strings (``"12"``, ``"12.0"``).
    ``None``, non-numeric strings, containers, NaN and infinities yield 0
    instead of raising.
    """
    # Try int() first so integer-like input is converted exactly; fall back
    # to float() for strings such as "12.0" that int() rejects.
    for convert in (int, float):
        try:
            return int(convert(value))
        except (TypeError, ValueError, OverflowError):
            continue
    return 0


def _first_token_count(usage: dict, keys: tuple) -> int:
    """Return the first non-zero count stored under any of ``keys``, else 0."""
    for key in keys:
        count = _coerce_token_count(usage.get(key))
        if count:
            return count
    return 0


def normalize_usage(usage: dict) -> dict:
    """
    Normalize usage statistics to standard format.
    Handles OpenAI, Ollama, and llama.cpp formats.

    Adds standardized token fields to a copy of the original data:
    - input_tokens: Number of tokens in the prompt
    - output_tokens: Number of tokens generated
    - total_tokens: Reported total, or input + output when none is reported

    Args:
        usage: Raw usage mapping as reported by the backend. May be empty
            or ``None``.

    Returns:
        A new dict holding every original key plus the three standardized
        integer fields. An empty dict is returned for empty/``None`` input.
        The argument itself is never mutated.

    Values that cannot be interpreted as a finite number (non-numeric
    strings, nested objects, NaN, ...) are treated as missing rather than
    raising, so a malformed usage payload does not break the caller.
    """
    if not usage:
        return {}

    input_tokens = _first_token_count(usage, _INPUT_TOKEN_KEYS)
    output_tokens = _first_token_count(usage, _OUTPUT_TOKEN_KEYS)

    # Trust a usable provider-reported total; otherwise derive it.
    total_tokens = _coerce_token_count(usage.get("total_tokens")) or (
        input_tokens + output_tokens
    )

    # Copy so provider-specific fields are preserved and the caller's dict
    # stays untouched.
    result = dict(usage)
    result["input_tokens"] = input_tokens
    result["output_tokens"] = output_tokens
    result["total_tokens"] = total_tokens
    return result