From 074ce356dea776c831da52a743ecbff5da92c81a Mon Sep 17 00:00:00 2001
From: Simone
Date: Wed, 19 Feb 2025 15:28:39 +0100
Subject: [PATCH] Added requested OpenAI usage keys

---
 backend/open_webui/utils/response.py | 77 ++++++++++------------------
 1 file changed, 26 insertions(+), 51 deletions(-)

diff --git a/backend/open_webui/utils/response.py b/backend/open_webui/utils/response.py
index f9979b4a2..eb6b1a242 100644
--- a/backend/open_webui/utils/response.py
+++ b/backend/open_webui/utils/response.py
@@ -23,18 +23,8 @@ def convert_ollama_tool_call_to_openai(tool_calls: dict) -> dict:
         openai_tool_calls.append(openai_tool_call)
     return openai_tool_calls
 
-
-def convert_response_ollama_to_openai(ollama_response: dict) -> dict:
-    model = ollama_response.get("model", "ollama")
-    message_content = ollama_response.get("message", {}).get("content", "")
-    tool_calls = ollama_response.get("message", {}).get("tool_calls", None)
-    openai_tool_calls = None
-
-    if tool_calls:
-        openai_tool_calls = convert_ollama_tool_call_to_openai(tool_calls)
-
-    data = ollama_response
-    usage = {
+def convert_ollama_usage_to_openai(data: dict) -> dict:
+    return {
         "response_token/s": (
             round(
                 (
@@ -66,14 +56,37 @@ def convert_response_ollama_to_openai(ollama_response: dict) -> dict:
         "total_duration": data.get("total_duration", 0),
         "load_duration": data.get("load_duration", 0),
         "prompt_eval_count": data.get("prompt_eval_count", 0),
+        "prompt_tokens": int(data.get("prompt_eval_count", 0)), # This is the OpenAI compatible key
         "prompt_eval_duration": data.get("prompt_eval_duration", 0),
         "eval_count": data.get("eval_count", 0),
+        "completion_tokens": int(data.get("eval_count", 0)), # This is the OpenAI compatible key
         "eval_duration": data.get("eval_duration", 0),
         "approximate_total": (lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s")(
             (data.get("total_duration", 0) or 0) // 1_000_000_000
         ),
+        "total_tokens": int( # This is the OpenAI compatible key
+            data.get("prompt_eval_count", 0) + data.get("eval_count", 0)
+        ),
+        "completion_tokens_details": { # This is the OpenAI compatible key
+            "reasoning_tokens": 0,
+            "accepted_prediction_tokens": 0,
+            "rejected_prediction_tokens": 0
+        }
     }
+
+def convert_response_ollama_to_openai(ollama_response: dict) -> dict:
+    model = ollama_response.get("model", "ollama")
+    message_content = ollama_response.get("message", {}).get("content", "")
+    tool_calls = ollama_response.get("message", {}).get("tool_calls", None)
+    openai_tool_calls = None
+
+    if tool_calls:
+        openai_tool_calls = convert_ollama_tool_call_to_openai(tool_calls)
+
+    data = ollama_response
+
+    usage = convert_ollama_usage_to_openai(data)
 
     response = openai_chat_completion_message_template(
         model, message_content, openai_tool_calls, usage
     )
@@ -96,45 +109,7 @@ async def convert_streaming_response_ollama_to_openai(ollama_streaming_response
         usage = None
         if done:
-            usage = {
-                "response_token/s": (
-                    round(
-                        (
-                            (
-                                data.get("eval_count", 0)
-                                / ((data.get("eval_duration", 0) / 10_000_000))
-                            )
-                            * 100
-                        ),
-                        2,
-                    )
-                    if data.get("eval_duration", 0) > 0
-                    else "N/A"
-                ),
-                "prompt_token/s": (
-                    round(
-                        (
-                            (
-                                data.get("prompt_eval_count", 0)
-                                / ((data.get("prompt_eval_duration", 0) / 10_000_000))
-                            )
-                            * 100
-                        ),
-                        2,
-                    )
-                    if data.get("prompt_eval_duration", 0) > 0
-                    else "N/A"
-                ),
-                "total_duration": data.get("total_duration", 0),
-                "load_duration": data.get("load_duration", 0),
-                "prompt_eval_count": data.get("prompt_eval_count", 0),
-                "prompt_eval_duration": data.get("prompt_eval_duration", 0),
-                "eval_count": data.get("eval_count", 0),
data.get("eval_count", 0), - "eval_duration": data.get("eval_duration", 0), - "approximate_total": ( - lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s" - )((data.get("total_duration", 0) or 0) // 1_000_000_000), - } + usage = convert_ollama_usage_to_openai(data) data = openai_chat_chunk_message_template( model, message_content if not done else None, openai_tool_calls, usage