fix: display usage for non-streaming ollama response

Timothy Jaeryang Baek 2025-01-29 21:07:22 -08:00
parent 248fcf1636
commit 8727b91664


@@ -9,7 +9,48 @@ def convert_response_ollama_to_openai(ollama_response: dict) -> dict:
    model = ollama_response.get("model", "ollama")
    message_content = ollama_response.get("message", {}).get("content", "")
    data = ollama_response
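    # Ollama reports token counts and durations in nanoseconds; build an
    # OpenAI-style usage block with derived tokens-per-second rates.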
    usage = {
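        # Dividing a duration (ns) by 10_000_000 and then multiplying the
        # rate by 100 is equivalent to dividing by 1e9, i.e. tokens/second.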
"response_token/s": (
round(
(
(
data.get("eval_count", 0)
/ ((data.get("eval_duration", 0) / 10_000_000))
)
* 100
),
2,
)
if data.get("eval_duration", 0) > 0
else "N/A"
),
"prompt_token/s": (
round(
(
(
data.get("prompt_eval_count", 0)
/ ((data.get("prompt_eval_duration", 0) / 10_000_000))
)
* 100
),
2,
)
if data.get("prompt_eval_duration", 0) > 0
else "N/A"
),
"total_duration": data.get("total_duration", 0),
"load_duration": data.get("load_duration", 0),
"prompt_eval_count": data.get("prompt_eval_count", 0),
"prompt_eval_duration": data.get("prompt_eval_duration", 0),
"eval_count": data.get("eval_count", 0),
"eval_duration": data.get("eval_duration", 0),
"approximate_total": (lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s")(
(data.get("total_duration", 0) or 0) // 1_000_000_000
),
}
    response = openai_chat_completion_message_template(model, message_content, usage)
    return response
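
For reference, the rate math and the duration formatter can be checked in isolation. A minimal standalone sketch; the helper names and sample numbers below are illustrative, not from the repo:

def tokens_per_second(count: int, duration_ns: int) -> float:
    # count / (duration_ns / 10_000_000) * 100 == count / (duration_ns / 1e9)
    return round((count / (duration_ns / 10_000_000)) * 100, 2)

def format_duration(total_duration_ns: int) -> str:
    # Same formatting as the "approximate_total" lambda above.
    s = total_duration_ns // 1_000_000_000
    return f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s"

print(tokens_per_second(120, 2_000_000_000))  # 120 tokens in 2 s -> 60.0
print(format_duration(5_025_000_000))         # ~5 s -> "0h0m5s"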
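
The template helper itself is outside this hunk. A minimal sketch of the shape it would need in order to accept the new third argument, assuming it builds a standard chat.completion payload; the field layout here is an assumption, not taken from this repo:

import time
import uuid

def openai_chat_completion_message_template(
    model: str, content: str, usage: dict | None = None
) -> dict:
    # Hypothetical sketch: wrap the content in an OpenAI-style completion
    # and attach the Ollama-derived usage block when one is provided.
    response = {
        "id": f"chatcmpl-{uuid.uuid4()}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model,
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": content},
                "finish_reason": "stop",
            }
        ],
    }
    if usage:
        response["usage"] = usage
    return response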