From 8727b916642c94e18347d6e8a00f2af5e84acf1b Mon Sep 17 00:00:00 2001
From: Timothy Jaeryang Baek
Date: Wed, 29 Jan 2025 21:07:22 -0800
Subject: [PATCH] fix: display usage for non-streaming ollama response

---
 backend/open_webui/utils/response.py | 43 +++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/backend/open_webui/utils/response.py b/backend/open_webui/utils/response.py
index d6f7b0ac6..f461f7cc2 100644
--- a/backend/open_webui/utils/response.py
+++ b/backend/open_webui/utils/response.py
@@ -9,7 +9,48 @@ def convert_response_ollama_to_openai(ollama_response: dict) -> dict:
     model = ollama_response.get("model", "ollama")
     message_content = ollama_response.get("message", {}).get("content", "")
 
-    response = openai_chat_completion_message_template(model, message_content)
+    data = ollama_response
+    usage = {
+        "response_token/s": (
+            round(
+                (
+                    (
+                        data.get("eval_count", 0)
+                        / ((data.get("eval_duration", 0) / 10_000_000))
+                    )
+                    * 100
+                ),
+                2,
+            )
+            if data.get("eval_duration", 0) > 0
+            else "N/A"
+        ),
+        "prompt_token/s": (
+            round(
+                (
+                    (
+                        data.get("prompt_eval_count", 0)
+                        / ((data.get("prompt_eval_duration", 0) / 10_000_000))
+                    )
+                    * 100
+                ),
+                2,
+            )
+            if data.get("prompt_eval_duration", 0) > 0
+            else "N/A"
+        ),
+        "total_duration": data.get("total_duration", 0),
+        "load_duration": data.get("load_duration", 0),
+        "prompt_eval_count": data.get("prompt_eval_count", 0),
+        "prompt_eval_duration": data.get("prompt_eval_duration", 0),
+        "eval_count": data.get("eval_count", 0),
+        "eval_duration": data.get("eval_duration", 0),
+        "approximate_total": (lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s")(
+            (data.get("total_duration", 0) or 0) // 1_000_000_000
+        ),
+    }
+
+    response = openai_chat_completion_message_template(model, message_content, usage)
 
     return response
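
Note (not part of the patch): Ollama reports eval_duration and prompt_eval_duration in nanoseconds, so count / (duration / 10_000_000) * 100 is equivalent to count / (duration / 1e9), i.e. tokens per second. Below is a minimal sketch of how the patched converter could be exercised with a representative non-streaming /api/chat payload; the sample numbers are illustrative, and it assumes openai_chat_completion_message_template attaches the dict under the standard OpenAI "usage" key.

    # Minimal sketch: run a sample non-streaming Ollama response through the
    # patched converter and inspect the computed usage block.
    from open_webui.utils.response import convert_response_ollama_to_openai

    # Illustrative payload; Ollama durations are nanoseconds.
    ollama_response = {
        "model": "llama3.2",
        "message": {"role": "assistant", "content": "Hello!"},
        "done": True,
        "total_duration": 5_200_000_000,   # 5.2 s
        "load_duration": 500_000_000,
        "prompt_eval_count": 26,
        "prompt_eval_duration": 200_000_000,
        "eval_count": 282,
        "eval_duration": 4_500_000_000,
    }

    completion = convert_response_ollama_to_openai(ollama_response)
    usage = completion["usage"]  # assumes the template helper exposes it here

    print(usage["response_token/s"])   # 62.67  -> 282 tokens / 4.5 s
    print(usage["prompt_token/s"])     # 130.0  -> 26 tokens / 0.2 s
    print(usage["approximate_total"])  # "0h0m5s" from total_duration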