diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index d9ba29b2b..9f6d60977 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -1074,9 +1074,10 @@ async def process_chat_response( # We might want to disable this by default detect_reasoning = True - reasoning_start_time = None + reasoning_content = "" + ongoing_content = "" async for line in response.body_iterator: line = line.decode("utf-8") if isinstance(line, bytes) else line @@ -1116,11 +1117,17 @@ async def process_chat_response( if detect_reasoning: if "\n" in content: + # Remove the tag + content = content.replace("\n", "") + ongoing_content = content + reasoning_start_time = time.time() reasoning_content = "" - content = content.replace("\n", "") if reasoning_start_time is not None: + # Remove the last value from the content + content = content[: -len(value)] + reasoning_content += value if "\n" in reasoning_content: @@ -1147,13 +1154,14 @@ async def process_chat_response( ) # Format reasoning with
tag - content = f"
\nThought for {reasoning_duration} seconds\n{reasoning_content}\n
\n" + content = f"{ongoing_content}
\nThought for {reasoning_duration} seconds\n{reasoning_content}\n
\n" else: content = "" reasoning_start_time = None else: - ongoing_content = "\n".join( + + reasoning_content = "\n".join( ( f"> {line}" if not line.startswith(">") @@ -1163,7 +1171,7 @@ async def process_chat_response( ) # Show ongoing thought process - content = f"
\nThinking… \n{ongoing_content}\n
\n" + content = f"{ongoing_content}
\nThinking… \n{reasoning_content}\n
\n" if ENABLE_REALTIME_CHAT_SAVE: # Save message in the database