diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index 9de0ca680..028a4fa7b 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -73,6 +73,7 @@ from open_webui.models.models import Models from open_webui.retrieval.utils import get_sources_from_items +from open_webui.utils.sanitize import strip_markdown_code_fences from open_webui.utils.chat import generate_chat_completion from open_webui.utils.task import ( get_task_model_id, @@ -1408,12 +1409,12 @@ async def convert_url_images_to_base64(form_data): def process_messages_with_output(messages: list[dict]) -> list[dict]: """ Process messages with OR-aligned output items for LLM consumption. - + For assistant messages with 'output' field, produces properly formatted OpenAI-style messages (tool_calls + tool results). Strips 'output' before LLM. """ processed = [] - + for message in messages: if message.get("role") == "assistant" and message.get("output"): # Use output items for clean OpenAI-format messages @@ -1421,11 +1422,11 @@ def process_messages_with_output(messages: list[dict]) -> list[dict]: if output_messages: processed.extend(output_messages) continue - + # Strip 'output' field before adding (LLM shouldn't see it) clean_message = {k: v for k, v in message.items() if k != "output"} processed.append(clean_message) - + return processed @@ -1841,9 +1842,8 @@ async def process_chat_payload(request, form_data, user, metadata, model): # Inject builtin tools for native function calling based on enabled features and model capability # Check if builtin_tools capability is enabled for this model (defaults to True if not specified) builtin_tools_enabled = ( - (model.get("info", {}).get("meta", {}).get("capabilities") or {}) - .get("builtin_tools", True) - ) + model.get("info", {}).get("meta", {}).get("capabilities") or {} + ).get("builtin_tools", True) if ( metadata.get("params", {}).get("function_calling") == "native" and builtin_tools_enabled @@ -1887,9 +1887,8 @@ async def process_chat_payload(request, form_data, user, metadata, model): # Check if file context extraction is enabled for this model (default True) file_context_enabled = ( - (model.get("info", {}).get("meta", {}).get("capabilities") or {}) - .get("file_context", True) - ) + model.get("info", {}).get("meta", {}).get("capabilities") or {} + ).get("file_context", True) if file_context_enabled: try: @@ -2238,7 +2237,7 @@ async def process_chat_response( ) title = Chats.get_chat_title_by_id(metadata["chat_id"]) - + # Use output from backend if provided (OR-compliant backends) response_output = response_data.get("output") @@ -2248,7 +2247,11 @@ async def process_chat_response( "data": { "done": True, "content": content, - **({"output": response_output} if response_output else {}), + **( + {"output": response_output} + if response_output + else {} + ), "title": title, }, } @@ -2261,7 +2264,11 @@ async def process_chat_response( { "role": "assistant", "content": content, - **({"output": response_output} if response_output else {}), + **( + {"output": response_output} + if response_output + else {} + ), }, ) @@ -2521,33 +2528,33 @@ async def process_chat_response( For LLM consumption, use convert_output_to_messages() instead. """ content = "" - + # First pass: collect function_call_output items by call_id for lookup tool_outputs = {} for item in output: if item.get("type") == "function_call_output": tool_outputs[item.get("call_id")] = item - + # Second pass: render items in order for item in output: item_type = item.get("type", "") - + if item_type == "message": for content_part in item.get("content", []): if content_part.get("type") == "output_text": text = content_part.get("text", "").strip() if text: content = f"{content}{text}\n" - + elif item_type == "function_call": # Render tool call inline with its result (if available) if content and not content.endswith("\n"): content += "\n" - + call_id = item.get("call_id", "") name = item.get("name", "") arguments = item.get("arguments", "") - + result_item = tool_outputs.get(call_id) if result_item: result_text = "" @@ -2556,59 +2563,65 @@ async def process_chat_response( result_text += out.get("text", "") files = result_item.get("files") embeds = result_item.get("embeds", "") - + content += f'
\nTool Executed\n
\n' else: content += f'
\nExecuting...\n
\n' - + elif item_type == "function_call_output": # Already handled inline with function_call above pass - + elif item_type == "reasoning": reasoning_content = "" for content_part in item.get("content", []): if content_part.get("type") == "output_text": reasoning_content = content_part.get("text", "").strip() - + duration = item.get("duration") status = item.get("status", "in_progress") - + if content and not content.endswith("\n"): content += "\n" - - display = html.escape("\n".join( - (f"> {line}" if not line.startswith(">") else line) - for line in reasoning_content.splitlines() - )) - + + display = html.escape( + "\n".join( + (f"> {line}" if not line.startswith(">") else line) + for line in reasoning_content.splitlines() + ) + ) + if status == "completed" or duration is not None: content = f'{content}
\nThought for {duration or 0} seconds\n{display}\n
\n' else: content = f'{content}
\nThinking…\n{display}\n
\n' - + elif item_type == "open_webui:code_interpreter": code = item.get("code", "") output_val = item.get("output") lang = item.get("lang", "") - - content_stripped, original_whitespace = split_content_and_whitespace(content) + + content_stripped, original_whitespace = ( + split_content_and_whitespace(content) + ) if is_opening_code_block(content_stripped): - content = content_stripped.rstrip("`").rstrip() + original_whitespace + content = ( + content_stripped.rstrip("`").rstrip() + + original_whitespace + ) else: content = content_stripped + original_whitespace - + if content and not content.endswith("\n"): content += "\n" - + if output_val: output_escaped = html.escape(json.dumps(output_val)) content = f'{content}
\nAnalyzed\n```{lang}\n{code}\n```\n
\n' else: content = f'{content}
\nAnalyzing...\n```{lang}\n{code}\n```\n
\n' - - return content.strip() + return content.strip() def convert_content_blocks_to_messages(content_blocks, raw=False): messages = [] @@ -2668,13 +2681,17 @@ async def process_chat_response( if block_type == "text": text_content = block.get("content", "").strip() if text_content: - output_items.append({ - "type": "message", - "id": block_id or next_id("msg"), - "status": "completed", - "role": "assistant", - "content": [{"type": "output_text", "text": text_content}], - }) + output_items.append( + { + "type": "message", + "id": block_id or next_id("msg"), + "status": "completed", + "role": "assistant", + "content": [ + {"type": "output_text", "text": text_content} + ], + } + ) elif block_type == "tool_calls": tool_calls = block.get("content", []) @@ -2684,50 +2701,86 @@ async def process_chat_response( for tool_call in tool_calls: call_id = tool_call.get("id", "") func = tool_call.get("function", {}) - output_items.append({ - "type": "function_call", - "id": call_id or next_id("fc"), # Use call_id as item id if available - "call_id": call_id, - "name": func.get("name", ""), - "arguments": func.get("arguments", "{}"), - "status": "completed" if results else "in_progress", - }) + output_items.append( + { + "type": "function_call", + "id": call_id + or next_id( + "fc" + ), # Use call_id as item id if available + "call_id": call_id, + "name": func.get("name", ""), + "arguments": func.get("arguments", "{}"), + "status": "completed" if results else "in_progress", + } + ) # Emit function_call_output items for result in results: - output_items.append({ - "type": "function_call_output", - "id": result.get("id") or next_id("fco"), - "call_id": result.get("tool_call_id", ""), - "output": [{"type": "input_text", "text": result.get("content", "")}], - "status": "completed", - **({"files": result.get("files")} if result.get("files") else {}), - **({"embeds": result.get("embeds")} if result.get("embeds") else {}), - }) + output_items.append( + { + "type": "function_call_output", + "id": result.get("id") or next_id("fco"), + "call_id": result.get("tool_call_id", ""), + "output": [ + { + "type": "input_text", + "text": result.get("content", ""), + } + ], + "status": "completed", + **( + {"files": result.get("files")} + if result.get("files") + else {} + ), + **( + {"embeds": result.get("embeds")} + if result.get("embeds") + else {} + ), + } + ) elif block_type == "reasoning": reasoning_content = block.get("content", "").strip() duration = block.get("duration") - output_items.append({ - "type": "reasoning", - "id": block_id or next_id("r"), - "status": "completed" if duration is not None else "in_progress", - "content": [{"type": "output_text", "text": reasoning_content}] if reasoning_content else None, - "summary": None, - }) + output_items.append( + { + "type": "reasoning", + "id": block_id or next_id("r"), + "status": ( + "completed" + if duration is not None + else "in_progress" + ), + "content": ( + [{"type": "output_text", "text": reasoning_content}] + if reasoning_content + else None + ), + "summary": None, + } + ) elif block_type == "code_interpreter": code = block.get("content", "") output_val = block.get("output") attrs = block.get("attributes", {}) - output_items.append({ - "type": "open_webui:code_interpreter", - "id": block_id or next_id("ci"), - "status": "completed" if output_val is not None else "in_progress", - "lang": attrs.get("lang", ""), - "code": code, - "output": output_val, - }) + output_items.append( + { + "type": "open_webui:code_interpreter", + "id": block_id or next_id("ci"), + "status": ( + "completed" + if output_val is not None + else "in_progress" + ), + "lang": attrs.get("lang", ""), + "code": code, + "output": output_val, + } + ) return output_items @@ -2934,14 +2987,16 @@ async def process_chat_response( output = existing_output else: # Always create an initial message item (even if content is empty) - output = [{ - "type": "message", - "id": output_id("msg"), - "status": "in_progress", - "role": "assistant", - "content": [{"type": "output_text", "text": content}], - }] - + output = [ + { + "type": "message", + "id": output_id("msg"), + "status": "in_progress", + "role": "assistant", + "content": [{"type": "output_text", "text": content}], + } + ] + # Keep content_blocks for backward compatibility during transition content_blocks = [ { @@ -3354,7 +3409,9 @@ async def process_chat_response( if ENABLE_REALTIME_CHAT_SAVE: # Save message in the database - output = convert_content_blocks_to_output(content_blocks) + output = convert_content_blocks_to_output( + content_blocks + ) Chats.upsert_message_to_chat_by_id_and_message_id( metadata["chat_id"], metadata["message_id"], @@ -3692,6 +3749,9 @@ async def process_chat_response( try: if content_blocks[-1]["attributes"].get("type") == "code": code = content_blocks[-1]["content"] + # Strip markdown fences if model included them + code = strip_markdown_code_fences(code) + if CODE_INTERPRETER_BLOCKED_MODULES: blocking_code = textwrap.dedent( f""" diff --git a/backend/open_webui/utils/sanitize.py b/backend/open_webui/utils/sanitize.py new file mode 100644 index 000000000..5755a9be4 --- /dev/null +++ b/backend/open_webui/utils/sanitize.py @@ -0,0 +1,21 @@ +import re + + +def strip_markdown_code_fences(code: str) -> str: + """ + Strip markdown code fences if present. + + This is a defensive, non-breaking change — if the code doesn't + contain fences, it passes through unchanged. + + Handles patterns like: + - ```python + - ```py + - ``` + """ + code = code.strip() + # Remove opening fence (```python, ```py, ``` etc.) + code = re.sub(r"^```\w*\n?", "", code) + # Remove closing fence + code = re.sub(r"\n?```\s*$", "", code) + return code.strip()