refac

2026-01-26 18:04:58 +04:00
parent c5c4aef7b1
commit 4a55167759
2 changed files with 167 additions and 86 deletions
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -73,6 +73,7 @@ from open_webui.models.models import Models
 from open_webui.retrieval.utils import get_sources_from_items


+from open_webui.utils.sanitize import strip_markdown_code_fences
 from open_webui.utils.chat import generate_chat_completion
 from open_webui.utils.task import (
    get_task_model_id,
@@ -1408,12 +1409,12 @@ async def convert_url_images_to_base64(form_data):
 def process_messages_with_output(messages: list[dict]) -> list[dict]:
    """
    Process messages with OR-aligned output items for LLM consumption.
-    
+
    For assistant messages with 'output' field, produces properly formatted
    OpenAI-style messages (tool_calls + tool results). Strips 'output' before LLM.
    """
    processed = []
-    
+
    for message in messages:
        if message.get("role") == "assistant" and message.get("output"):
            # Use output items for clean OpenAI-format messages
@@ -1421,11 +1422,11 @@ def process_messages_with_output(messages: list[dict]) -> list[dict]:
            if output_messages:
                processed.extend(output_messages)
                continue
-        
+
        # Strip 'output' field before adding (LLM shouldn't see it)
        clean_message = {k: v for k, v in message.items() if k != "output"}
        processed.append(clean_message)
-    
+
    return processed


@@ -1841,9 +1842,8 @@ async def process_chat_payload(request, form_data, user, metadata, model):
    # Inject builtin tools for native function calling based on enabled features and model capability
    # Check if builtin_tools capability is enabled for this model (defaults to True if not specified)
    builtin_tools_enabled = (
-        (model.get("info", {}).get("meta", {}).get("capabilities") or {})
-        .get("builtin_tools", True)
-    )
+        model.get("info", {}).get("meta", {}).get("capabilities") or {}
+    ).get("builtin_tools", True)
    if (
        metadata.get("params", {}).get("function_calling") == "native"
        and builtin_tools_enabled
@@ -1887,9 +1887,8 @@ async def process_chat_payload(request, form_data, user, metadata, model):

    # Check if file context extraction is enabled for this model (default True)
    file_context_enabled = (
-        (model.get("info", {}).get("meta", {}).get("capabilities") or {})
-        .get("file_context", True)
-    )
+        model.get("info", {}).get("meta", {}).get("capabilities") or {}
+    ).get("file_context", True)

    if file_context_enabled:
        try:
@@ -2238,7 +2237,7 @@ async def process_chat_response(
                            )

                            title = Chats.get_chat_title_by_id(metadata["chat_id"])
-                            
+
                            # Use output from backend if provided (OR-compliant backends)
                            response_output = response_data.get("output")

@@ -2248,7 +2247,11 @@ async def process_chat_response(
                                    "data": {
                                        "done": True,
                                        "content": content,
-                                        **({"output": response_output} if response_output else {}),
+                                        **(
+                                            {"output": response_output}
+                                            if response_output
+                                            else {}
+                                        ),
                                        "title": title,
                                    },
                                }
@@ -2261,7 +2264,11 @@ async def process_chat_response(
                                {
                                    "role": "assistant",
                                    "content": content,
-                                    **({"output": response_output} if response_output else {}),
+                                    **(
+                                        {"output": response_output}
+                                        if response_output
+                                        else {}
+                                    ),
                                },
                            )

@@ -2521,33 +2528,33 @@ async def process_chat_response(
                For LLM consumption, use convert_output_to_messages() instead.
                """
                content = ""
-                
+
                # First pass: collect function_call_output items by call_id for lookup
                tool_outputs = {}
                for item in output:
                    if item.get("type") == "function_call_output":
                        tool_outputs[item.get("call_id")] = item
-                
+
                # Second pass: render items in order
                for item in output:
                    item_type = item.get("type", "")
-                    
+
                    if item_type == "message":
                        for content_part in item.get("content", []):
                            if content_part.get("type") == "output_text":
                                text = content_part.get("text", "").strip()
                                if text:
                                    content = f"{content}{text}\n"
-                    
+
                    elif item_type == "function_call":
                        # Render tool call inline with its result (if available)
                        if content and not content.endswith("\n"):
                            content += "\n"
-                        
+
                        call_id = item.get("call_id", "")
                        name = item.get("name", "")
                        arguments = item.get("arguments", "")
-                        
+
                        result_item = tool_outputs.get(call_id)
                        if result_item:
                            result_text = ""
@@ -2556,59 +2563,65 @@ async def process_chat_response(
                                    result_text += out.get("text", "")
                            files = result_item.get("files")
                            embeds = result_item.get("embeds", "")
-                            
+
                            content += f'<details type="tool_calls" done="true" id="{call_id}" name="{name}" arguments="{html.escape(json.dumps(arguments))}" result="{html.escape(json.dumps(result_text, ensure_ascii=False))}" files="{html.escape(json.dumps(files)) if files else ""}" embeds="{html.escape(json.dumps(embeds))}">\n<summary>Tool Executed</summary>\n</details>\n'
                        else:
                            content += f'<details type="tool_calls" done="false" id="{call_id}" name="{name}" arguments="{html.escape(json.dumps(arguments))}">\n<summary>Executing...</summary>\n</details>\n'
-                    
+
                    elif item_type == "function_call_output":
                        # Already handled inline with function_call above
                        pass
-                    
+
                    elif item_type == "reasoning":
                        reasoning_content = ""
                        for content_part in item.get("content", []):
                            if content_part.get("type") == "output_text":
                                reasoning_content = content_part.get("text", "").strip()
-                        
+
                        duration = item.get("duration")
                        status = item.get("status", "in_progress")
-                        
+
                        if content and not content.endswith("\n"):
                            content += "\n"
-                        
-                        display = html.escape("\n".join(
-                            (f"> {line}" if not line.startswith(">") else line)
-                            for line in reasoning_content.splitlines()
-                        ))
-                        
+
+                        display = html.escape(
+                            "\n".join(
+                                (f"> {line}" if not line.startswith(">") else line)
+                                for line in reasoning_content.splitlines()
+                            )
+                        )
+
                        if status == "completed" or duration is not None:
                            content = f'{content}<details type="reasoning" done="true" duration="{duration or 0}">\n<summary>Thought for {duration or 0} seconds</summary>\n{display}\n</details>\n'
                        else:
                            content = f'{content}<details type="reasoning" done="false">\n<summary>Thinking…</summary>\n{display}\n</details>\n'
-                    
+
                    elif item_type == "open_webui:code_interpreter":
                        code = item.get("code", "")
                        output_val = item.get("output")
                        lang = item.get("lang", "")
-                        
-                        content_stripped, original_whitespace = split_content_and_whitespace(content)
+
+                        content_stripped, original_whitespace = (
+                            split_content_and_whitespace(content)
+                        )
                        if is_opening_code_block(content_stripped):
-                            content = content_stripped.rstrip("`").rstrip() + original_whitespace
+                            content = (
+                                content_stripped.rstrip("`").rstrip()
+                                + original_whitespace
+                            )
                        else:
                            content = content_stripped + original_whitespace
-                        
+
                        if content and not content.endswith("\n"):
                            content += "\n"
-                        
+
                        if output_val:
                            output_escaped = html.escape(json.dumps(output_val))
                            content = f'{content}<details type="code_interpreter" done="true" output="{output_escaped}">\n<summary>Analyzed</summary>\n```{lang}\n{code}\n```\n</details>\n'
                        else:
                            content = f'{content}<details type="code_interpreter" done="false">\n<summary>Analyzing...</summary>\n```{lang}\n{code}\n```\n</details>\n'
-                
-                return content.strip()

+                return content.strip()

            def convert_content_blocks_to_messages(content_blocks, raw=False):
                messages = []
@@ -2668,13 +2681,17 @@ async def process_chat_response(
                    if block_type == "text":
                        text_content = block.get("content", "").strip()
                        if text_content:
-                            output_items.append({
-                                "type": "message",
-                                "id": block_id or next_id("msg"),
-                                "status": "completed",
-                                "role": "assistant",
-                                "content": [{"type": "output_text", "text": text_content}],
-                            })
+                            output_items.append(
+                                {
+                                    "type": "message",
+                                    "id": block_id or next_id("msg"),
+                                    "status": "completed",
+                                    "role": "assistant",
+                                    "content": [
+                                        {"type": "output_text", "text": text_content}
+                                    ],
+                                }
+                            )

                    elif block_type == "tool_calls":
                        tool_calls = block.get("content", [])
@@ -2684,50 +2701,86 @@ async def process_chat_response(
                        for tool_call in tool_calls:
                            call_id = tool_call.get("id", "")
                            func = tool_call.get("function", {})
-                            output_items.append({
-                                "type": "function_call",
-                                "id": call_id or next_id("fc"),  # Use call_id as item id if available
-                                "call_id": call_id,
-                                "name": func.get("name", ""),
-                                "arguments": func.get("arguments", "{}"),
-                                "status": "completed" if results else "in_progress",
-                            })
+                            output_items.append(
+                                {
+                                    "type": "function_call",
+                                    "id": call_id
+                                    or next_id(
+                                        "fc"
+                                    ),  # Use call_id as item id if available
+                                    "call_id": call_id,
+                                    "name": func.get("name", ""),
+                                    "arguments": func.get("arguments", "{}"),
+                                    "status": "completed" if results else "in_progress",
+                                }
+                            )

                        # Emit function_call_output items
                        for result in results:
-                            output_items.append({
-                                "type": "function_call_output",
-                                "id": result.get("id") or next_id("fco"),
-                                "call_id": result.get("tool_call_id", ""),
-                                "output": [{"type": "input_text", "text": result.get("content", "")}],
-                                "status": "completed",
-                                **({"files": result.get("files")} if result.get("files") else {}),
-                                **({"embeds": result.get("embeds")} if result.get("embeds") else {}),
-                            })
+                            output_items.append(
+                                {
+                                    "type": "function_call_output",
+                                    "id": result.get("id") or next_id("fco"),
+                                    "call_id": result.get("tool_call_id", ""),
+                                    "output": [
+                                        {
+                                            "type": "input_text",
+                                            "text": result.get("content", ""),
+                                        }
+                                    ],
+                                    "status": "completed",
+                                    **(
+                                        {"files": result.get("files")}
+                                        if result.get("files")
+                                        else {}
+                                    ),
+                                    **(
+                                        {"embeds": result.get("embeds")}
+                                        if result.get("embeds")
+                                        else {}
+                                    ),
+                                }
+                            )

                    elif block_type == "reasoning":
                        reasoning_content = block.get("content", "").strip()
                        duration = block.get("duration")
-                        output_items.append({
-                            "type": "reasoning",
-                            "id": block_id or next_id("r"),
-                            "status": "completed" if duration is not None else "in_progress",
-                            "content": [{"type": "output_text", "text": reasoning_content}] if reasoning_content else None,
-                            "summary": None,
-                        })
+                        output_items.append(
+                            {
+                                "type": "reasoning",
+                                "id": block_id or next_id("r"),
+                                "status": (
+                                    "completed"
+                                    if duration is not None
+                                    else "in_progress"
+                                ),
+                                "content": (
+                                    [{"type": "output_text", "text": reasoning_content}]
+                                    if reasoning_content
+                                    else None
+                                ),
+                                "summary": None,
+                            }
+                        )

                    elif block_type == "code_interpreter":
                        code = block.get("content", "")
                        output_val = block.get("output")
                        attrs = block.get("attributes", {})
-                        output_items.append({
-                            "type": "open_webui:code_interpreter",
-                            "id": block_id or next_id("ci"),
-                            "status": "completed" if output_val is not None else "in_progress",
-                            "lang": attrs.get("lang", ""),
-                            "code": code,
-                            "output": output_val,
-                        })
+                        output_items.append(
+                            {
+                                "type": "open_webui:code_interpreter",
+                                "id": block_id or next_id("ci"),
+                                "status": (
+                                    "completed"
+                                    if output_val is not None
+                                    else "in_progress"
+                                ),
+                                "lang": attrs.get("lang", ""),
+                                "code": code,
+                                "output": output_val,
+                            }
+                        )

                return output_items

@@ -2934,14 +2987,16 @@ async def process_chat_response(
                output = existing_output
            else:
                # Always create an initial message item (even if content is empty)
-                output = [{
-                    "type": "message",
-                    "id": output_id("msg"),
-                    "status": "in_progress",
-                    "role": "assistant",
-                    "content": [{"type": "output_text", "text": content}],
-                }]
-            
+                output = [
+                    {
+                        "type": "message",
+                        "id": output_id("msg"),
+                        "status": "in_progress",
+                        "role": "assistant",
+                        "content": [{"type": "output_text", "text": content}],
+                    }
+                ]
+
            # Keep content_blocks for backward compatibility during transition
            content_blocks = [
                {
@@ -3354,7 +3409,9 @@ async def process_chat_response(

                                        if ENABLE_REALTIME_CHAT_SAVE:
                                            # Save message in the database
-                                            output = convert_content_blocks_to_output(content_blocks)
+                                            output = convert_content_blocks_to_output(
+                                                content_blocks
+                                            )
                                            Chats.upsert_message_to_chat_by_id_and_message_id(
                                                metadata["chat_id"],
                                                metadata["message_id"],
@@ -3692,6 +3749,9 @@ async def process_chat_response(
                        try:
                            if content_blocks[-1]["attributes"].get("type") == "code":
                                code = content_blocks[-1]["content"]
+                                # Strip markdown fences if model included them
+                                code = strip_markdown_code_fences(code)
+
                                if CODE_INTERPRETER_BLOCKED_MODULES:
                                    blocking_code = textwrap.dedent(
                                        f"""
--- a/backend/open_webui/utils/sanitize.py
+++ b/backend/open_webui/utils/sanitize.py
@@ -0,0 +1,21 @@
+import re
+
+
+def strip_markdown_code_fences(code: str) -> str:
+    """
+    Strip markdown code fences if present.
+
+    This is a defensive, non-breaking change — if the code doesn't
+    contain fences, it passes through unchanged.
+
+    Handles patterns like:
+    - ```python
+    - ```py
+    - ```
+    """
+    code = code.strip()
+    # Remove opening fence (```python, ```py, ``` etc.)
+    code = re.sub(r"^```\w*\n?", "", code)
+    # Remove closing fence
+    code = re.sub(r"\n?```\s*$", "", code)
+    return code.strip()