refac

2025-06-26 18:26:48 +00:00 · 2025-06-25 12:20:08 +04:00 · 2025-06-25 12:20:08 +04:00 · 1f123eb100
commit 1f123eb100
parent 6676824947
2 changed files with 46 additions and 33 deletions
--- a/backend/open_webui/retrieval/utils.py
+++ b/backend/open_webui/retrieval/utils.py
@@ -460,20 +460,19 @@ def get_sources_from_files(
    )
    extracted_collections = []
-    relevant_contexts = []
+    query_results = []
    for file in files:
-
+        query_result = None
        context = None
        if file.get("docs"):
            # BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
-            context = {
+            query_result = {
                "documents": [[doc.get("content") for doc in file.get("docs")]],
                "metadatas": [[doc.get("metadata") for doc in file.get("docs")]],
            }
        elif file.get("context") == "full":
            # Manual Full Mode Toggle
-            context = {
+            query_result = {
                "documents": [[file.get("file").get("data", {}).get("content")]],
                "metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]],
            }
@@ -500,7 +499,7 @@ def get_sources_from_files(
                            }
                        )
-                context = {
+                query_result = {
                    "documents": [documents],
                    "metadatas": [metadatas],
                }
@@ -508,7 +507,7 @@ def get_sources_from_files(
            elif file.get("id"):
                file_object = Files.get_file_by_id(file.get("id"))
                if file_object:
-                    context = {
+                    query_result = {
                        "documents": [[file_object.data.get("content", "")]],
                        "metadatas": [
                            [
@@ -521,7 +520,7 @@ def get_sources_from_files(
                        ],
                    }
            elif file.get("file").get("data"):
-                context = {
+                query_result = {
                    "documents": [[file.get("file").get("data", {}).get("content")]],
                    "metadatas": [
                        [file.get("file").get("data", {}).get("metadata", {})]
@@ -549,19 +548,27 @@ def get_sources_from_files(
            if full_context:
                try:
-                    context = get_all_items_from_collections(collection_names)
+                    query_result = get_all_items_from_collections(collection_names)
                except Exception as e:
                    log.exception(e)
            else:
                try:
-                    context = None
+                    query_result = None
                    if file.get("type") == "text":
-                        context = file["content"]
+                        # Not sure when this is used, but it seems to be a fallback
+                        query_result = {
+                            "documents": [
+                                [file.get("file").get("data", {}).get("content")]
+                            ],
+                            "metadatas": [
+                                [file.get("file").get("data", {}).get("meta", {})]
+                            ],
+                        }
                    else:
                        if hybrid_search:
                            try:
-                                context = query_collection_with_hybrid_search(
+                                query_result = query_collection_with_hybrid_search(
                                    collection_names=collection_names,
                                    queries=queries,
                                    embedding_function=embedding_function,
@@ -577,8 +584,8 @@ def get_sources_from_files(
                                    " non hybrid search as fallback."
                                )
-                        if (not hybrid_search) or (context is None):
+                        if (not hybrid_search) or (query_result is None):
-                            context = query_collection(
+                            query_result = query_collection(
                                collection_names=collection_names,
                                queries=queries,
                                embedding_function=embedding_function,
@@ -589,24 +596,24 @@ def get_sources_from_files(
            extracted_collections.extend(collection_names)
-        if context:
+        if query_result:
            if "data" in file:
                del file["data"]
-            relevant_contexts.append({**context, "file": file})
+            query_results.append({**query_result, "file": file})
    sources = []
-    for context in relevant_contexts:
+    for query_result in query_results:
        try:
-            if "documents" in context:
+            if "documents" in query_result:
-                if "metadatas" in context:
+                if "metadatas" in query_result:
                    source = {
-                        "source": context["file"],
+                        "source": query_result["file"],
-                        "document": context["documents"][0],
+                        "document": query_result["documents"][0],
-                        "metadata": context["metadatas"][0],
+                        "metadata": query_result["metadatas"][0],
                    }
-                    if "distances" in context and context["distances"]:
+                    if "distances" in query_result and query_result["distances"]:
-                        source["distances"] = context["distances"][0]
+                        source["distances"] = query_result["distances"][0]
                    sources.append(source)
        except Exception as e:
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -718,6 +718,10 @@ def apply_params_to_form_data(form_data, model):
 async def process_chat_payload(request, form_data, user, metadata, model):
    # Pipeline Inlet -> Filter Inlet -> Chat Memory -> Chat Web Search -> Chat Image Generation
    # -> Chat Code Interpreter (Form Data Update) -> (Default) Chat Tools Function Calling
    # -> Chat Files
    form_data = apply_params_to_form_data(form_data, model)
    log.debug(f"form_data: {form_data}")
@@ -911,7 +915,6 @@ async def process_chat_payload(request, form_data, user, metadata, model):
                    request, form_data, extra_params, user, models, tools_dict
                )
                sources.extend(flags.get("sources", []))
            except Exception as e:
                log.exception(e)
@@ -924,24 +927,27 @@ async def process_chat_payload(request, form_data, user, metadata, model):
    # If context is not empty, insert it into the messages
    if len(sources) > 0:
        context_string = ""
-        citation_idx = {}
+        citation_idx_map = {}
        for source in sources:
            if "document" in source:
-                for doc_context, doc_meta in zip(
+                for document_text, document_metadata in zip(
                    source["document"], source["metadata"]
                ):
                    source_name = source.get("source", {}).get("name", None)
-                    citation_id = (
+                    source_id = (
-                        doc_meta.get("source", None)
+                        document_metadata.get("source", None)
                        or source.get("source", {}).get("id", None)
                        or "N/A"
                    )
-                    if citation_id not in citation_idx:
+
-                        citation_idx[citation_id] = len(citation_idx) + 1
+                    if source_id not in citation_idx_map:
+                        citation_idx_map[source_id] = len(citation_idx_map) + 1
                    context_string += (
-                        f'<source id="{citation_idx[citation_id]}"'
+                        f'<source id="{citation_idx_map[source_id]}"'
                        + (f' name="{source_name}"' if source_name else "")
-                        + f">{doc_context}</source>\n"
+                        + f">{document_text}</source>\n"
                    )
        context_string = context_string.strip()