From d0ddb0637e3651635fb1b720e55b414c8af340b4 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Thu, 27 Feb 2025 16:34:05 -0800 Subject: [PATCH] enh: web embed bypass embedding and retrieval support --- backend/open_webui/retrieval/utils.py | 7 +++++++ backend/open_webui/routers/retrieval.py | 11 ++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index b6253e63c..029a33a56 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -414,6 +414,13 @@ def get_sources_from_files( ] ], } + elif file.get("file").get("data"): + context = { + "documents": [[file.get("file").get("data", {}).get("content")]], + "metadatas": [ + [file.get("file").get("data", {}).get("metadata", {})] + ], + } else: collection_names = [] if file.get("type") == "collection": diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 5943ae33f..9a0855f25 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -1187,9 +1187,13 @@ def process_web( content = " ".join([doc.page_content for doc in docs]) log.debug(f"text_content: {content}") - save_docs_to_vector_db( - request, docs, collection_name, overwrite=True, user=user - ) + + if not request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: + save_docs_to_vector_db( + request, docs, collection_name, overwrite=True, user=user + ) + else: + collection_name = None return { "status": True, @@ -1201,6 +1205,7 @@ def process_web( }, "meta": { "name": form_data.url, + "source": form_data.url, }, }, }