diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py
index f34c78dd8..7fff81ed3 100644
--- a/backend/open_webui/config.py
+++ b/backend/open_webui/config.py
@@ -1780,6 +1780,12 @@ RAG_WEB_SEARCH_ENGINE = PersistentConfig(
     os.getenv("RAG_WEB_SEARCH_ENGINE", ""),
 )
 
+RAG_WEB_SEARCH_FULL_CONTEXT = PersistentConfig(
+    "RAG_WEB_SEARCH_FULL_CONTEXT",
+    "rag.web.search.full_context",
+    os.getenv("RAG_WEB_SEARCH_FULL_CONTEXT", "False").lower() == "true",
+)
+
 # You can provide a list of your own websites to filter after performing a web search.
 # This ensures the highest level of safety and reliability of the information sources.
 RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(
diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py
index 19ed89880..dd0c2bf9f 100644
--- a/backend/open_webui/main.py
+++ b/backend/open_webui/main.py
@@ -179,6 +179,7 @@ from open_webui.config import (
     YOUTUBE_LOADER_PROXY_URL,
     # Retrieval (Web Search)
     RAG_WEB_SEARCH_ENGINE,
+    RAG_WEB_SEARCH_FULL_CONTEXT,
    RAG_WEB_SEARCH_RESULT_COUNT,
     RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
     RAG_WEB_SEARCH_TRUST_ENV,
@@ -548,6 +549,7 @@ app.state.config.YOUTUBE_LOADER_PROXY_URL = YOUTUBE_LOADER_PROXY_URL
 
 app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH
 app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE
+app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT = RAG_WEB_SEARCH_FULL_CONTEXT
 app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST
 
 app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION
diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py
index b7da20ad5..27526511c 100644
--- a/backend/open_webui/retrieval/utils.py
+++ b/backend/open_webui/retrieval/utils.py
@@ -304,7 +304,14 @@ def get_sources_from_files(
     relevant_contexts = []
 
     for file in files:
-        if file.get("context") == "full":
+        if file.get("docs"):
+
+            print("file.get('docs')", file.get("docs"))
+            context = {
+                "documents": [[doc.get("content") for doc in file.get("docs")]],
+                "metadatas": [[doc.get("metadata") for doc in file.get("docs")]],
+            }
+        elif file.get("context") == "full":
             context = {
                 "documents": [[file.get("file").get("data", {}).get("content")]],
                 "metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]],
diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py
index 47e6253c8..bbad39ec0 100644
--- a/backend/open_webui/routers/retrieval.py
+++ b/backend/open_webui/routers/retrieval.py
@@ -371,7 +371,8 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
             "proxy_url": request.app.state.config.YOUTUBE_LOADER_PROXY_URL,
         },
         "web": {
-            "web_loader_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
+            "ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
+            "RAG_WEB_SEARCH_FULL_CONTEXT": request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT,
             "search": {
                 "enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
                 "drive": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION,
@@ -457,7 +458,8 @@ class WebSearchConfig(BaseModel):
 
 class WebConfig(BaseModel):
     search: WebSearchConfig
-    web_loader_ssl_verification: Optional[bool] = None
+    ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION: Optional[bool] = None
+    RAG_WEB_SEARCH_FULL_CONTEXT: Optional[bool] = None
 
 
 class ConfigUpdateForm(BaseModel):
@@ -512,11 +514,16 @@ async def update_rag_config(
 
     if form_data.web is not None:
         request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
             # Note: When UI "Bypass SSL verification for Websites"=True then ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION=False
-            form_data.web.web_loader_ssl_verification
+            form_data.web.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
         )
         request.app.state.config.ENABLE_RAG_WEB_SEARCH = form_data.web.search.enabled
         request.app.state.config.RAG_WEB_SEARCH_ENGINE = form_data.web.search.engine
+
+        request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT = (
+            form_data.web.RAG_WEB_SEARCH_FULL_CONTEXT
+        )
+
         request.app.state.config.SEARXNG_QUERY_URL = (
             form_data.web.search.searxng_query_url
         )
@@ -600,7 +607,8 @@ async def update_rag_config(
             "translation": request.app.state.YOUTUBE_LOADER_TRANSLATION,
         },
         "web": {
-            "web_loader_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
+            "ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
+            "RAG_WEB_SEARCH_FULL_CONTEXT": request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT,
             "search": {
                 "enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
                 "engine": request.app.state.config.RAG_WEB_SEARCH_ENGINE,
@@ -1349,21 +1357,36 @@ async def process_web_search(
             trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
         )
         docs = await loader.aload()
-        await run_in_threadpool(
-            save_docs_to_vector_db,
-            request,
-            docs,
-            collection_name,
-            overwrite=True,
-            user=user,
-        )
-
-        return {
-            "status": True,
-            "collection_name": collection_name,
-            "filenames": urls,
-            "loaded_count": len(docs),
-        }
+        if request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT:
+            return {
+                "status": True,
+                "docs": [
+                    {
+                        "content": doc.page_content,
+                        "metadata": doc.metadata,
+                    }
+                    for doc in docs
+                ],
+                "filenames": urls,
+                "loaded_count": len(docs),
+            }
+        else:
+            await run_in_threadpool(
+                save_docs_to_vector_db,
+                request,
+                docs,
+                collection_name,
+                overwrite=True,
+                user=user,
+            )
+
+            return {
+                "status": True,
+                "collection_name": collection_name,
+                "filenames": urls,
+                "loaded_count": len(docs),
+            }
     except Exception as e:
         log.exception(e)
         raise HTTPException(
diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
index 27e751e0b..484cf06d3 100644
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -362,14 +362,25 @@ async def chat_web_search_handler(
         )
 
         files = form_data.get("files", [])
-        files.append(
-            {
-                "collection_name": results["collection_name"],
-                "name": searchQuery,
-                "type": "web_search_results",
-                "urls": results["filenames"],
-            }
-        )
+
+        if request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT:
+            files.append(
+                {
+                    "docs": results.get("docs", []),
+                    "name": searchQuery,
+                    "type": "web_search_docs",
+                    "urls": results["filenames"],
+                }
+            )
+        else:
+            files.append(
+                {
+                    "collection_name": results["collection_name"],
+                    "name": searchQuery,
+                    "type": "web_search_results",
+                    "urls": results["filenames"],
+                }
+            )
         form_data["files"] = files
     else:
         await event_emitter(
diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte
index aa4d67180..84e9d0e5a 100644
--- a/src/lib/components/admin/Settings/WebSearch.svelte
+++ b/src/lib/components/admin/Settings/WebSearch.svelte
@@ -6,6 +6,7 @@
 	import { onMount, getContext } from 'svelte';
 	import { toast } from 'svelte-sonner';
 	import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
+	import Tooltip from '$lib/components/common/Tooltip.svelte';
 
 	const i18n = getContext('i18n');
@@ -116,6 +117,19 @@
+					<div class="mb-2.5 flex w-full justify-between">
+						<div class="self-center text-xs font-medium">
+							{$i18n.t('Full Context Mode')}
+						</div>
+						<div class="flex items-center relative">
+							<Tooltip
+								content={$i18n.t('Bypass embedding and retrieval and pass the full web page content to the model as context.')}
+							>
+								<Switch bind:state={webConfig.RAG_WEB_SEARCH_FULL_CONTEXT} />
+							</Tooltip>
+						</div>
+					</div>
+
 					{#if webConfig.search.engine !== ''}
 						{#if webConfig.search.engine === 'searxng'}
@@ -424,12 +438,13 @@
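
For reviewers, here is a minimal sketch of the control flow this patch introduces: with `RAG_WEB_SEARCH_FULL_CONTEXT` enabled, web search results bypass the vector DB and are attached to the chat payload as raw `docs`; otherwise they are embedded into a collection and retrieved later. This is illustrative only and not part of the diff; `SearchResultDoc` and `build_web_search_file` are hypothetical names used for clarity.

```python
# Illustrative sketch of the RAG_WEB_SEARCH_FULL_CONTEXT branch added by this patch.
# `SearchResultDoc` and `build_web_search_file` are hypothetical; the real logic lives
# in process_web_search() and chat_web_search_handler() above.
from dataclasses import dataclass, field


@dataclass
class SearchResultDoc:
    content: str
    metadata: dict = field(default_factory=dict)


def build_web_search_file(
    full_context: bool,
    docs: list[SearchResultDoc],
    collection_name: str,
    query: str,
    urls: list[str],
) -> dict:
    if full_context:
        # Full Context Mode: skip the vector DB entirely and attach the raw page
        # contents; get_sources_from_files() later reads them from the "docs" key.
        return {
            "docs": [{"content": d.content, "metadata": d.metadata} for d in docs],
            "name": query,
            "type": "web_search_docs",
            "urls": urls,
        }
    # Default path: results were embedded into `collection_name`, so downstream
    # retrieval queries the vector DB instead of receiving the full page text.
    return {
        "collection_name": collection_name,
        "name": query,
        "type": "web_search_results",
        "urls": urls,
    }


# Example: with the flag enabled, the chat payload carries the full page text.
file_entry = build_web_search_file(
    full_context=True,
    docs=[SearchResultDoc("page text...", {"source": "https://example.com"})],
    collection_name="web-search-abc123",
    query="open webui full context mode",
    urls=["https://example.com"],
)
```

The design choice mirrored here is that the flag trades retrieval precision for completeness: the full pages consume more of the model's context window, but nothing is lost to chunking or similarity search.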