diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index b1955b056..85fac016a 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -2177,6 +2177,12 @@ BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = PersistentConfig( ) +BYPASS_WEB_SEARCH_WEB_LOADER = PersistentConfig( + "BYPASS_WEB_SEARCH_WEB_LOADER", + "rag.web.search.bypass_web_loader", + os.getenv("BYPASS_WEB_SEARCH_WEB_LOADER", "False").lower() == "true", +) + WEB_SEARCH_RESULT_COUNT = PersistentConfig( "WEB_SEARCH_RESULT_COUNT", "rag.web.search.result_count", @@ -2202,6 +2208,7 @@ WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig( int(os.getenv("WEB_SEARCH_CONCURRENT_REQUESTS", "10")), ) + WEB_LOADER_ENGINE = PersistentConfig( "WEB_LOADER_ENGINE", "rag.web.loader.engine", diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index a5aee4bb8..1c3681c22 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -228,6 +228,7 @@ from open_webui.config import ( ENABLE_WEB_SEARCH, WEB_SEARCH_ENGINE, BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, + BYPASS_WEB_SEARCH_WEB_LOADER, WEB_SEARCH_RESULT_COUNT, WEB_SEARCH_CONCURRENT_REQUESTS, WEB_SEARCH_TRUST_ENV, @@ -707,6 +708,7 @@ app.state.config.WEB_SEARCH_TRUST_ENV = WEB_SEARCH_TRUST_ENV app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = ( BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL ) +app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER = BYPASS_WEB_SEARCH_WEB_LOADER app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION app.state.config.ENABLE_ONEDRIVE_INTEGRATION = ENABLE_ONEDRIVE_INTEGRATION diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 5cb47373f..82114d755 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -387,6 +387,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, "WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, + "BYPASS_WEB_SEARCH_WEB_LOADER": request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER, "SEARXNG_QUERY_URL": request.app.state.config.SEARXNG_QUERY_URL, "YACY_QUERY_URL": request.app.state.config.YACY_QUERY_URL, "YACY_USERNAME": request.app.state.config.YACY_USERNAME, @@ -439,6 +440,7 @@ class WebConfig(BaseModel): WEB_SEARCH_CONCURRENT_REQUESTS: Optional[int] = None WEB_SEARCH_DOMAIN_FILTER_LIST: Optional[List[str]] = [] BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None + BYPASS_WEB_SEARCH_WEB_LOADER: Optional[bool] = None SEARXNG_QUERY_URL: Optional[str] = None YACY_QUERY_URL: Optional[str] = None YACY_USERNAME: Optional[str] = None @@ -751,6 +753,9 @@ async def update_rag_config( request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = ( form_data.web.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL ) + request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER = ( + form_data.web.BYPASS_WEB_SEARCH_WEB_LOADER + ) request.app.state.config.SEARXNG_QUERY_URL = form_data.web.SEARXNG_QUERY_URL request.app.state.config.YACY_QUERY_URL = form_data.web.YACY_QUERY_URL request.app.state.config.YACY_USERNAME = form_data.web.YACY_USERNAME @@ -875,6 +880,7 @@ async def update_rag_config( "WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, "WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, + "BYPASS_WEB_SEARCH_WEB_LOADER": request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER, "SEARXNG_QUERY_URL": request.app.state.config.SEARXNG_QUERY_URL, "YACY_QUERY_URL": request.app.state.config.YACY_QUERY_URL, "YACY_USERNAME": request.app.state.config.YACY_USERNAME, @@ -1678,13 +1684,29 @@ async def process_web_search( ) try: - loader = get_web_loader( - urls, - verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, - requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, - trust_env=request.app.state.config.WEB_SEARCH_TRUST_ENV, - ) - docs = await loader.aload() + if request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER: + docs = [ + Document( + page_content=result.snippet, + metadata={ + "source": result.link, + "title": result.title, + "snippet": result.snippet, + "link": result.link, + }, + ) + for result in search_results + if hasattr(result, "snippet") + ] + else: + loader = get_web_loader( + urls, + verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, + requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, + trust_env=request.app.state.config.WEB_SEARCH_TRUST_ENV, + ) + docs = await loader.aload() + urls = [ doc.metadata.get("source") for doc in docs if doc.metadata.get("source") ] # only keep the urls returned by the loader diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte index e25852cc3..c0712d8ad 100644 --- a/src/lib/components/admin/Settings/WebSearch.svelte +++ b/src/lib/components/admin/Settings/WebSearch.svelte @@ -613,6 +613,19 @@ +