From 5c60081c1f24704d3443a735d5d5559e8f1bbf67 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 3 Nov 2024 17:07:24 -0800 Subject: [PATCH] refac: web search --- backend/open_webui/apps/retrieval/main.py | 37 ++++++++++++++--- .../apps/retrieval/web/jina_search.py | 6 +-- backend/open_webui/config.py | 21 ++++++++++ backend/open_webui/env.py | 14 ++----- .../admin/Settings/WebSearch.svelte | 40 +++++++++++++++++++ 5 files changed, 98 insertions(+), 20 deletions(-) diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index e3f9707ef..5210b57ca 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -86,6 +86,7 @@ from open_webui.config import ( RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, RAG_WEB_SEARCH_ENGINE, RAG_WEB_SEARCH_RESULT_COUNT, + JINA_API_KEY, SEARCHAPI_API_KEY, SEARCHAPI_ENGINE, SEARXNG_QUERY_URL, @@ -94,6 +95,8 @@ from open_webui.config import ( SERPSTACK_API_KEY, SERPSTACK_HTTPS, TAVILY_API_KEY, + BING_SEARCH_V7_ENDPOINT, + BING_SEARCH_V7_SUBSCRIPTION_KEY, TIKA_SERVER_URL, UPLOAD_DIR, YOUTUBE_LOADER_LANGUAGE, @@ -105,8 +108,6 @@ from open_webui.env import ( SRC_LOG_LEVELS, DEVICE_TYPE, DOCKER, - BING_SEARCH_V7_ENDPOINT, - BING_SEARCH_V7_SUBSCRIPTION_KEY, ) from open_webui.utils.misc import ( calculate_sha256, @@ -179,6 +180,10 @@ app.state.config.SERPLY_API_KEY = SERPLY_API_KEY app.state.config.TAVILY_API_KEY = TAVILY_API_KEY app.state.config.SEARCHAPI_API_KEY = SEARCHAPI_API_KEY app.state.config.SEARCHAPI_ENGINE = SEARCHAPI_ENGINE +app.state.config.JINA_API_KEY = JINA_API_KEY +app.state.config.BING_SEARCH_V7_ENDPOINT = BING_SEARCH_V7_ENDPOINT +app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY = BING_SEARCH_V7_SUBSCRIPTION_KEY + app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS @@ -438,6 +443,9 @@ async def get_rag_config(user=Depends(get_admin_user)): "tavily_api_key": app.state.config.TAVILY_API_KEY, "searchapi_api_key": app.state.config.SEARCHAPI_API_KEY, "seaarchapi_engine": app.state.config.SEARCHAPI_ENGINE, + "jina_api_key": app.state.config.JINA_API_KEY, + "bing_search_v7_endpoint": app.state.config.BING_SEARCH_V7_ENDPOINT, + "bing_search_v7_subscription_key": app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY, "result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, "concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, }, @@ -480,6 +488,9 @@ class WebSearchConfig(BaseModel): tavily_api_key: Optional[str] = None searchapi_api_key: Optional[str] = None searchapi_engine: Optional[str] = None + jina_api_key: Optional[str] = None + bing_search_v7_endpoint: Optional[str] = None + bing_search_v7_subscription_key: Optional[str] = None result_count: Optional[int] = None concurrent_requests: Optional[int] = None @@ -546,6 +557,15 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_ app.state.config.TAVILY_API_KEY = form_data.web.search.tavily_api_key app.state.config.SEARCHAPI_API_KEY = form_data.web.search.searchapi_api_key app.state.config.SEARCHAPI_ENGINE = form_data.web.search.searchapi_engine + + app.state.config.JINA_API_KEY = form_data.web.search.jina_api_key + app.state.config.BING_SEARCH_V7_ENDPOINT = ( + form_data.web.search.bing_search_v7_endpoint + ) + app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY = ( + form_data.web.search.bing_search_v7_subscription_key + ) + app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = form_data.web.search.result_count app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = ( form_data.web.search.concurrent_requests @@ -587,6 +607,9 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_ "serachapi_api_key": app.state.config.SEARCHAPI_API_KEY, "searchapi_engine": app.state.config.SEARCHAPI_ENGINE, "tavily_api_key": app.state.config.TAVILY_API_KEY, + "jina_api_key": app.state.config.JINA_API_KEY, + "bing_search_v7_endpoint": app.state.config.BING_SEARCH_V7_ENDPOINT, + "bing_search_v7_subscription_key": app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY, "result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, "concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, }, @@ -1163,11 +1186,15 @@ def search_web(engine: str, query: str) -> list[SearchResult]: else: raise Exception("No SEARCHAPI_API_KEY found in environment variables") elif engine == "jina": - return search_jina(query, app.state.config.RAG_WEB_SEARCH_RESULT_COUNT) + return search_jina( + app.state.config.JINA_API_KEY, + query, + app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, + ) elif engine == "bing": return search_bing( - BING_SEARCH_V7_SUBSCRIPTION_KEY, - BING_SEARCH_V7_ENDPOINT, + app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY, + app.state.config.BING_SEARCH_V7_ENDPOINT, str(DEFAULT_LOCALE), query, app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, diff --git a/backend/open_webui/apps/retrieval/web/jina_search.py b/backend/open_webui/apps/retrieval/web/jina_search.py index 487bbc948..f5e2febbe 100644 --- a/backend/open_webui/apps/retrieval/web/jina_search.py +++ b/backend/open_webui/apps/retrieval/web/jina_search.py @@ -9,7 +9,7 @@ log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) -def search_jina(query: str, count: int) -> list[SearchResult]: +def search_jina(api_key: str, query: str, count: int) -> list[SearchResult]: """ Search using Jina's Search API and return the results as a list of SearchResult objects. Args: @@ -20,9 +20,7 @@ def search_jina(query: str, count: int) -> list[SearchResult]: list[SearchResult]: A list of search results """ jina_search_endpoint = "https://s.jina.ai/" - headers = { - "Accept": "application/json", - } + headers = {"Accept": "application/json", "Authorization": f"Bearer {api_key}"} url = str(URL(jina_search_endpoint + query)) response = requests.get(url, headers=headers) response.raise_for_status() diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 06ac258f9..f7c00ccde 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -1218,6 +1218,12 @@ TAVILY_API_KEY = PersistentConfig( os.getenv("TAVILY_API_KEY", ""), ) +JINA_API_KEY = PersistentConfig( + "JINA_API_KEY", + "rag.web.search.jina_api_key", + os.getenv("JINA_API_KEY", ""), +) + SEARCHAPI_API_KEY = PersistentConfig( "SEARCHAPI_API_KEY", "rag.web.search.searchapi_api_key", @@ -1230,6 +1236,21 @@ SEARCHAPI_ENGINE = PersistentConfig( os.getenv("SEARCHAPI_ENGINE", ""), ) +BING_SEARCH_V7_ENDPOINT = PersistentConfig( + "BING_SEARCH_V7_ENDPOINT", + "rag.web.search.bing_search_v7_endpoint", + os.environ.get( + "BING_SEARCH_V7_ENDPOINT", "https://api.bing.microsoft.com/v7.0/search" + ), +) + +BING_SEARCH_V7_SUBSCRIPTION_KEY = PersistentConfig( + "BING_SEARCH_V7_SUBSCRIPTION_KEY", + "rag.web.search.bing_search_v7_subscription_key", + os.environ.get("BING_SEARCH_V7_SUBSCRIPTION_KEY", ""), +) + + RAG_WEB_SEARCH_RESULT_COUNT = PersistentConfig( "RAG_WEB_SEARCH_RESULT_COUNT", "rag.web.search.result_count", diff --git a/backend/open_webui/env.py b/backend/open_webui/env.py index 152320f7c..6e591311d 100644 --- a/backend/open_webui/env.py +++ b/backend/open_webui/env.py @@ -199,9 +199,9 @@ SAFE_MODE = os.environ.get("SAFE_MODE", "false").lower() == "true" # ENABLE_FORWARD_USER_INFO_HEADERS #################################### -ENABLE_FORWARD_USER_INFO_HEADERS = os.environ.get( - "ENABLE_FORWARD_USER_INFO_HEADERS", "False" -).lower() == "true" +ENABLE_FORWARD_USER_INFO_HEADERS = ( + os.environ.get("ENABLE_FORWARD_USER_INFO_HEADERS", "False").lower() == "true" +) #################################### @@ -391,11 +391,3 @@ else: #################################### OFFLINE_MODE = os.environ.get("OFFLINE_MODE", "false").lower() == "true" - -#################################### -# WEB SEARCH -#################################### - -BING_SEARCH_V7_ENDPOINT = os.environ.get("BING_SEARCH_V7_ENDPOINT", "https://api.bing.microsoft.com/v7.0/search") - -BING_SEARCH_V7_SUBSCRIPTION_KEY = os.environ.get("BING_SEARCH_V7_SUBSCRIPTION_KEY", "") \ No newline at end of file diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte index 49e6309b7..2886dc663 100644 --- a/src/lib/components/admin/Settings/WebSearch.svelte +++ b/src/lib/components/admin/Settings/WebSearch.svelte @@ -223,6 +223,46 @@ bind:value={webConfig.search.tavily_api_key} /> + {:else if webConfig.search.engine === 'jina'} +
+
+ {$i18n.t('Jina API Key')} +
+ + +
+ {:else if webConfig.search.engine === 'bing'} +
+
+ {$i18n.t('Bing Search V7 Endpoint')} +
+ +
+
+ +
+
+
+ +
+
+ {$i18n.t('Bing Search V7 Subscription Key')} +
+ + +
{/if} {/if}