From 240d91d38d74cef447e39c9c54cb30e67a277130 Mon Sep 17 00:00:00 2001 From: Stephen Smith Date: Sat, 26 Apr 2025 21:27:55 -0400 Subject: [PATCH] Add yacy config for user/pass, automatically add yacy json api path --- backend/open_webui/config.py | 12 +++++++ backend/open_webui/main.py | 4 +++ backend/open_webui/retrieval/web/yacy.py | 15 ++++++--- backend/open_webui/routers/retrieval.py | 12 ++++++- .../admin/Settings/WebSearch.svelte | 31 +++++++++++++++++-- 5 files changed, 67 insertions(+), 7 deletions(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 6574f2855..82842632b 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -2093,6 +2093,18 @@ YACY_QUERY_URL = PersistentConfig( os.getenv("YACY_QUERY_URL", ""), ) +YACY_USERNAME = PersistentConfig( + "YACY_USERNAME", + "rag.web.search.yacy_username", + os.getenv("YACY_USERNAME", ""), +) + +YACY_PASSWORD = PersistentConfig( + "YACY_PASSWORD", + "rag.web.search.yacy_password", + os.getenv("YACY_PASSWORD", ""), +) + GOOGLE_PSE_API_KEY = PersistentConfig( "GOOGLE_PSE_API_KEY", "rag.web.search.google_pse_api_key", diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index cb2194f47..cf1d23125 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -220,6 +220,8 @@ from open_webui.config import ( SERPAPI_ENGINE, SEARXNG_QUERY_URL, YACY_QUERY_URL, + YACY_USERNAME, + YACY_PASSWORD, SERPER_API_KEY, SERPLY_API_KEY, SERPSTACK_API_KEY, @@ -648,6 +650,8 @@ app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATI app.state.config.ENABLE_ONEDRIVE_INTEGRATION = ENABLE_ONEDRIVE_INTEGRATION app.state.config.SEARXNG_QUERY_URL = SEARXNG_QUERY_URL app.state.config.YACY_QUERY_URL = YACY_QUERY_URL +app.state.config.YACY_USERNAME = YACY_USERNAME +app.state.config.YACY_PASSWORD = YACY_PASSWORD app.state.config.GOOGLE_PSE_API_KEY = GOOGLE_PSE_API_KEY app.state.config.GOOGLE_PSE_ENGINE_ID = GOOGLE_PSE_ENGINE_ID app.state.config.BRAVE_SEARCH_API_KEY = BRAVE_SEARCH_API_KEY diff --git a/backend/open_webui/retrieval/web/yacy.py b/backend/open_webui/retrieval/web/yacy.py index 357c3face..904df0716 100644 --- a/backend/open_webui/retrieval/web/yacy.py +++ b/backend/open_webui/retrieval/web/yacy.py @@ -12,6 +12,8 @@ log.setLevel(SRC_LOG_LEVELS["RAG"]) def search_yacy( query_url: str, + username: Optional[str], + password: Optional[str], query: str, count: int, filter_list: Optional[list[str]] = None, @@ -46,6 +48,11 @@ def search_yacy( time_range = kwargs.get("time_range", "") categories = "".join(kwargs.get("categories", [])) + # Use authentication if either username or password is set + yacy_auth = None + if username or password: + yacy_auth = HTTPDigestAuth(username, password) + params = { "query": query, "resource": "global", @@ -60,16 +67,16 @@ def search_yacy( # "image_proxy": 0, } - # Legacy query format - if "" in query_url: + # Check if provided a json API URL + if not query_url.endswith("yacysearch.json"): # Strip all query parameters from the URL - query_url = query_url.split("?")[0] + query_url = query_url.rstrip('/') + "/yacysearch.json" log.debug(f"searching {query_url}") response = requests.get( query_url, - auth=HTTPDigestAuth('admin', 'yacy'), + auth=yacy_auth, headers={ "User-Agent": "Open WebUI (https://github.com/open-webui/open-webui) RAG Bot", "Accept": "text/html", diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 9e18d35b2..0a3357f40 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -391,6 +391,8 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, "SEARXNG_QUERY_URL": request.app.state.config.SEARXNG_QUERY_URL, "YACY_QUERY_URL": request.app.state.config.YACY_QUERY_URL, + "YACY_USERNAME": request.app.state.config.YACY_USERNAME, + "YACY_PASSWORD": request.app.state.config.YACY_PASSWORD, "GOOGLE_PSE_API_KEY": request.app.state.config.GOOGLE_PSE_API_KEY, "GOOGLE_PSE_ENGINE_ID": request.app.state.config.GOOGLE_PSE_ENGINE_ID, "BRAVE_SEARCH_API_KEY": request.app.state.config.BRAVE_SEARCH_API_KEY, @@ -437,6 +439,8 @@ class WebConfig(BaseModel): BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None SEARXNG_QUERY_URL: Optional[str] = None YACY_QUERY_URL: Optional[str] = None + YACY_USERNAME: Optional[str] = None + YACY_PASSWORD: Optional[str] = None GOOGLE_PSE_API_KEY: Optional[str] = None GOOGLE_PSE_ENGINE_ID: Optional[str] = None BRAVE_SEARCH_API_KEY: Optional[str] = None @@ -655,6 +659,8 @@ async def update_rag_config( ) request.app.state.config.SEARXNG_QUERY_URL = form_data.web.SEARXNG_QUERY_URL request.app.state.config.YACY_QUERY_URL = form_data.web.YACY_QUERY_URL + request.app.state.config.YACY_USERNAME = form_data.web.YACY_USERNAME + request.app.state.config.YACY_PASSWORD = form_data.web.YACY_PASSWORD request.app.state.config.GOOGLE_PSE_API_KEY = form_data.web.GOOGLE_PSE_API_KEY request.app.state.config.GOOGLE_PSE_ENGINE_ID = ( form_data.web.GOOGLE_PSE_ENGINE_ID @@ -754,6 +760,8 @@ async def update_rag_config( "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, "SEARXNG_QUERY_URL": request.app.state.config.SEARXNG_QUERY_URL, "YACY_QUERY_URL": request.app.state.config.YACY_QUERY_URL, + "YACY_USERNAME": request.app.state.config.YACY_USERNAME, + "YACY_PASSWORD": request.app.state.config.YACY_PASSWORD, "GOOGLE_PSE_API_KEY": request.app.state.config.GOOGLE_PSE_API_KEY, "GOOGLE_PSE_ENGINE_ID": request.app.state.config.GOOGLE_PSE_ENGINE_ID, "BRAVE_SEARCH_API_KEY": request.app.state.config.BRAVE_SEARCH_API_KEY, @@ -1271,7 +1279,7 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: """Search the web using a search engine and return the results as a list of SearchResult objects. Will look for a search engine API key in environment variables in the following order: - SEARXNG_QUERY_URL - - YACY_QUERY_URL + - YACY_QUERY_URL + YACY_USERNAME + YACY_PASSWORD - GOOGLE_PSE_API_KEY + GOOGLE_PSE_ENGINE_ID - BRAVE_SEARCH_API_KEY - KAGI_SEARCH_API_KEY @@ -1305,6 +1313,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: if request.app.state.config.YACY_QUERY_URL: return search_yacy( request.app.state.config.YACY_QUERY_URL, + request.app.state.config.YACY_USERNAME, + request.app.state.config.YACY_PASSWORD, query, request.app.state.config.WEB_SEARCH_RESULT_COUNT, request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte index d5ba2f079..774173420 100644 --- a/src/lib/components/admin/Settings/WebSearch.svelte +++ b/src/lib/components/admin/Settings/WebSearch.svelte @@ -148,7 +148,7 @@
- {$i18n.t('Yacy Query URL')} + {$i18n.t('Yacy Instance URL')}
@@ -156,7 +156,7 @@ @@ -164,6 +164,33 @@
+
+
+
+
+ {$i18n.t('Yacy Username')} +
+ + +
+ +
+
+ {$i18n.t('Yacy Password')} +
+ + +
+
+
{:else if webConfig.WEB_SEARCH_ENGINE === 'google_pse'}