From c5c4aef7b132233bd54eda4d8ac640a3b6e75ecd Mon Sep 17 00:00:00 2001 From: Danil Date: Mon, 26 Jan 2026 17:31:44 +0500 Subject: [PATCH] Yandex web search (#20922) Co-authored-by: Tim Baek Co-authored-by: joaoback <156559121+joaoback@users.noreply.github.com> --- backend/open_webui/config.py | 18 +++ backend/open_webui/main.py | 6 + backend/open_webui/retrieval/web/yandex.py | 147 ++++++++++++++++++ backend/open_webui/routers/retrieval.py | 30 ++++ .../admin/Settings/WebSearch.svelte | 51 +++++- 5 files changed, 251 insertions(+), 1 deletion(-) create mode 100644 backend/open_webui/retrieval/web/yandex.py diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index e84f1fa51..b4f33afd4 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -3410,6 +3410,24 @@ EXTERNAL_WEB_LOADER_API_KEY = PersistentConfig( os.environ.get("EXTERNAL_WEB_LOADER_API_KEY", ""), ) +YANDEX_WEB_SEARCH_URL = PersistentConfig( + "YANDEX_WEB_SEARCH_URL", + "rag.web.search.yandex_web_search_url", + os.environ.get("YANDEX_WEB_SEARCH_URL", ""), +) + +YANDEX_WEB_SEARCH_API_KEY = PersistentConfig( + "YANDEX_WEB_SEARCH_API_KEY", + "rag.web.search.yandex_web_search_api_key", + os.environ.get("YANDEX_WEB_SEARCH_API_KEY", ""), +) + +YANDEX_WEB_SEARCH_CONFIG = PersistentConfig( + "YANDEX_WEB_SEARCH_CONFIG", + "rag.web.search.yandex_web_search_config", + os.environ.get("YANDEX_WEB_SEARCH_CONFIG", ""), +) + #################################### # Images #################################### diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 398a666d6..e34124812 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -353,6 +353,9 @@ from open_webui.config import ( EXTERNAL_WEB_SEARCH_API_KEY, EXTERNAL_WEB_LOADER_URL, EXTERNAL_WEB_LOADER_API_KEY, + YANDEX_WEB_SEARCH_URL, + YANDEX_WEB_SEARCH_API_KEY, + YANDEX_WEB_SEARCH_CONFIG, # WebUI WEBUI_AUTH, WEBUI_NAME, @@ -1008,6 +1011,9 @@ app.state.config.EXTERNAL_WEB_SEARCH_URL 
log = logging.getLogger(__name__)

# Endpoint used when no custom search URL is configured.
_DEFAULT_YANDEX_SEARCH_URL = "https://searchapi.api.cloud.yandex.net/v2/web/search"

# Upper bound (seconds) for the HTTP call so a stalled connection cannot
# block the calling worker indefinitely.
_REQUEST_TIMEOUT_SECONDS = 30

# Location of the per-result groups inside the decoded XML document,
# relative to the document's root element.
_GROUP_PATH = "response/results/grouping/group"


def xml_element_contents_to_string(element: Element) -> str:
    """Return the text of *element* and all of its descendants, markup removed.

    Text that follows a nested child (the child's ``tail``) is part of the
    element's content and is kept.  The ``tail`` of *element* itself is text
    that belongs to its parent and is therefore not included.
    """
    return "".join(element.itertext())


def _element_text(parent: Element, path: str) -> str:
    """Return the stripped text content at *path* under *parent*, or ``""`` if absent."""
    node = parent.find(path)
    if node is None:
        return ""
    return xml_element_contents_to_string(node).strip()


def _build_payload(yandex_search_config: Optional[str], query: str, count: int) -> dict:
    """Build the JSON request body from the admin-supplied config and the query.

    Args:
        yandex_search_config: JSON object (as text) with extra request
            options; ``None``, empty or blank means "no extra options".
        query: Search text; always overrides ``query.queryText``.
        count: Number of result groups to request.

    Raises:
        ValueError: If the config is not valid JSON or is not a JSON object.
    """
    payload = {}
    if yandex_search_config and yandex_search_config.strip():
        payload = json.loads(yandex_search_config)
    if not isinstance(payload, dict):
        raise ValueError("Yandex web search config must be a JSON object")

    query_spec = payload.get("query")
    if not isinstance(query_spec, dict):
        query_spec = payload["query"] = {}
    query_spec.setdefault("searchType", "SEARCH_TYPE_RU")
    query_spec["queryText"] = query

    group_spec = payload.get("groupSpec")
    if not isinstance(group_spec, dict):
        group_spec = payload["groupSpec"] = {}
    group_spec.setdefault("groupMode", "GROUP_MODE_DEEP")
    # One document per group, `count` groups: one result per group.
    group_spec["groupsOnPage"] = count
    group_spec["docsInGroup"] = 1

    return payload


def _parse_search_results(raw_xml: bytes) -> List[dict]:
    """Extract ``url`` / ``title`` / ``snippet`` dicts from the decoded XML.

    Groups without a URL are skipped; a missing title or passage yields an
    empty string instead of aborting the whole result set.
    """
    # NOTE(review): the stdlib XML parser is not hardened against maliciously
    # constructed documents; the input here is the configured search endpoint's
    # response.
    root = ET.fromstring(raw_xml)

    parsed = []
    for group in root.findall(_GROUP_PATH):
        url = _element_text(group, "doc/url")
        if not url:
            continue
        parsed.append(
            {
                "url": url,
                "title": _element_text(group, "doc/title"),
                "snippet": _element_text(group, "doc/passages/passage"),
            }
        )
    return parsed


def search_yandex(
    request: "Request",
    yandex_search_url: str,
    yandex_search_api_key: str,
    yandex_search_config: str,
    query: str,
    count: int,
    filter_list: Optional[List[str]] = None,
    user=None,
) -> "List[SearchResult]":
    """Run a web search against the Yandex search API.

    Args:
        request: Current request; ``request.state.chat_id`` (if set) is
            forwarded as the ``X-OpenWebUI-Chat-Id`` header.
        yandex_search_url: Endpoint to POST to; empty/``None`` selects the
            default endpoint.
        yandex_search_api_key: Sent as ``Authorization: Api-Key <key>``.
        yandex_search_config: Optional JSON object (as text) merged into the
            request body; see ``_build_payload``.
        query: Search text.
        count: Maximum number of results to request and return.
        filter_list: Passed through to ``get_filtered_results``.
        user: When given, passed to ``include_user_info_headers``.

    Returns:
        Up to ``count`` ``SearchResult`` items.  Any failure (bad config,
        HTTP error, malformed response) is logged and yields ``[]``.
    """
    try:
        headers = {
            "User-Agent": "Open WebUI (https://github.com/open-webui/open-webui) RAG Bot",
            "Authorization": f"Api-Key {yandex_search_api_key}",
        }

        if user is not None:
            headers = include_user_info_headers(headers, user)

        chat_id = getattr(request.state, "chat_id", None)
        if chat_id:
            headers["X-OpenWebUI-Chat-Id"] = str(chat_id)

        response = requests.post(
            yandex_search_url or _DEFAULT_YANDEX_SEARCH_URL,
            headers=headers,
            json=_build_payload(yandex_search_config, query, count),
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()

        response_body = response.json()
        if "rawData" not in response_body:
            raise ValueError(f"No `rawData` in response body: {response_body}")

        # `rawData` carries the XML result document, base64-encoded.
        raw_xml = base64.b64decode(response_body["rawData"])

        filtered = get_filtered_results(_parse_search_results(raw_xml), filter_list)

        results = [
            SearchResult(
                link=item.get("url"),
                title=item.get("title"),
                snippet=item.get("snippet"),
            )
            for item in filtered[:count]
        ]

        log.info(f"Yandex search results: {results}")

        return results
    except Exception as e:
        # Best-effort by design: callers get an empty result list, but the
        # traceback is kept in the log for diagnosis.
        log.exception(f"Error in search: {e}")

        return []


if __name__ == "__main__":
    # Manual smoke test: builds a minimal ASGI scope and reads settings from
    # the environment.
    from starlette.datastructures import Headers
    from fastapi import FastAPI

    result = search_yandex(
        Request(
            {
                "type": "http",
                "asgi.version": "3.0",
                "asgi.spec_version": "2.0",
                "method": "GET",
                "path": "/internal",
                "query_string": b"",
                "headers": Headers({}).raw,
                "client": ("127.0.0.1", 12345),
                "server": ("127.0.0.1", 80),
                "scheme": "http",
                "app": FastAPI(),
            },
            None,
        ),
        os.environ.get("YANDEX_WEB_SEARCH_URL", ""),
        os.environ.get("YANDEX_WEB_SEARCH_API_KEY", ""),
        os.environ.get(
            "YANDEX_WEB_SEARCH_CONFIG",
            '{"query": {"searchType": "SEARCH_TYPE_COM"}}',
        ),
        "TOP movies of the past year",
        3,
    )

    print(result)
Optional[str] = None + YANDEX_WEB_SEARCH_CONFIG: Optional[str] = None class ConfigForm(BaseModel): @@ -1176,6 +1183,15 @@ async def update_rag_config( request.app.state.YOUTUBE_LOADER_TRANSLATION = ( form_data.web.YOUTUBE_LOADER_TRANSLATION ) + request.app.state.config.YANDEX_WEB_SEARCH_URL = ( + form_data.web.YANDEX_WEB_SEARCH_URL + ) + request.app.state.config.YANDEX_WEB_SEARCH_API_KEY = ( + form_data.web.YANDEX_WEB_SEARCH_API_KEY + ) + request.app.state.config.YANDEX_WEB_SEARCH_CONFIG = ( + form_data.web.YANDEX_WEB_SEARCH_CONFIG + ) return { "status": True, @@ -1300,6 +1316,9 @@ async def update_rag_config( "YOUTUBE_LOADER_LANGUAGE": request.app.state.config.YOUTUBE_LOADER_LANGUAGE, "YOUTUBE_LOADER_PROXY_URL": request.app.state.config.YOUTUBE_LOADER_PROXY_URL, "YOUTUBE_LOADER_TRANSLATION": request.app.state.YOUTUBE_LOADER_TRANSLATION, + "YANDEX_WEB_SEARCH_URL": request.app.state.config.YANDEX_WEB_SEARCH_URL, + "YANDEX_WEB_SEARCH_API_KEY": request.app.state.config.YANDEX_WEB_SEARCH_API_KEY, + "YANDEX_WEB_SEARCH_CONFIG": request.app.state.config.YANDEX_WEB_SEARCH_CONFIG, }, } @@ -2240,6 +2259,17 @@ def search_web( request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, user=user, ) + elif engine == "yandex": + return search_yandex( + request, + request.app.state.config.YANDEX_WEB_SEARCH_URL, + request.app.state.config.YANDEX_WEB_SEARCH_API_KEY, + request.app.state.config.YANDEX_WEB_SEARCH_CONFIG, + query, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, + user=user, + ) else: raise Exception("No search engine API key found in environment variables") diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte index e91a110f8..587b494f0 100644 --- a/src/lib/components/admin/Settings/WebSearch.svelte +++ b/src/lib/components/admin/Settings/WebSearch.svelte @@ -7,6 +7,7 @@ import { toast } from 'svelte-sonner'; import SensitiveInput from 
'$lib/components/common/SensitiveInput.svelte'; import Tooltip from '$lib/components/common/Tooltip.svelte'; + import Textarea from '$lib/components/common/Textarea.svelte'; const i18n = getContext('i18n'); @@ -35,7 +36,8 @@ 'perplexity', 'sougou', 'firecrawl', - 'external' + 'external', + 'yandex' ]; let webLoaderEngines = ['playwright', 'firecrawl', 'tavily', 'external']; @@ -735,6 +737,53 @@ /> + {:else if webConfig.WEB_SEARCH_ENGINE === 'yandex'} +
+
+
+ {$i18n.t('Yandex Web Search URL')} +
+ +
+
+ +
+
+
+ +
+
+ {$i18n.t('Yandex Web Search API Key')} +
+ + +
+ +
+
{$i18n.t('Yandex Web Search config')}
+ + +