Yandex web search (#20922)

Co-authored-by: Tim Baek <tim@openwebui.com>
Co-authored-by: joaoback <156559121+joaoback@users.noreply.github.com>
This commit is contained in:
Danil
2026-01-26 17:31:44 +05:00
committed by GitHub
parent 533c7b27eb
commit c5c4aef7b1
5 changed files with 251 additions and 1 deletions

View File

@@ -3410,6 +3410,24 @@ EXTERNAL_WEB_LOADER_API_KEY = PersistentConfig(
os.environ.get("EXTERNAL_WEB_LOADER_API_KEY", ""),
)
# Endpoint used for Yandex web search requests. When left empty,
# `search_yandex` falls back to its built-in default endpoint.
YANDEX_WEB_SEARCH_URL = PersistentConfig(
    "YANDEX_WEB_SEARCH_URL",
    "rag.web.search.yandex_web_search_url",
    os.getenv("YANDEX_WEB_SEARCH_URL", ""),
)

# API key sent by `search_yandex` in the `Authorization: Api-Key ...` header.
YANDEX_WEB_SEARCH_API_KEY = PersistentConfig(
    "YANDEX_WEB_SEARCH_API_KEY",
    "rag.web.search.yandex_web_search_api_key",
    os.getenv("YANDEX_WEB_SEARCH_API_KEY", ""),
)

# Optional JSON string that `search_yandex` parses and uses as the base of
# the request payload (e.g. to override `query.searchType`).
YANDEX_WEB_SEARCH_CONFIG = PersistentConfig(
    "YANDEX_WEB_SEARCH_CONFIG",
    "rag.web.search.yandex_web_search_config",
    os.getenv("YANDEX_WEB_SEARCH_CONFIG", ""),
)
####################################
# Images
####################################

View File

@@ -353,6 +353,9 @@ from open_webui.config import (
EXTERNAL_WEB_SEARCH_API_KEY,
EXTERNAL_WEB_LOADER_URL,
EXTERNAL_WEB_LOADER_API_KEY,
YANDEX_WEB_SEARCH_URL,
YANDEX_WEB_SEARCH_API_KEY,
YANDEX_WEB_SEARCH_CONFIG,
# WebUI
WEBUI_AUTH,
WEBUI_NAME,
@@ -1008,6 +1011,9 @@ app.state.config.EXTERNAL_WEB_SEARCH_URL = EXTERNAL_WEB_SEARCH_URL
app.state.config.EXTERNAL_WEB_SEARCH_API_KEY = EXTERNAL_WEB_SEARCH_API_KEY
app.state.config.EXTERNAL_WEB_LOADER_URL = EXTERNAL_WEB_LOADER_URL
app.state.config.EXTERNAL_WEB_LOADER_API_KEY = EXTERNAL_WEB_LOADER_API_KEY
# Yandex web search settings: endpoint URL, API key, and the JSON request
# config string, as imported from open_webui.config.
app.state.config.YANDEX_WEB_SEARCH_URL = YANDEX_WEB_SEARCH_URL
app.state.config.YANDEX_WEB_SEARCH_API_KEY = YANDEX_WEB_SEARCH_API_KEY
app.state.config.YANDEX_WEB_SEARCH_CONFIG = YANDEX_WEB_SEARCH_CONFIG
app.state.config.PLAYWRIGHT_WS_URL = PLAYWRIGHT_WS_URL

View File

@@ -0,0 +1,147 @@
import base64
import io
import json
import logging
import os
from typing import Optional, List
import requests
from fastapi import Request
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.utils.headers import include_user_info_headers
from xml.etree import ElementTree as ET
from xml.etree.ElementTree import Element
log = logging.getLogger(__name__)
def xml_element_contents_to_string(element: Element) -> str:
    """Flatten an XML element into plain text, dropping all markup.

    The result is the element's own leading text, followed by the flattened
    text of every child (recursively, in document order), followed by the
    element's tail text (the text that comes right after its closing tag).
    Missing text or tail values contribute an empty string.
    """
    leading = element.text or ""
    nested = "".join(xml_element_contents_to_string(child) for child in element)
    trailing = element.tail or ""
    return f"{leading}{nested}{trailing}"
def _find_text(parent: Element, path: str) -> str:
    """Return the trimmed plain text of the node at *path*, or "" if absent."""
    node = parent.find(path)
    if node is None:
        return ""
    # The flattened text includes the node's tail (whitespace between tags),
    # so trim all surrounding whitespace, not only newlines.
    return xml_element_contents_to_string(node).strip()


def search_yandex(
    request: Request,
    yandex_search_url: str,
    yandex_search_api_key: str,
    yandex_search_config: str,
    query: str,
    count: int,
    filter_list: Optional[List[str]] = None,
    user=None,
) -> List[SearchResult]:
    """Run a web search through the Yandex search API.

    Args:
        request: Incoming request; ``request.state.chat_id`` (when set) is
            forwarded as the ``X-OpenWebUI-Chat-Id`` header.
        yandex_search_url: Endpoint to POST to. Empty/``None`` selects the
            default ``https://searchapi.api.cloud.yandex.net/v2/web/search``.
        yandex_search_api_key: Key sent as ``Authorization: Api-Key <key>``.
        yandex_search_config: Optional JSON object (as a string) used as the
            base request payload. ``query.queryText``,
            ``groupSpec.groupsOnPage`` and ``groupSpec.docsInGroup`` are
            always overwritten; ``query.searchType`` and
            ``groupSpec.groupMode`` are only filled in when missing.
        query: The search text.
        count: Number of result groups requested and maximum results returned.
        filter_list: Optional domain filter passed to ``get_filtered_results``.
        user: Optional user whose info is added to the request headers.

    Returns:
        A list of ``SearchResult``. Any failure (bad config, HTTP error,
        malformed response) is logged and yields an empty list.
    """
    try:
        headers = {
            "User-Agent": "Open WebUI (https://github.com/open-webui/open-webui) RAG Bot",
            "Authorization": f"Api-Key {yandex_search_api_key}",
        }
        if user is not None:
            headers = include_user_info_headers(headers, user)

        chat_id = getattr(request.state, "chat_id", None)
        if chat_id:
            headers["X-OpenWebUI-Chat-Id"] = str(chat_id)

        # The stored config may be "" or None (the admin form field is
        # optional); both mean "no extra configuration".
        if yandex_search_config and yandex_search_config.strip():
            payload = json.loads(yandex_search_config)
        else:
            payload = {}
        if not isinstance(payload, dict):
            raise ValueError("YANDEX_WEB_SEARCH_CONFIG must be a JSON object")

        query_spec = payload.get("query")
        if not isinstance(query_spec, dict):
            query_spec = payload["query"] = {}
        query_spec.setdefault("searchType", "SEARCH_TYPE_RU")
        query_spec["queryText"] = query

        group_spec = payload.get("groupSpec")
        if not isinstance(group_spec, dict):
            group_spec = payload["groupSpec"] = {}
        group_spec.setdefault("groupMode", "GROUP_MODE_DEEP")
        group_spec["groupsOnPage"] = count
        group_spec["docsInGroup"] = 1

        # NOTE(review): no timeout is set, so a stalled connection blocks the
        # caller indefinitely -- consider passing `timeout=`.
        response = requests.post(
            yandex_search_url
            or "https://searchapi.api.cloud.yandex.net/v2/web/search",
            headers=headers,
            json=payload,
        )
        response.raise_for_status()

        response_body = response.json()
        if "rawData" not in response_body:
            raise Exception(f"No `rawData` in response body: {response_body}")

        # `rawData` carries the base64-encoded XML search result document.
        search_result_body_bytes = base64.decodebytes(
            bytes(response_body["rawData"], "utf-8")
        )
        doc_root = ET.parse(io.BytesIO(search_result_body_bytes))

        results = []
        for group in doc_root.findall("response/results/grouping/group"):
            url = _find_text(group, "doc/url")
            if not url:
                # A result without a link is unusable; skip it instead of
                # failing the whole search.
                continue
            results.append(
                {
                    "url": url,
                    "title": _find_text(group, "doc/title"),
                    "snippet": _find_text(group, "doc/passages/passage"),
                }
            )

        results = get_filtered_results(results, filter_list)

        results = [
            SearchResult(
                link=result.get("url"),
                title=result.get("title"),
                snippet=result.get("snippet"),
            )
            for result in results[:count]
        ]

        log.info(f"Yandex search results: {results}")

        return results
    except Exception as e:
        log.error(f"Error in search: {e}")
        return []
if __name__ == "__main__":
    # Manual smoke test: build a minimal ASGI request by hand and run one
    # search using settings taken from the environment.
    from fastapi import FastAPI
    from starlette.datastructures import Headers

    asgi_scope = {
        "type": "http",
        "asgi.version": "3.0",
        "asgi.spec_version": "2.0",
        "method": "GET",
        "path": "/internal",
        "query_string": b"",
        "headers": Headers({}).raw,
        "client": ("127.0.0.1", 12345),
        "server": ("127.0.0.1", 80),
        "scheme": "http",
        "app": FastAPI(),
    }
    fake_request = Request(asgi_scope, None)

    search_url = os.environ.get("YANDEX_WEB_SEARCH_URL", "")
    api_key = os.environ.get("YANDEX_WEB_SEARCH_API_KEY", "")
    # Defaults to the international ("COM") search type when no config is set.
    search_config = os.environ.get(
        "YANDEX_WEB_SEARCH_CONFIG",
        "{\"query\": {\"searchType\": \"SEARCH_TYPE_COM\"}}",
    )

    result = search_yandex(
        fake_request,
        search_url,
        api_key,
        search_config,
        "TOP movies of the past year",
        3,
    )
    print(result)

View File

@@ -76,6 +76,7 @@ from open_webui.retrieval.web.perplexity import search_perplexity
from open_webui.retrieval.web.sougou import search_sougou
from open_webui.retrieval.web.firecrawl import search_firecrawl
from open_webui.retrieval.web.external import search_external
from open_webui.retrieval.web.yandex import search_yandex
from open_webui.retrieval.utils import (
get_content_from_url,
@@ -578,6 +579,9 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
"YOUTUBE_LOADER_LANGUAGE": request.app.state.config.YOUTUBE_LOADER_LANGUAGE,
"YOUTUBE_LOADER_PROXY_URL": request.app.state.config.YOUTUBE_LOADER_PROXY_URL,
"YOUTUBE_LOADER_TRANSLATION": request.app.state.YOUTUBE_LOADER_TRANSLATION,
"YANDEX_WEB_SEARCH_URL": request.app.state.config.YANDEX_WEB_SEARCH_URL,
"YANDEX_WEB_SEARCH_API_KEY": request.app.state.config.YANDEX_WEB_SEARCH_API_KEY,
"YANDEX_WEB_SEARCH_CONFIG": request.app.state.config.YANDEX_WEB_SEARCH_CONFIG,
},
}
@@ -641,6 +645,9 @@ class WebConfig(BaseModel):
YOUTUBE_LOADER_LANGUAGE: Optional[List[str]] = None
YOUTUBE_LOADER_PROXY_URL: Optional[str] = None
YOUTUBE_LOADER_TRANSLATION: Optional[str] = None
YANDEX_WEB_SEARCH_URL: Optional[str] = None
YANDEX_WEB_SEARCH_API_KEY: Optional[str] = None
YANDEX_WEB_SEARCH_CONFIG: Optional[str] = None
class ConfigForm(BaseModel):
@@ -1176,6 +1183,15 @@ async def update_rag_config(
request.app.state.YOUTUBE_LOADER_TRANSLATION = (
form_data.web.YOUTUBE_LOADER_TRANSLATION
)
request.app.state.config.YANDEX_WEB_SEARCH_URL = (
form_data.web.YANDEX_WEB_SEARCH_URL
)
request.app.state.config.YANDEX_WEB_SEARCH_API_KEY = (
form_data.web.YANDEX_WEB_SEARCH_API_KEY
)
request.app.state.config.YANDEX_WEB_SEARCH_CONFIG = (
form_data.web.YANDEX_WEB_SEARCH_CONFIG
)
return {
"status": True,
@@ -1300,6 +1316,9 @@ async def update_rag_config(
"YOUTUBE_LOADER_LANGUAGE": request.app.state.config.YOUTUBE_LOADER_LANGUAGE,
"YOUTUBE_LOADER_PROXY_URL": request.app.state.config.YOUTUBE_LOADER_PROXY_URL,
"YOUTUBE_LOADER_TRANSLATION": request.app.state.YOUTUBE_LOADER_TRANSLATION,
"YANDEX_WEB_SEARCH_URL": request.app.state.config.YANDEX_WEB_SEARCH_URL,
"YANDEX_WEB_SEARCH_API_KEY": request.app.state.config.YANDEX_WEB_SEARCH_API_KEY,
"YANDEX_WEB_SEARCH_CONFIG": request.app.state.config.YANDEX_WEB_SEARCH_CONFIG,
},
}
@@ -2240,6 +2259,17 @@ def search_web(
request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
user=user,
)
elif engine == "yandex":
return search_yandex(
request,
request.app.state.config.YANDEX_WEB_SEARCH_URL,
request.app.state.config.YANDEX_WEB_SEARCH_API_KEY,
request.app.state.config.YANDEX_WEB_SEARCH_CONFIG,
query,
request.app.state.config.WEB_SEARCH_RESULT_COUNT,
request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
user=user,
)
else:
raise Exception("No search engine API key found in environment variables")