Yandex web search (#20922)
Co-authored-by: Tim Baek <tim@openwebui.com> Co-authored-by: joaoback <156559121+joaoback@users.noreply.github.com>
This commit is contained in:
@@ -3410,6 +3410,24 @@ EXTERNAL_WEB_LOADER_API_KEY = PersistentConfig(
|
||||
os.environ.get("EXTERNAL_WEB_LOADER_API_KEY", ""),
|
||||
)
|
||||
|
||||
# Endpoint used for Yandex web search requests. Seeded from the
# YANDEX_WEB_SEARCH_URL environment variable; when left empty, search_yandex
# falls back to its built-in default endpoint.
YANDEX_WEB_SEARCH_URL = PersistentConfig(
|
||||
"YANDEX_WEB_SEARCH_URL",
|
||||
"rag.web.search.yandex_web_search_url",
|
||||
os.environ.get("YANDEX_WEB_SEARCH_URL", ""),
|
||||
)
|
||||
|
||||
# API key for Yandex web search. search_yandex sends it in the
# "Authorization: Api-Key <key>" request header.
YANDEX_WEB_SEARCH_API_KEY = PersistentConfig(
|
||||
"YANDEX_WEB_SEARCH_API_KEY",
|
||||
"rag.web.search.yandex_web_search_api_key",
|
||||
os.environ.get("YANDEX_WEB_SEARCH_API_KEY", ""),
|
||||
)
|
||||
|
||||
# Optional JSON string used as the base request payload for Yandex web search.
# An empty string means "no extra options"; search_yandex parses it with
# json.loads and then fills in the query text and grouping fields.
YANDEX_WEB_SEARCH_CONFIG = PersistentConfig(
|
||||
"YANDEX_WEB_SEARCH_CONFIG",
|
||||
"rag.web.search.yandex_web_search_config",
|
||||
os.environ.get("YANDEX_WEB_SEARCH_CONFIG", ""),
|
||||
)
|
||||
|
||||
####################################
|
||||
# Images
|
||||
####################################
|
||||
|
||||
@@ -353,6 +353,9 @@ from open_webui.config import (
|
||||
EXTERNAL_WEB_SEARCH_API_KEY,
|
||||
EXTERNAL_WEB_LOADER_URL,
|
||||
EXTERNAL_WEB_LOADER_API_KEY,
|
||||
YANDEX_WEB_SEARCH_URL,
|
||||
YANDEX_WEB_SEARCH_API_KEY,
|
||||
YANDEX_WEB_SEARCH_CONFIG,
|
||||
# WebUI
|
||||
WEBUI_AUTH,
|
||||
WEBUI_NAME,
|
||||
@@ -1008,6 +1011,9 @@ app.state.config.EXTERNAL_WEB_SEARCH_URL = EXTERNAL_WEB_SEARCH_URL
|
||||
app.state.config.EXTERNAL_WEB_SEARCH_API_KEY = EXTERNAL_WEB_SEARCH_API_KEY
|
||||
app.state.config.EXTERNAL_WEB_LOADER_URL = EXTERNAL_WEB_LOADER_URL
|
||||
app.state.config.EXTERNAL_WEB_LOADER_API_KEY = EXTERNAL_WEB_LOADER_API_KEY
|
||||
app.state.config.YANDEX_WEB_SEARCH_URL = YANDEX_WEB_SEARCH_URL
|
||||
app.state.config.YANDEX_WEB_SEARCH_API_KEY = YANDEX_WEB_SEARCH_API_KEY
|
||||
app.state.config.YANDEX_WEB_SEARCH_CONFIG = YANDEX_WEB_SEARCH_CONFIG
|
||||
|
||||
|
||||
app.state.config.PLAYWRIGHT_WS_URL = PLAYWRIGHT_WS_URL
|
||||
|
||||
147
backend/open_webui/retrieval/web/yandex.py
Normal file
147
backend/open_webui/retrieval/web/yandex.py
Normal file
@@ -0,0 +1,147 @@
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional, List
|
||||
|
||||
import requests
|
||||
|
||||
from fastapi import Request
|
||||
|
||||
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
|
||||
from open_webui.utils.headers import include_user_info_headers
|
||||
|
||||
from xml.etree import ElementTree as ET
|
||||
from xml.etree.ElementTree import Element
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def xml_element_contents_to_string(element: Optional[Element]) -> str:
    """Flatten an XML element into plain text.

    The result is the element's own text, followed by the text of every
    nested descendant (tags are dropped, their text and tails are kept, in
    document order), followed by the element's own tail, i.e. the text
    between its closing tag and the next sibling. The trailing tail is kept
    for backward compatibility, so callers typically strip the result.

    Args:
        element: The element to flatten, or ``None`` (for example the result
            of an ``Element.find`` call that matched nothing).

    Returns:
        The concatenated text, or ``""`` when ``element`` is ``None``.
    """
    # ``find`` returns None for a missing node; treat that as "no text"
    # instead of failing with AttributeError.
    if element is None:
        return ""

    # itertext() yields text and child tails in document order but never the
    # root's own tail, so it is appended explicitly.
    return "".join(element.itertext()) + (element.tail or "")
|
||||
|
||||
|
||||
# Endpoint used when no custom Yandex search URL is configured.
_YANDEX_DEFAULT_SEARCH_URL = "https://searchapi.api.cloud.yandex.net/v2/web/search"

# Upper bound (seconds) for the HTTP call so a stalled endpoint cannot block
# the search forever.
_YANDEX_REQUEST_TIMEOUT = 30


def _build_yandex_payload(yandex_search_config, query: str, count: int) -> dict:
    """Build the JSON request body from the user config, query and result count."""
    # Empty string and None both mean "no extra configuration".
    payload = json.loads(yandex_search_config) if yandex_search_config else {}
    if not isinstance(payload, dict):
        raise ValueError("Yandex search config must be a JSON object")

    query_spec = payload.get("query")
    if not isinstance(query_spec, dict):
        query_spec = payload["query"] = {}
    # User-supplied searchType wins; the query text is always overridden.
    query_spec.setdefault("searchType", "SEARCH_TYPE_RU")
    query_spec["queryText"] = query

    group_spec = payload.get("groupSpec")
    if not isinstance(group_spec, dict):
        group_spec = payload["groupSpec"] = {}
    group_spec.setdefault("groupMode", "GROUP_MODE_DEEP")
    # One document per group, `count` groups per page.
    group_spec["groupsOnPage"] = count
    group_spec["docsInGroup"] = 1

    return payload


def _parse_yandex_raw_data(raw_data: str) -> list:
    """Decode the base64 XML in ``rawData`` into url/title/snippet dicts."""
    # NOTE(review): the XML comes from the configured search endpoint and is
    # parsed with the stdlib parser; only point this at trusted endpoints.
    root = ET.fromstring(base64.b64decode(raw_data))

    def node_text(node) -> str:
        # A missing node yields "" instead of aborting the whole result set.
        if node is None:
            return ""
        return xml_element_contents_to_string(node).strip()

    parsed = []
    for group in root.findall("response/results/grouping/group"):
        url = node_text(group.find("doc/url"))
        if not url:
            # A result without a link is unusable; skip it, keep the rest.
            continue

        passage = group.find("doc/passages/passage")
        if passage is None:
            # Some documents carry a headline instead of passages.
            passage = group.find("doc/headline")

        parsed.append(
            {
                "url": url,
                "title": node_text(group.find("doc/title")),
                "snippet": node_text(passage),
            }
        )

    return parsed


def search_yandex(
    request: Request,
    yandex_search_url: str,
    yandex_search_api_key: str,
    yandex_search_config: str,
    query: str,
    count: int,
    filter_list: Optional[List[str]] = None,
    user=None,
) -> List[SearchResult]:
    """Run a web search against the Yandex Search API.

    Args:
        request: Incoming request; ``request.state.chat_id`` (if set) is
            forwarded in the ``X-OpenWebUI-Chat-Id`` header.
        yandex_search_url: Search endpoint; empty or ``None`` selects the
            default Yandex Cloud endpoint.
        yandex_search_api_key: Key sent as ``Authorization: Api-Key <key>``.
        yandex_search_config: JSON object (as a string) used as the base
            request payload; empty or ``None`` means no extra options.
        query: Search text.
        count: Maximum number of results to request and return.
        filter_list: Optional domain filter passed to ``get_filtered_results``.
        user: Optional user whose info is added to the request headers.

    Returns:
        Up to ``count`` search results. Any failure (bad config, HTTP error,
        malformed response) is logged and results in an empty list.
    """
    try:
        headers = {
            "User-Agent": "Open WebUI (https://github.com/open-webui/open-webui) RAG Bot",
            "Authorization": f"Api-Key {yandex_search_api_key}",
        }

        if user is not None:
            headers = include_user_info_headers(headers, user)

        chat_id = getattr(request.state, "chat_id", None)
        if chat_id:
            headers["X-OpenWebUI-Chat-Id"] = str(chat_id)

        payload = _build_yandex_payload(yandex_search_config, query, count)

        response = requests.post(
            yandex_search_url or _YANDEX_DEFAULT_SEARCH_URL,
            headers=headers,
            json=payload,
            timeout=_YANDEX_REQUEST_TIMEOUT,
        )
        response.raise_for_status()

        response_body = response.json()
        if "rawData" not in response_body:
            raise Exception(f"No `rawData` in response body: {response_body}")

        results = _parse_yandex_raw_data(response_body["rawData"])
        results = get_filtered_results(results, filter_list)

        results = [
            SearchResult(
                link=result.get("url"),
                title=result.get("title"),
                snippet=result.get("snippet"),
            )
            for result in results[:count]
        ]

        log.info(f"Yandex search results: {results}")

        return results
    except Exception as e:
        # Best-effort boundary: search failures must not break the caller,
        # but keep the traceback in the log for diagnosis.
        log.exception(f"Error in search: {e}")

        return []
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: build a minimal ASGI request and run one live search
    # using credentials and options taken from the environment.
    from starlette.datastructures import Headers
    from fastapi import FastAPI

    asgi_scope = {
        "type": "http",
        "asgi.version": "3.0",
        "asgi.spec_version": "2.0",
        "method": "GET",
        "path": "/internal",
        "query_string": b"",
        "headers": Headers({}).raw,
        "client": ("127.0.0.1", 12345),
        "server": ("127.0.0.1", 80),
        "scheme": "http",
        "app": FastAPI(),
    }
    smoke_request = Request(asgi_scope, None)

    endpoint = os.environ.get("YANDEX_WEB_SEARCH_URL", "")
    api_key = os.environ.get("YANDEX_WEB_SEARCH_API_KEY", "")
    # Defaults to the international search type when no config is supplied.
    search_config = os.environ.get(
        "YANDEX_WEB_SEARCH_CONFIG",
        "{\"query\": {\"searchType\": \"SEARCH_TYPE_COM\"}}",
    )

    result = search_yandex(
        smoke_request,
        endpoint,
        api_key,
        search_config,
        "TOP movies of the past year",
        3,
    )

    print(result)
|
||||
@@ -76,6 +76,7 @@ from open_webui.retrieval.web.perplexity import search_perplexity
|
||||
from open_webui.retrieval.web.sougou import search_sougou
|
||||
from open_webui.retrieval.web.firecrawl import search_firecrawl
|
||||
from open_webui.retrieval.web.external import search_external
|
||||
from open_webui.retrieval.web.yandex import search_yandex
|
||||
|
||||
from open_webui.retrieval.utils import (
|
||||
get_content_from_url,
|
||||
@@ -578,6 +579,9 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
|
||||
"YOUTUBE_LOADER_LANGUAGE": request.app.state.config.YOUTUBE_LOADER_LANGUAGE,
|
||||
"YOUTUBE_LOADER_PROXY_URL": request.app.state.config.YOUTUBE_LOADER_PROXY_URL,
|
||||
"YOUTUBE_LOADER_TRANSLATION": request.app.state.YOUTUBE_LOADER_TRANSLATION,
|
||||
"YANDEX_WEB_SEARCH_URL": request.app.state.config.YANDEX_WEB_SEARCH_URL,
|
||||
"YANDEX_WEB_SEARCH_API_KEY": request.app.state.config.YANDEX_WEB_SEARCH_API_KEY,
|
||||
"YANDEX_WEB_SEARCH_CONFIG": request.app.state.config.YANDEX_WEB_SEARCH_CONFIG,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -641,6 +645,9 @@ class WebConfig(BaseModel):
|
||||
YOUTUBE_LOADER_LANGUAGE: Optional[List[str]] = None
|
||||
YOUTUBE_LOADER_PROXY_URL: Optional[str] = None
|
||||
YOUTUBE_LOADER_TRANSLATION: Optional[str] = None
|
||||
YANDEX_WEB_SEARCH_URL: Optional[str] = None
|
||||
YANDEX_WEB_SEARCH_API_KEY: Optional[str] = None
|
||||
YANDEX_WEB_SEARCH_CONFIG: Optional[str] = None
|
||||
|
||||
|
||||
class ConfigForm(BaseModel):
|
||||
@@ -1176,6 +1183,15 @@ async def update_rag_config(
|
||||
request.app.state.YOUTUBE_LOADER_TRANSLATION = (
|
||||
form_data.web.YOUTUBE_LOADER_TRANSLATION
|
||||
)
|
||||
request.app.state.config.YANDEX_WEB_SEARCH_URL = (
|
||||
form_data.web.YANDEX_WEB_SEARCH_URL
|
||||
)
|
||||
request.app.state.config.YANDEX_WEB_SEARCH_API_KEY = (
|
||||
form_data.web.YANDEX_WEB_SEARCH_API_KEY
|
||||
)
|
||||
request.app.state.config.YANDEX_WEB_SEARCH_CONFIG = (
|
||||
form_data.web.YANDEX_WEB_SEARCH_CONFIG
|
||||
)
|
||||
|
||||
return {
|
||||
"status": True,
|
||||
@@ -1300,6 +1316,9 @@ async def update_rag_config(
|
||||
"YOUTUBE_LOADER_LANGUAGE": request.app.state.config.YOUTUBE_LOADER_LANGUAGE,
|
||||
"YOUTUBE_LOADER_PROXY_URL": request.app.state.config.YOUTUBE_LOADER_PROXY_URL,
|
||||
"YOUTUBE_LOADER_TRANSLATION": request.app.state.YOUTUBE_LOADER_TRANSLATION,
|
||||
"YANDEX_WEB_SEARCH_URL": request.app.state.config.YANDEX_WEB_SEARCH_URL,
|
||||
"YANDEX_WEB_SEARCH_API_KEY": request.app.state.config.YANDEX_WEB_SEARCH_API_KEY,
|
||||
"YANDEX_WEB_SEARCH_CONFIG": request.app.state.config.YANDEX_WEB_SEARCH_CONFIG,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -2240,6 +2259,17 @@ def search_web(
|
||||
request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
|
||||
user=user,
|
||||
)
|
||||
elif engine == "yandex":
|
||||
return search_yandex(
|
||||
request,
|
||||
request.app.state.config.YANDEX_WEB_SEARCH_URL,
|
||||
request.app.state.config.YANDEX_WEB_SEARCH_API_KEY,
|
||||
request.app.state.config.YANDEX_WEB_SEARCH_CONFIG,
|
||||
query,
|
||||
request.app.state.config.WEB_SEARCH_RESULT_COUNT,
|
||||
request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
|
||||
user=user,
|
||||
)
|
||||
else:
|
||||
raise Exception("No search engine API key found in environment variables")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user