feat: Backend for Self-Hosted/External Web Search/Loader Engines

This commit is contained in:
tth37
2025-04-14 01:49:05 +08:00
parent 8158609c93
commit 839ba22c90
6 changed files with 149 additions and 0 deletions

View File

@@ -0,0 +1,45 @@
import logging
from typing import Optional, List
import requests
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_external(
external_url: str,
external_api_key: str,
query: str,
count: int,
filter_list: Optional[List[str]] = None,
) -> List[SearchResult]:
try:
response = requests.get(
external_url,
headers={
"User-Agent": "Open WebUI (https://github.com/open-webui/open-webui) RAG Bot",
"Authorization": f"Bearer {external_api_key}",
},
params={
"query": query,
"count": count,
}
)
response.raise_for_status()
results = response.json()
if filter_list:
results = get_filtered_results(results, filter_list)
return [
SearchResult(
link=result.get("link"),
title=result.get("title"),
snippet=result.get("snippet"),
)
for result in results[:count]
]
except Exception as e:
log.error(f"Error in External search: {e}")
return []

View File

@@ -25,6 +25,7 @@ from langchain_community.document_loaders.firecrawl import FireCrawlLoader
from langchain_community.document_loaders.base import BaseLoader
from langchain_core.documents import Document
from open_webui.retrieval.loaders.tavily import TavilyLoader
from open_webui.retrieval.loaders.external import ExternalLoader
from open_webui.constants import ERROR_MESSAGES
from open_webui.config import (
ENABLE_RAG_LOCAL_WEB_FETCH,
@@ -35,6 +36,8 @@ from open_webui.config import (
FIRECRAWL_API_KEY,
TAVILY_API_KEY,
TAVILY_EXTRACT_DEPTH,
EXTERNAL_WEB_LOADER_URL,
EXTERNAL_WEB_LOADER_API_KEY,
)
from open_webui.env import SRC_LOG_LEVELS
@@ -619,6 +622,11 @@ def get_web_loader(
web_loader_args["api_key"] = TAVILY_API_KEY.value
web_loader_args["extract_depth"] = TAVILY_EXTRACT_DEPTH.value
if WEB_LOADER_ENGINE.value == "external":
WebLoaderClass = ExternalLoader
web_loader_args["external_url"] = EXTERNAL_WEB_LOADER_URL.value
web_loader_args["external_api_key"] = EXTERNAL_WEB_LOADER_API_KEY.value
if WebLoaderClass:
web_loader = WebLoaderClass(**web_loader_args)