mirror of
https://github.com/open-webui/open-webui
synced 2025-06-26 18:26:48 +00:00
Merge pull request #4974 from SearchApi/add-search-api
feat: Add support for SearchApi as alternative to WebSearch
This commit is contained in:
@@ -76,6 +76,7 @@ from apps.rag.search.serply import search_serply
|
||||
from apps.rag.search.duckduckgo import search_duckduckgo
|
||||
from apps.rag.search.tavily import search_tavily
|
||||
from apps.rag.search.jina_search import search_jina
|
||||
from apps.rag.search.searchapi import search_searchapi
|
||||
|
||||
from utils.misc import (
|
||||
calculate_sha256,
|
||||
@@ -128,6 +129,8 @@ from config import (
|
||||
SERPER_API_KEY,
|
||||
SERPLY_API_KEY,
|
||||
TAVILY_API_KEY,
|
||||
SEARCHAPI_API_KEY,
|
||||
SEARCHAPI_ENGINE,
|
||||
RAG_WEB_SEARCH_RESULT_COUNT,
|
||||
RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||
RAG_EMBEDDING_OPENAI_BATCH_SIZE,
|
||||
@@ -189,6 +192,8 @@ app.state.config.SERPSTACK_HTTPS = SERPSTACK_HTTPS
|
||||
app.state.config.SERPER_API_KEY = SERPER_API_KEY
|
||||
app.state.config.SERPLY_API_KEY = SERPLY_API_KEY
|
||||
app.state.config.TAVILY_API_KEY = TAVILY_API_KEY
|
||||
app.state.config.SEARCHAPI_API_KEY = SEARCHAPI_API_KEY
|
||||
app.state.config.SEARCHAPI_ENGINE = SEARCHAPI_ENGINE
|
||||
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT
|
||||
app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS
|
||||
|
||||
@@ -427,6 +432,8 @@ async def get_rag_config(user=Depends(get_admin_user)):
|
||||
"serper_api_key": app.state.config.SERPER_API_KEY,
|
||||
"serply_api_key": app.state.config.SERPLY_API_KEY,
|
||||
"tavily_api_key": app.state.config.TAVILY_API_KEY,
|
||||
"searchapi_api_key": app.state.config.SEARCHAPI_API_KEY,
|
||||
"searchapi_engine": app.state.config.SEARCHAPI_ENGINE,
|
||||
"result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
|
||||
"concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||
},
|
||||
@@ -466,6 +473,8 @@ class WebSearchConfig(BaseModel):
|
||||
serper_api_key: Optional[str] = None
|
||||
serply_api_key: Optional[str] = None
|
||||
tavily_api_key: Optional[str] = None
|
||||
searchapi_api_key: Optional[str] = None
|
||||
searchapi_engine: Optional[str] = None
|
||||
result_count: Optional[int] = None
|
||||
concurrent_requests: Optional[int] = None
|
||||
|
||||
@@ -529,6 +538,10 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
|
||||
app.state.config.SERPER_API_KEY = form_data.web.search.serper_api_key
|
||||
app.state.config.SERPLY_API_KEY = form_data.web.search.serply_api_key
|
||||
app.state.config.TAVILY_API_KEY = form_data.web.search.tavily_api_key
|
||||
app.state.config.SEARCHAPI_API_KEY = form_data.web.search.searchapi_api_key
|
||||
app.state.config.SEARCHAPI_ENGINE = (
|
||||
form_data.web.search.searchapi_engine
|
||||
)
|
||||
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = form_data.web.search.result_count
|
||||
app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = (
|
||||
form_data.web.search.concurrent_requests
|
||||
@@ -566,6 +579,8 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
|
||||
"serpstack_https": app.state.config.SERPSTACK_HTTPS,
|
||||
"serper_api_key": app.state.config.SERPER_API_KEY,
|
||||
"serply_api_key": app.state.config.SERPLY_API_KEY,
|
||||
"searchapi_api_key": app.state.config.SEARCHAPI_API_KEY,
|
||||
"searchapi_engine": app.state.config.SEARCHAPI_ENGINE,
|
||||
"tavily_api_key": app.state.config.TAVILY_API_KEY,
|
||||
"result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
|
||||
"concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||
@@ -817,6 +832,7 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
|
||||
- SERPER_API_KEY
|
||||
- SERPLY_API_KEY
|
||||
- TAVILY_API_KEY
|
||||
- SEARCHAPI_API_KEY + SEARCHAPI_ENGINE (by default `google`)
|
||||
Args:
|
||||
query (str): The query to search for
|
||||
"""
|
||||
@@ -904,6 +920,17 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
|
||||
)
|
||||
else:
|
||||
raise Exception("No TAVILY_API_KEY found in environment variables")
|
||||
elif engine == "searchapi":
|
||||
if app.state.config.SEARCHAPI_API_KEY:
|
||||
return search_searchapi(
|
||||
app.state.config.SEARCHAPI_API_KEY,
|
||||
app.state.config.SEARCHAPI_ENGINE,
|
||||
query,
|
||||
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
|
||||
app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
|
||||
)
|
||||
else:
|
||||
raise Exception("No SEARCHAPI_API_KEY found in environment variables")
|
||||
elif engine == "jina":
|
||||
return search_jina(query, app.state.config.RAG_WEB_SEARCH_RESULT_COUNT)
|
||||
else:
|
||||
|
||||
@@ -8,7 +8,8 @@ def get_filtered_results(results, filter_list):
|
||||
return results
|
||||
filtered_results = []
|
||||
for result in results:
|
||||
domain = urlparse(result["url"]).netloc
|
||||
url = result.get("url") or result.get("link", "")
|
||||
domain = urlparse(url).netloc
|
||||
if any(domain.endswith(filtered_domain) for filtered_domain in filter_list):
|
||||
filtered_results.append(result)
|
||||
return filtered_results
|
||||
|
||||
50
backend/apps/rag/search/searchapi.py
Normal file
50
backend/apps/rag/search/searchapi.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import json
|
||||
import logging
|
||||
from typing import Optional
|
||||
import requests
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from apps.rag.search.main import SearchResult, get_filtered_results
|
||||
from config import SRC_LOG_LEVELS
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
||||
|
||||
def search_searchapi(
    api_key: str,
    engine: str,
    query: str,
    count: int,
    filter_list: Optional[list[str]] = None,
) -> list[SearchResult]:
    """Search using searchapi.io's API and return the results as a list of SearchResult objects.

    Args:
        api_key (str): A searchapi.io API key
        engine (str): The searchapi.io engine to query; an empty value falls
            back to ``google``
        query (str): The query to search for
        count (int): Maximum number of results to return
        filter_list (Optional[list[str]]): When non-empty, the results are
            narrowed with ``get_filtered_results`` before truncation

    Returns:
        list[SearchResult]: At most ``count`` entries taken from the
        response's ``organic_results``, ordered by their ``position`` field.

    Raises:
        requests.HTTPError: If searchapi.io answers with a 4xx/5xx status.
        requests.Timeout: If the service does not answer within the timeout.
    """
    url = "https://www.searchapi.io/api/v1/search"

    payload = {
        "engine": engine or "google",
        "q": query,
        "api_key": api_key,
    }

    # Let requests build the query string, and bound the wait so a stalled
    # upstream cannot hang the caller indefinitely.
    # NOTE(review): 30s is a conservative guess, tune if searches are slower.
    response = requests.get(url, params=payload, timeout=30)
    # Surface HTTP errors (bad key, quota, ...) instead of parsing an error
    # body as if it were a result set.
    response.raise_for_status()

    json_response = response.json()
    log.info("results from searchapi search: %s", json_response)

    # Entries without a "position" sort first (default 0).
    results = sorted(
        json_response.get("organic_results", []), key=lambda x: x.get("position", 0)
    )
    if filter_list:
        results = get_filtered_results(results, filter_list)

    # "title" and "snippet" are read leniently so one sparse entry does not
    # abort the whole search with a KeyError.
    # NOTE(review): assumes SearchResult accepts None for both - confirm.
    return [
        SearchResult(
            link=result["link"],
            title=result.get("title"),
            snippet=result.get("snippet"),
        )
        for result in results[:count]
    ]
|
||||
383
backend/apps/rag/search/testdata/searchapi.json
vendored
Normal file
383
backend/apps/rag/search/testdata/searchapi.json
vendored
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user