mirror of
https://github.com/open-webui/open-webui
synced 2024-11-16 21:42:58 +00:00
Merge pull request #4974 from SearchApi/add-search-api
feat: Add support for SearchApi as alternative to WebSearch
This commit is contained in:
commit
d3bcfd4d5f
@ -37,7 +37,7 @@ Open WebUI is an [extensible](https://github.com/open-webui/pipelines), feature-
|
|||||||
|
|
||||||
- 📚 **Local RAG Integration**: Dive into the future of chat interactions with groundbreaking Retrieval Augmented Generation (RAG) support. This feature seamlessly integrates document interactions into your chat experience. You can load documents directly into the chat or add files to your document library, effortlessly accessing them using the `#` command before a query.
|
- 📚 **Local RAG Integration**: Dive into the future of chat interactions with groundbreaking Retrieval Augmented Generation (RAG) support. This feature seamlessly integrates document interactions into your chat experience. You can load documents directly into the chat or add files to your document library, effortlessly accessing them using the `#` command before a query.
|
||||||
|
|
||||||
- 🔍 **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, `serper`, `Serply`, `DuckDuckGo` and `TavilySearch` and inject the results directly into your chat experience.
|
- 🔍 **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, `serper`, `Serply`, `DuckDuckGo`, `TavilySearch` and `SearchApi` and inject the results directly into your chat experience.
|
||||||
|
|
||||||
- 🌐 **Web Browsing Capability**: Seamlessly integrate websites into your chat experience using the `#` command followed by a URL. This feature allows you to incorporate web content directly into your conversations, enhancing the richness and depth of your interactions.
|
- 🌐 **Web Browsing Capability**: Seamlessly integrate websites into your chat experience using the `#` command followed by a URL. This feature allows you to incorporate web content directly into your conversations, enhancing the richness and depth of your interactions.
|
||||||
|
|
||||||
|
@ -76,6 +76,7 @@ from apps.rag.search.serply import search_serply
|
|||||||
from apps.rag.search.duckduckgo import search_duckduckgo
|
from apps.rag.search.duckduckgo import search_duckduckgo
|
||||||
from apps.rag.search.tavily import search_tavily
|
from apps.rag.search.tavily import search_tavily
|
||||||
from apps.rag.search.jina_search import search_jina
|
from apps.rag.search.jina_search import search_jina
|
||||||
|
from apps.rag.search.searchapi import search_searchapi
|
||||||
|
|
||||||
from utils.misc import (
|
from utils.misc import (
|
||||||
calculate_sha256,
|
calculate_sha256,
|
||||||
@ -128,6 +129,8 @@ from config import (
|
|||||||
SERPER_API_KEY,
|
SERPER_API_KEY,
|
||||||
SERPLY_API_KEY,
|
SERPLY_API_KEY,
|
||||||
TAVILY_API_KEY,
|
TAVILY_API_KEY,
|
||||||
|
SEARCHAPI_API_KEY,
|
||||||
|
SEARCHAPI_ENGINE,
|
||||||
RAG_WEB_SEARCH_RESULT_COUNT,
|
RAG_WEB_SEARCH_RESULT_COUNT,
|
||||||
RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||||
RAG_EMBEDDING_OPENAI_BATCH_SIZE,
|
RAG_EMBEDDING_OPENAI_BATCH_SIZE,
|
||||||
@ -189,6 +192,8 @@ app.state.config.SERPSTACK_HTTPS = SERPSTACK_HTTPS
|
|||||||
app.state.config.SERPER_API_KEY = SERPER_API_KEY
|
app.state.config.SERPER_API_KEY = SERPER_API_KEY
|
||||||
app.state.config.SERPLY_API_KEY = SERPLY_API_KEY
|
app.state.config.SERPLY_API_KEY = SERPLY_API_KEY
|
||||||
app.state.config.TAVILY_API_KEY = TAVILY_API_KEY
|
app.state.config.TAVILY_API_KEY = TAVILY_API_KEY
|
||||||
|
app.state.config.SEARCHAPI_API_KEY = SEARCHAPI_API_KEY
|
||||||
|
app.state.config.SEARCHAPI_ENGINE = SEARCHAPI_ENGINE
|
||||||
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT
|
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT
|
||||||
app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS
|
app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS
|
||||||
|
|
||||||
@ -427,6 +432,8 @@ async def get_rag_config(user=Depends(get_admin_user)):
|
|||||||
"serper_api_key": app.state.config.SERPER_API_KEY,
|
"serper_api_key": app.state.config.SERPER_API_KEY,
|
||||||
"serply_api_key": app.state.config.SERPLY_API_KEY,
|
"serply_api_key": app.state.config.SERPLY_API_KEY,
|
||||||
"tavily_api_key": app.state.config.TAVILY_API_KEY,
|
"tavily_api_key": app.state.config.TAVILY_API_KEY,
|
||||||
|
"searchapi_api_key": app.state.config.SEARCHAPI_API_KEY,
|
||||||
|
"searchapi_engine": app.state.config.SEARCHAPI_ENGINE,
|
||||||
"result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
|
"result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
|
||||||
"concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
"concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||||
},
|
},
|
||||||
@ -466,6 +473,8 @@ class WebSearchConfig(BaseModel):
|
|||||||
serper_api_key: Optional[str] = None
|
serper_api_key: Optional[str] = None
|
||||||
serply_api_key: Optional[str] = None
|
serply_api_key: Optional[str] = None
|
||||||
tavily_api_key: Optional[str] = None
|
tavily_api_key: Optional[str] = None
|
||||||
|
searchapi_api_key: Optional[str] = None
|
||||||
|
searchapi_engine: Optional[str] = None
|
||||||
result_count: Optional[int] = None
|
result_count: Optional[int] = None
|
||||||
concurrent_requests: Optional[int] = None
|
concurrent_requests: Optional[int] = None
|
||||||
|
|
||||||
@ -529,6 +538,10 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
|
|||||||
app.state.config.SERPER_API_KEY = form_data.web.search.serper_api_key
|
app.state.config.SERPER_API_KEY = form_data.web.search.serper_api_key
|
||||||
app.state.config.SERPLY_API_KEY = form_data.web.search.serply_api_key
|
app.state.config.SERPLY_API_KEY = form_data.web.search.serply_api_key
|
||||||
app.state.config.TAVILY_API_KEY = form_data.web.search.tavily_api_key
|
app.state.config.TAVILY_API_KEY = form_data.web.search.tavily_api_key
|
||||||
|
app.state.config.SEARCHAPI_API_KEY = form_data.web.search.searchapi_api_key
|
||||||
|
app.state.config.SEARCHAPI_ENGINE = (
|
||||||
|
form_data.web.search.searchapi_engine
|
||||||
|
)
|
||||||
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = form_data.web.search.result_count
|
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = form_data.web.search.result_count
|
||||||
app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = (
|
app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = (
|
||||||
form_data.web.search.concurrent_requests
|
form_data.web.search.concurrent_requests
|
||||||
@ -566,6 +579,8 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
|
|||||||
"serpstack_https": app.state.config.SERPSTACK_HTTPS,
|
"serpstack_https": app.state.config.SERPSTACK_HTTPS,
|
||||||
"serper_api_key": app.state.config.SERPER_API_KEY,
|
"serper_api_key": app.state.config.SERPER_API_KEY,
|
||||||
"serply_api_key": app.state.config.SERPLY_API_KEY,
|
"serply_api_key": app.state.config.SERPLY_API_KEY,
|
||||||
|
"searchapi_api_key": app.state.config.SEARCHAPI_API_KEY,
|
||||||
|
"searchapi_engine": app.state.config.SEARCHAPI_ENGINE,
|
||||||
"tavily_api_key": app.state.config.TAVILY_API_KEY,
|
"tavily_api_key": app.state.config.TAVILY_API_KEY,
|
||||||
"result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
|
"result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
|
||||||
"concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
"concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||||
@ -817,6 +832,7 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
|
|||||||
- SERPER_API_KEY
|
- SERPER_API_KEY
|
||||||
- SERPLY_API_KEY
|
- SERPLY_API_KEY
|
||||||
- TAVILY_API_KEY
|
- TAVILY_API_KEY
|
||||||
|
- SEARCHAPI_API_KEY + SEARCHAPI_ENGINE (by default `google`)
|
||||||
Args:
|
Args:
|
||||||
query (str): The query to search for
|
query (str): The query to search for
|
||||||
"""
|
"""
|
||||||
@ -904,6 +920,17 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
raise Exception("No TAVILY_API_KEY found in environment variables")
|
raise Exception("No TAVILY_API_KEY found in environment variables")
|
||||||
|
elif engine == "searchapi":
|
||||||
|
if app.state.config.SEARCHAPI_API_KEY:
|
||||||
|
return search_searchapi(
|
||||||
|
app.state.config.SEARCHAPI_API_KEY,
|
||||||
|
app.state.config.SEARCHAPI_ENGINE,
|
||||||
|
query,
|
||||||
|
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
|
||||||
|
app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise Exception("No SEARCHAPI_API_KEY found in environment variables")
|
||||||
elif engine == "jina":
|
elif engine == "jina":
|
||||||
return search_jina(query, app.state.config.RAG_WEB_SEARCH_RESULT_COUNT)
|
return search_jina(query, app.state.config.RAG_WEB_SEARCH_RESULT_COUNT)
|
||||||
else:
|
else:
|
||||||
|
@ -8,7 +8,8 @@ def get_filtered_results(results, filter_list):
|
|||||||
return results
|
return results
|
||||||
filtered_results = []
|
filtered_results = []
|
||||||
for result in results:
|
for result in results:
|
||||||
domain = urlparse(result["url"]).netloc
|
url = result.get("url") or result.get("link", "")
|
||||||
|
domain = urlparse(url).netloc
|
||||||
if any(domain.endswith(filtered_domain) for filtered_domain in filter_list):
|
if any(domain.endswith(filtered_domain) for filtered_domain in filter_list):
|
||||||
filtered_results.append(result)
|
filtered_results.append(result)
|
||||||
return filtered_results
|
return filtered_results
|
||||||
|
50
backend/apps/rag/search/searchapi.py
Normal file
50
backend/apps/rag/search/searchapi.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Optional
|
||||||
|
import requests
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
from apps.rag.search.main import SearchResult, get_filtered_results
|
||||||
|
from config import SRC_LOG_LEVELS
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
||||||
|
|
||||||
|
def search_searchapi(
    api_key: str,
    engine: str,
    query: str,
    count: int,
    filter_list: Optional[list[str]] = None,
) -> list[SearchResult]:
    """Search using searchapi.io's API and return the results as a list of SearchResult objects.

    Args:
        api_key (str): A searchapi.io API key
        engine (str): The searchapi.io engine to query; a falsy value falls back to "google"
        query (str): The query to search for
        count (int): Maximum number of results to return
        filter_list (Optional[list[str]]): When non-empty, the results are passed
            through get_filtered_results with this list before being truncated

    Returns:
        list[SearchResult]: Up to `count` entries built from the response's
        "organic_results", ordered by their "position" field.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status code.
    """
    url = "https://www.searchapi.io/api/v1/search"

    payload = {
        "engine": engine or "google",
        "q": query,
        "api_key": api_key,
    }

    # Let requests build and encode the query string instead of formatting it by hand.
    response = requests.get(url, params=payload)
    # Fail loudly on an HTTP error instead of parsing the error body and
    # silently returning an empty result list.
    response.raise_for_status()

    json_response = response.json()
    log.info(f"results from searchapi search: {json_response}")

    # Results without a "position" sort first (key defaults to 0).
    results = sorted(
        json_response.get("organic_results", []), key=lambda x: x.get("position", 0)
    )
    if filter_list:
        results = get_filtered_results(results, filter_list)

    return [
        SearchResult(
            link=result["link"],
            # Tolerate results that omit title/snippet rather than raising
            # KeyError for the whole search.
            # NOTE(review): assumes SearchResult accepts None for these
            # fields — confirm against apps.rag.search.main.
            title=result.get("title"),
            snippet=result.get("snippet"),
        )
        for result in results[:count]
    ]
|
383
backend/apps/rag/search/testdata/searchapi.json
vendored
Normal file
383
backend/apps/rag/search/testdata/searchapi.json
vendored
Normal file
File diff suppressed because one or more lines are too long
@ -1237,6 +1237,18 @@ TAVILY_API_KEY = PersistentConfig(
|
|||||||
os.getenv("TAVILY_API_KEY", ""),
|
os.getenv("TAVILY_API_KEY", ""),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
SEARCHAPI_API_KEY = PersistentConfig(
|
||||||
|
"SEARCHAPI_API_KEY",
|
||||||
|
"rag.web.search.searchapi_api_key",
|
||||||
|
os.getenv("SEARCHAPI_API_KEY", ""),
|
||||||
|
)
|
||||||
|
|
||||||
|
SEARCHAPI_ENGINE = PersistentConfig(
|
||||||
|
"SEARCHAPI_ENGINE",
|
||||||
|
"rag.web.search.searchapi_engine",
|
||||||
|
os.getenv("SEARCHAPI_ENGINE", ""),
|
||||||
|
)
|
||||||
|
|
||||||
RAG_WEB_SEARCH_RESULT_COUNT = PersistentConfig(
|
RAG_WEB_SEARCH_RESULT_COUNT = PersistentConfig(
|
||||||
"RAG_WEB_SEARCH_RESULT_COUNT",
|
"RAG_WEB_SEARCH_RESULT_COUNT",
|
||||||
"rag.web.search.result_count",
|
"rag.web.search.result_count",
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
'serpstack',
|
'serpstack',
|
||||||
'serper',
|
'serper',
|
||||||
'serply',
|
'serply',
|
||||||
|
'searchapi',
|
||||||
'duckduckgo',
|
'duckduckgo',
|
||||||
'tavily',
|
'tavily',
|
||||||
'jina'
|
'jina'
|
||||||
@ -182,6 +183,34 @@
|
|||||||
bind:value={webConfig.search.serply_api_key}
|
bind:value={webConfig.search.serply_api_key}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
{:else if webConfig.search.engine === 'searchapi'}
|
||||||
|
<div>
|
||||||
|
<div class=" self-center text-xs font-medium mb-1">
|
||||||
|
{$i18n.t('SearchApi API Key')}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<SensitiveInput
|
||||||
|
placeholder={$i18n.t('Enter SearchApi API Key')}
|
||||||
|
bind:value={webConfig.search.searchapi_api_key}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div class="mt-1.5">
|
||||||
|
<div class=" self-center text-xs font-medium mb-1">
|
||||||
|
{$i18n.t('SearchApi Engine')}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="flex w-full">
|
||||||
|
<div class="flex-1">
|
||||||
|
<input
|
||||||
|
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
|
||||||
|
type="text"
|
||||||
|
placeholder={$i18n.t('Enter SearchApi Engine')}
|
||||||
|
bind:value={webConfig.search.searchapi_engine}
|
||||||
|
autocomplete="off"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
{:else if webConfig.search.engine === 'tavily'}
|
{:else if webConfig.search.engine === 'tavily'}
|
||||||
<div>
|
<div>
|
||||||
<div class=" self-center text-xs font-medium mb-1">
|
<div class=" self-center text-xs font-medium mb-1">
|
||||||
|
Loading…
Reference in New Issue
Block a user