Merge pull request #4974 from SearchApi/add-search-api

feat: Add support for SearchApi as alternative to WebSearch
This commit is contained in:
Timothy Jaeryang Baek 2024-08-30 19:42:36 +02:00 committed by GitHub
commit d3bcfd4d5f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 504 additions and 2 deletions

View File

@ -37,7 +37,7 @@ Open WebUI is an [extensible](https://github.com/open-webui/pipelines), feature-
- 📚 **Local RAG Integration**: Dive into the future of chat interactions with groundbreaking Retrieval Augmented Generation (RAG) support. This feature seamlessly integrates document interactions into your chat experience. You can load documents directly into the chat or add files to your document library, effortlessly accessing them using the `#` command before a query. - 📚 **Local RAG Integration**: Dive into the future of chat interactions with groundbreaking Retrieval Augmented Generation (RAG) support. This feature seamlessly integrates document interactions into your chat experience. You can load documents directly into the chat or add files to your document library, effortlessly accessing them using the `#` command before a query.
- 🔍 **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, `serper`, `Serply`, `DuckDuckGo` and `TavilySearch` and inject the results directly into your chat experience. - 🔍 **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, `serper`, `Serply`, `DuckDuckGo`, `TavilySearch` and `SearchApi` and inject the results directly into your chat experience.
- 🌐 **Web Browsing Capability**: Seamlessly integrate websites into your chat experience using the `#` command followed by a URL. This feature allows you to incorporate web content directly into your conversations, enhancing the richness and depth of your interactions. - 🌐 **Web Browsing Capability**: Seamlessly integrate websites into your chat experience using the `#` command followed by a URL. This feature allows you to incorporate web content directly into your conversations, enhancing the richness and depth of your interactions.

View File

@ -76,6 +76,7 @@ from apps.rag.search.serply import search_serply
from apps.rag.search.duckduckgo import search_duckduckgo from apps.rag.search.duckduckgo import search_duckduckgo
from apps.rag.search.tavily import search_tavily from apps.rag.search.tavily import search_tavily
from apps.rag.search.jina_search import search_jina from apps.rag.search.jina_search import search_jina
from apps.rag.search.searchapi import search_searchapi
from utils.misc import ( from utils.misc import (
calculate_sha256, calculate_sha256,
@ -128,6 +129,8 @@ from config import (
SERPER_API_KEY, SERPER_API_KEY,
SERPLY_API_KEY, SERPLY_API_KEY,
TAVILY_API_KEY, TAVILY_API_KEY,
SEARCHAPI_API_KEY,
SEARCHAPI_ENGINE,
RAG_WEB_SEARCH_RESULT_COUNT, RAG_WEB_SEARCH_RESULT_COUNT,
RAG_WEB_SEARCH_CONCURRENT_REQUESTS, RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
RAG_EMBEDDING_OPENAI_BATCH_SIZE, RAG_EMBEDDING_OPENAI_BATCH_SIZE,
@ -189,6 +192,8 @@ app.state.config.SERPSTACK_HTTPS = SERPSTACK_HTTPS
app.state.config.SERPER_API_KEY = SERPER_API_KEY app.state.config.SERPER_API_KEY = SERPER_API_KEY
app.state.config.SERPLY_API_KEY = SERPLY_API_KEY app.state.config.SERPLY_API_KEY = SERPLY_API_KEY
app.state.config.TAVILY_API_KEY = TAVILY_API_KEY app.state.config.TAVILY_API_KEY = TAVILY_API_KEY
app.state.config.SEARCHAPI_API_KEY = SEARCHAPI_API_KEY
app.state.config.SEARCHAPI_ENGINE = SEARCHAPI_ENGINE
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT
app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS
@ -427,6 +432,8 @@ async def get_rag_config(user=Depends(get_admin_user)):
"serper_api_key": app.state.config.SERPER_API_KEY, "serper_api_key": app.state.config.SERPER_API_KEY,
"serply_api_key": app.state.config.SERPLY_API_KEY, "serply_api_key": app.state.config.SERPLY_API_KEY,
"tavily_api_key": app.state.config.TAVILY_API_KEY, "tavily_api_key": app.state.config.TAVILY_API_KEY,
"searchapi_api_key": app.state.config.SEARCHAPI_API_KEY,
"seaarchapi_engine": app.state.config.SEARCHAPI_ENGINE,
"result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, "result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
"concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, "concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
}, },
@ -466,6 +473,8 @@ class WebSearchConfig(BaseModel):
serper_api_key: Optional[str] = None serper_api_key: Optional[str] = None
serply_api_key: Optional[str] = None serply_api_key: Optional[str] = None
tavily_api_key: Optional[str] = None tavily_api_key: Optional[str] = None
searchapi_api_key: Optional[str] = None
searchapi_engine: Optional[str] = None
result_count: Optional[int] = None result_count: Optional[int] = None
concurrent_requests: Optional[int] = None concurrent_requests: Optional[int] = None
@ -529,6 +538,10 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
app.state.config.SERPER_API_KEY = form_data.web.search.serper_api_key app.state.config.SERPER_API_KEY = form_data.web.search.serper_api_key
app.state.config.SERPLY_API_KEY = form_data.web.search.serply_api_key app.state.config.SERPLY_API_KEY = form_data.web.search.serply_api_key
app.state.config.TAVILY_API_KEY = form_data.web.search.tavily_api_key app.state.config.TAVILY_API_KEY = form_data.web.search.tavily_api_key
app.state.config.SEARCHAPI_API_KEY = form_data.web.search.searchapi_api_key
app.state.config.SEARCHAPI_ENGINE = (
form_data.web.search.searchapi_engine
)
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = form_data.web.search.result_count app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = form_data.web.search.result_count
app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = ( app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = (
form_data.web.search.concurrent_requests form_data.web.search.concurrent_requests
@ -566,6 +579,8 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
"serpstack_https": app.state.config.SERPSTACK_HTTPS, "serpstack_https": app.state.config.SERPSTACK_HTTPS,
"serper_api_key": app.state.config.SERPER_API_KEY, "serper_api_key": app.state.config.SERPER_API_KEY,
"serply_api_key": app.state.config.SERPLY_API_KEY, "serply_api_key": app.state.config.SERPLY_API_KEY,
"serachapi_api_key": app.state.config.SEARCHAPI_API_KEY,
"searchapi_engine": app.state.config.SEARCHAPI_ENGINE,
"tavily_api_key": app.state.config.TAVILY_API_KEY, "tavily_api_key": app.state.config.TAVILY_API_KEY,
"result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, "result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
"concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, "concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
@ -817,6 +832,7 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
- SERPER_API_KEY - SERPER_API_KEY
- SERPLY_API_KEY - SERPLY_API_KEY
- TAVILY_API_KEY - TAVILY_API_KEY
- SEARCHAPI_API_KEY + SEARCHAPI_ENGINE (by default `google`)
Args: Args:
query (str): The query to search for query (str): The query to search for
""" """
@ -904,6 +920,17 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
) )
else: else:
raise Exception("No TAVILY_API_KEY found in environment variables") raise Exception("No TAVILY_API_KEY found in environment variables")
elif engine == "searchapi":
if app.state.config.SEARCHAPI_API_KEY:
return search_searchapi(
app.state.config.SEARCHAPI_API_KEY,
app.state.config.SEARCHAPI_ENGINE,
query,
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
)
else:
raise Exception("No SEARCHAPI_API_KEY found in environment variables")
elif engine == "jina": elif engine == "jina":
return search_jina(query, app.state.config.RAG_WEB_SEARCH_RESULT_COUNT) return search_jina(query, app.state.config.RAG_WEB_SEARCH_RESULT_COUNT)
else: else:

View File

@ -8,7 +8,8 @@ def get_filtered_results(results, filter_list):
return results return results
filtered_results = [] filtered_results = []
for result in results: for result in results:
domain = urlparse(result["url"]).netloc url = result.get("url") or result.get("link", "")
domain = urlparse(url).netloc
if any(domain.endswith(filtered_domain) for filtered_domain in filter_list): if any(domain.endswith(filtered_domain) for filtered_domain in filter_list):
filtered_results.append(result) filtered_results.append(result)
return filtered_results return filtered_results

View File

@ -0,0 +1,50 @@
import json
import logging
from typing import Optional
import requests
from urllib.parse import urlencode
from apps.rag.search.main import SearchResult, get_filtered_results
from config import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_searchapi(
api_key: str, engine: str, query: str, count: int, filter_list: Optional[list[str]] = None
) -> list[SearchResult]:
"""Search using searchapi.io's API and return the results as a list of SearchResult objects.
Args:
api_key (str): A searchapi.io API key
query (str): The query to search for
"""
url = "https://www.searchapi.io/api/v1/search"
engine = engine or "google"
payload = {
"engine": engine,
"q": query,
"api_key": api_key
}
url = f"{url}?{urlencode(payload)}"
response = requests.request("GET", url)
json_response = response.json()
log.info(f"results from searchapi search: {json_response}")
results = sorted(
json_response.get("organic_results", []), key=lambda x: x.get("position", 0)
)
if filter_list:
results = get_filtered_results(results, filter_list)
return [
SearchResult(
link=result["link"],
title=result["title"],
snippet=result["snippet"]
)
for result in results[:count]
]

File diff suppressed because one or more lines are too long

View File

@ -1237,6 +1237,18 @@ TAVILY_API_KEY = PersistentConfig(
os.getenv("TAVILY_API_KEY", ""), os.getenv("TAVILY_API_KEY", ""),
) )
SEARCHAPI_API_KEY = PersistentConfig(
"SEARCHAPI_API_KEY",
"rag.web.search.searchapi_api_key",
os.getenv("SEARCHAPI_API_KEY", ""),
)
SEARCHAPI_ENGINE = PersistentConfig(
"SEARCHAPI_ENGINE",
"rag.web.search.searchapi_engine",
os.getenv("SEARCHAPI_ENGINE", ""),
)
RAG_WEB_SEARCH_RESULT_COUNT = PersistentConfig( RAG_WEB_SEARCH_RESULT_COUNT = PersistentConfig(
"RAG_WEB_SEARCH_RESULT_COUNT", "RAG_WEB_SEARCH_RESULT_COUNT",
"rag.web.search.result_count", "rag.web.search.result_count",

View File

@ -19,6 +19,7 @@
'serpstack', 'serpstack',
'serper', 'serper',
'serply', 'serply',
'searchapi',
'duckduckgo', 'duckduckgo',
'tavily', 'tavily',
'jina' 'jina'
@ -182,6 +183,34 @@
bind:value={webConfig.search.serply_api_key} bind:value={webConfig.search.serply_api_key}
/> />
</div> </div>
{:else if webConfig.search.engine === 'searchapi'}
<div>
<div class=" self-center text-xs font-medium mb-1">
{$i18n.t('SearchApi API Key')}
</div>
<SensitiveInput
placeholder={$i18n.t('Enter SearchApi API Key')}
bind:value={webConfig.search.searchapi_api_key}
/>
</div>
<div class="mt-1.5">
<div class=" self-center text-xs font-medium mb-1">
{$i18n.t('SearchApi Engine')}
</div>
<div class="flex w-full">
<div class="flex-1">
<input
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
type="text"
placeholder={$i18n.t('Enter SearchApi Engine')}
bind:value={webConfig.search.searchapi_engine}
autocomplete="off"
/>
</div>
</div>
</div>
{:else if webConfig.search.engine === 'tavily'} {:else if webConfig.search.engine === 'tavily'}
<div> <div>
<div class=" self-center text-xs font-medium mb-1"> <div class=" self-center text-xs font-medium mb-1">