mirror of
https://github.com/open-webui/open-webui
synced 2024-11-06 08:56:39 +00:00
7e761a69a7
Accept additional parameters such as language, time_range, and categories to tailor the search results. Raise an exception if a request error occurs during the search process. Use the params argument to construct the query string. Sort results by relevance. Expand the docstring.
80 lines
2.8 KiB
Python
80 lines
2.8 KiB
Python
import logging
|
|
import requests
|
|
|
|
from typing import List
|
|
|
|
from apps.rag.search.main import SearchResult
|
|
from config import SRC_LOG_LEVELS
|
|
|
|
# Module-level logger named after this module; its level is read from the
# "RAG" entry of SRC_LOG_LEVELS (imported from config).
log = logging.getLogger(__name__)
|
|
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
|
|
|
|
|
def search_searxng(query_url: str, query: str, count: int, **kwargs) -> List[SearchResult]:
    """
    Search a SearXNG instance for a given query and return the results as a list of SearchResult objects.

    The query and every option are sent as URL parameters of a single GET request that asks for
    JSON output. Additional keyword arguments such as language or time_range tailor the search.

    Args:
        query_url (str): URL of the SearXNG search endpoint. A legacy URL that still embeds the
            "<query>" placeholder is accepted: its query string is dropped, because the query is
            now supplied through the request parameters.
        query (str): The search term or question to look up.
        count (int): The maximum number of results to return.

    Keyword Args:
        language (str): Language filter for the search results; e.g., "en-US". Defaults to "en-US".
        time_range (str): Time range filter forwarded as SearXNG's ``time_range`` parameter;
            e.g., "day", "month" or "year". Defaults to '' (no filter).
        categories (Optional[List[str]]): Categories within which the search should be performed.
            A list is sent comma-separated; a plain string is sent unchanged. Defaults to no
            category filter.

    Returns:
        List[SearchResult]: At most ``count`` results, sorted by relevance score in descending
        order (results without a score are treated as score 0).

    Raises:
        requests.exceptions.RequestException: If a request error occurs during the search process,
            including a non-success HTTP status.
        KeyError: If a returned result has no "url" field.
    """

    language = kwargs.get("language", "en-US")
    time_range = kwargs.get("time_range", "")

    # Several categories must be comma-separated; joining them with '' would
    # fuse them into one unknown category name. A bare string is passed
    # through untouched so it is not split into single characters.
    categories = kwargs.get("categories") or []
    if not isinstance(categories, str):
        categories = ",".join(categories)

    params = {
        "q": query,
        "format": "json",
        "pageno": 1,
        # NOTE(review): "results_per_page" does not appear in the SearXNG
        # search API parameter list; the limit is enforced on the returned
        # list below. Kept so the outgoing request is unchanged.
        "results_per_page": count,
        "language": language,
        "time_range": time_range,
        "engines": "",
        "categories": categories,
        "theme": "simple",
        "image_proxy": 0,
    }

    # Legacy configuration: the URL used to carry the query itself
    # (e.g. ".../search?q=<query>"). Keeping that query string would send the
    # literal placeholder as a second "q" next to the real one in `params`.
    if "<query>" in query_url:
        query_url = query_url.split("?")[0]

    log.info("searching %s", query_url)

    response = requests.get(
        query_url,
        headers={
            "User-Agent": "Open WebUI (https://github.com/open-webui/open-webui) RAG Bot",
            # NOTE(review): text/html is requested although the body is JSON;
            # presumably required by the server's request filtering - confirm
            # before changing.
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "en-US,en;q=0.5",
            "Connection": "keep-alive",
        },
        params=params,
    )

    response.raise_for_status()  # Raise an exception for HTTP errors.

    json_response = response.json()
    results = json_response.get("results", [])
    sorted_results = sorted(results, key=lambda x: x.get("score", 0), reverse=True)

    # Honour `count` on our side: the server decides its own page size.
    return [
        SearchResult(
            link=result["url"], title=result.get("title"), snippet=result.get("content")
        )
        for result in sorted_results[:count]
    ]
|