From 7fefbb316dd3b58dd4b2139eb92d6cb3d3fb24a2 Mon Sep 17 00:00:00 2001 From: Jun Siang Cheah Date: Mon, 3 Jun 2024 21:13:10 +0100 Subject: [PATCH] fix: add backwards compat with older searxng urls --- backend/apps/rag/search/searxng.py | 40 +++++++++++++++++------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/backend/apps/rag/search/searxng.py b/backend/apps/rag/search/searxng.py index 24f9985a9..bf38ce0d3 100644 --- a/backend/apps/rag/search/searxng.py +++ b/backend/apps/rag/search/searxng.py @@ -10,48 +10,54 @@ log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) -def search_searxng(query_url: str, query: str, count: int, **kwargs) -> List[SearchResult]: +def search_searxng( + query_url: str, query: str, count: int, **kwargs +) -> List[SearchResult]: """ Search a SearXNG instance for a given query and return the results as a list of SearchResult objects. - + The function allows passing additional parameters such as language or time_range to tailor the search result. Args: - query_url (str): The base URL of the SearXNG server with a placeholder for the query "<query>". + query_url (str): The base URL of the SearXNG server. query (str): The search term or question to find in the SearXNG database. count (int): The maximum number of results to retrieve from the search. - + Keyword Args: language (str): Language filter for the search results; e.g., "en-US". Defaults to an empty string. time_range (str): Time range for filtering results by date; e.g., "2023-04-05..today" or "all-time". Defaults to ''. categories: (Optional[List[str]]): Specific categories within which the search should be performed, defaulting to an empty string if not provided. - + Returns: List[SearchResult]: A list of SearchResults sorted by relevance score in descending order. - + Raise: requests.exceptions.RequestException: If a request error occurs during the search process. 
""" - + # Default values for optional parameters are provided as empty strings or None when not specified. - language = kwargs.get('language', 'en-US') - time_range = kwargs.get('time_range', '') - categories = ''.join(kwargs.get('categories', [])) + language = kwargs.get("language", "en-US") + time_range = kwargs.get("time_range", "") + categories = "".join(kwargs.get("categories", [])) params = { "q": query, "format": "json", "pageno": 1, "results_per_page": count, - 'language': language, - 'time_range': time_range, - 'engines': '', - 'categories': categories, - 'theme': 'simple', - 'image_proxy': 0 - + "language": language, + "time_range": time_range, + "engines": "", + "categories": categories, + "theme": "simple", + "image_proxy": 0, } + # Legacy query format + if "<query>" in query_url: + # Strip all query parameters from the URL + query_url = query_url.split("?")[0] + log.debug(f"searching {query_url}") response = requests.get(