From fb8069123e49f703031be9d41e30bc85151b1a33 Mon Sep 17 00:00:00 2001 From: Jun Siang Cheah Date: Sat, 11 May 2024 19:07:36 +0800 Subject: [PATCH] feat: add WEB_SEARCH_RESULT_COUNT to control max number of results --- backend/apps/rag/search/brave.py | 6 +++--- backend/apps/rag/search/google_pse.py | 4 ++-- backend/apps/rag/search/searxng.py | 4 ++-- backend/apps/rag/search/serper.py | 4 ++-- backend/apps/rag/search/serpstack.py | 4 ++-- backend/config.py | 1 + 6 files changed, 12 insertions(+), 11 deletions(-) diff --git a/backend/apps/rag/search/brave.py b/backend/apps/rag/search/brave.py index 1ead43e53..91efaf396 100644 --- a/backend/apps/rag/search/brave.py +++ b/backend/apps/rag/search/brave.py @@ -3,7 +3,7 @@ import logging import requests from apps.rag.search.main import SearchResult -from config import SRC_LOG_LEVELS +from config import SRC_LOG_LEVELS, WEB_SEARCH_RESULT_COUNT log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) @@ -22,7 +22,7 @@ def search_brave(api_key: str, query: str) -> list[SearchResult]: "Accept-Encoding": "gzip", "X-Subscription-Token": api_key, } - params = {"q": query, "count": 5} + params = {"q": query, "count": WEB_SEARCH_RESULT_COUNT} response = requests.get(url, headers=headers, params=params) response.raise_for_status() @@ -33,5 +33,5 @@ def search_brave(api_key: str, query: str) -> list[SearchResult]: SearchResult( link=result["url"], title=result.get("title"), snippet=result.get("snippet") ) - for result in results[:5] + for result in results[:WEB_SEARCH_RESULT_COUNT] ] diff --git a/backend/apps/rag/search/google_pse.py b/backend/apps/rag/search/google_pse.py index a2a8f4640..7b4a757a3 100644 --- a/backend/apps/rag/search/google_pse.py +++ b/backend/apps/rag/search/google_pse.py @@ -4,7 +4,7 @@ import logging import requests from apps.rag.search.main import SearchResult -from config import SRC_LOG_LEVELS +from config import SRC_LOG_LEVELS, WEB_SEARCH_RESULT_COUNT log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) @@ -27,7 +27,7 @@ def search_google_pse( "cx": search_engine_id, "q": query, "key": api_key, - "num": 5, + "num": WEB_SEARCH_RESULT_COUNT, } response = requests.request("GET", url, headers=headers, params=params) diff --git a/backend/apps/rag/search/searxng.py b/backend/apps/rag/search/searxng.py index f04b61186..dd2afd5f7 100644 --- a/backend/apps/rag/search/searxng.py +++ b/backend/apps/rag/search/searxng.py @@ -3,7 +3,7 @@ import logging import requests from apps.rag.search.main import SearchResult -from config import SRC_LOG_LEVELS +from config import SRC_LOG_LEVELS, WEB_SEARCH_RESULT_COUNT log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) @@ -40,5 +40,5 @@ def search_searxng(query_url: str, query: str) -> list[SearchResult]: SearchResult( link=result["url"], title=result.get("title"), snippet=result.get("content") ) - for result in sorted_results[:5] + for result in sorted_results[:WEB_SEARCH_RESULT_COUNT] ] diff --git a/backend/apps/rag/search/serper.py b/backend/apps/rag/search/serper.py index c7c18a895..8244ae0b6 100644 --- a/backend/apps/rag/search/serper.py +++ b/backend/apps/rag/search/serper.py @@ -4,7 +4,7 @@ import logging import requests from apps.rag.search.main import SearchResult -from config import SRC_LOG_LEVELS +from config import SRC_LOG_LEVELS, WEB_SEARCH_RESULT_COUNT log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) @@ -35,5 +35,5 @@ def search_serper(api_key: str, query: str) -> list[SearchResult]: title=result.get("title"), snippet=result.get("description"), ) - for result in results[:5] + for result in results[:WEB_SEARCH_RESULT_COUNT] ] diff --git a/backend/apps/rag/search/serpstack.py b/backend/apps/rag/search/serpstack.py index 3fca75499..5cbf601ec 100644 --- a/backend/apps/rag/search/serpstack.py +++ b/backend/apps/rag/search/serpstack.py @@ -4,7 +4,7 @@ import logging import requests from apps.rag.search.main import SearchResult -from config import SRC_LOG_LEVELS +from config import SRC_LOG_LEVELS, WEB_SEARCH_RESULT_COUNT log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) @@ -39,5 +39,5 @@ def search_serpstack( SearchResult( link=result["url"], title=result.get("title"), snippet=result.get("snippet") ) - for result in results[:5] + for result in results[:WEB_SEARCH_RESULT_COUNT] ] diff --git a/backend/config.py b/backend/config.py index 24e124ea9..513fab482 100644 --- a/backend/config.py +++ b/backend/config.py @@ -549,6 +549,7 @@ BRAVE_SEARCH_API_KEY = os.getenv("BRAVE_SEARCH_API_KEY", "") SERPSTACK_API_KEY = os.getenv("SERPSTACK_API_KEY", "") SERPSTACK_HTTPS = os.getenv("SERPSTACK_HTTPS", "True").lower() == "true" SERPER_API_KEY = os.getenv("SERPER_API_KEY", "") +WEB_SEARCH_RESULT_COUNT = int(os.getenv("WEB_SEARCH_RESULT_COUNT", "10")) #################################### # Transcribe