From ea6b8984abfde2ae0cde8268ae1a3276bd650f35 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 1 Jun 2024 19:03:56 -0700 Subject: [PATCH] refac: web search --- backend/apps/rag/main.py | 44 +++++++++++++++++++++++ backend/config.py | 76 +++++++++++++++++++++++++++++++--------- backend/main.py | 3 +- 3 files changed, 104 insertions(+), 19 deletions(-) diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index cc6cf81a3..27d7f47dd 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -96,6 +96,14 @@ from config import ( RAG_TEMPLATE, ENABLE_RAG_LOCAL_WEB_FETCH, YOUTUBE_LOADER_LANGUAGE, + ENABLE_RAG_WEB_SEARCH, + SEARXNG_QUERY_URL, + GOOGLE_PSE_API_KEY, + GOOGLE_PSE_ENGINE_ID, + SERPSTACK_API_KEY, + SERPSTACK_HTTPS, + SERPER_API_KEY, + RAG_WEB_SEARCH_RESULT_COUNT, RAG_WEB_SEARCH_CONCURRENT_REQUESTS, AppConfig, ) @@ -136,6 +144,17 @@ app.state.config.YOUTUBE_LOADER_LANGUAGE = YOUTUBE_LOADER_LANGUAGE app.state.YOUTUBE_LOADER_TRANSLATION = None +app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH +app.state.config.SEARXNG_QUERY_URL = SEARXNG_QUERY_URL +app.state.config.GOOGLE_PSE_API_KEY = GOOGLE_PSE_API_KEY +app.state.config.GOOGLE_PSE_ENGINE_ID = GOOGLE_PSE_ENGINE_ID +app.state.config.SERPSTACK_API_KEY = SERPSTACK_API_KEY +app.state.config.SERPSTACK_HTTPS = SERPSTACK_HTTPS +app.state.config.SERPER_API_KEY = SERPER_API_KEY +app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT +app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS + + def update_embedding_model( embedding_model: str, update_model: bool = False, @@ -337,6 +356,19 @@ async def get_rag_config(user=Depends(get_admin_user)): "language": app.state.config.YOUTUBE_LOADER_LANGUAGE, "translation": app.state.YOUTUBE_LOADER_TRANSLATION, }, + "web": { + "search": { + "enable": app.state.config.ENABLE_RAG_WEB_SEARCH, + "searxng_query_url": app.state.config.SEARXNG_QUERY_URL, + "google_pse_api_key": app.state.config.GOOGLE_PSE_API_KEY, + "google_pse_engine_id": app.state.config.GOOGLE_PSE_ENGINE_ID, + "serpstack_api_key": app.state.config.SERPSTACK_API_KEY, + "serpstack_https": app.state.config.SERPSTACK_HTTPS, + "serper_api_key": app.state.config.SERPER_API_KEY, + "result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, + "concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, + } + }, } @@ -350,6 +382,18 @@ class YoutubeLoaderConfig(BaseModel): translation: Optional[str] = None +class WebSearchConfig(BaseModel): + enable: bool + searxng_query_url: Optional[str] = None + google_pse_api_key: Optional[str] = None + google_pse_engine_id: Optional[str] = None + serpstack_api_key: Optional[str] = None + serpstack_https: Optional[bool] = None + serper_api_key: Optional[str] = None + result_count: Optional[int] = None + concurrent_requests: Optional[int] = None + + class ConfigUpdateForm(BaseModel): pdf_extract_images: Optional[bool] = None chunk: Optional[ChunkParamUpdateForm] = None diff --git a/backend/config.py b/backend/config.py index dfdeee732..8f97458f2 100644 --- a/backend/config.py +++ b/backend/config.py @@ -766,28 +766,70 @@ YOUTUBE_LOADER_LANGUAGE = PersistentConfig( os.getenv("YOUTUBE_LOADER_LANGUAGE", "en").split(","), ) -SEARXNG_QUERY_URL = os.getenv("SEARXNG_QUERY_URL", "") -GOOGLE_PSE_API_KEY = os.getenv("GOOGLE_PSE_API_KEY", "") -GOOGLE_PSE_ENGINE_ID = os.getenv("GOOGLE_PSE_ENGINE_ID", "") -BRAVE_SEARCH_API_KEY = os.getenv("BRAVE_SEARCH_API_KEY", "") -SERPSTACK_API_KEY = os.getenv("SERPSTACK_API_KEY", "") -SERPSTACK_HTTPS = os.getenv("SERPSTACK_HTTPS", "True").lower() == "true" -SERPER_API_KEY = os.getenv("SERPER_API_KEY", "") - -RAG_WEB_SEARCH_ENABLED = ( - SEARXNG_QUERY_URL != "" - or (GOOGLE_PSE_API_KEY != "" and GOOGLE_PSE_ENGINE_ID != "") - or BRAVE_SEARCH_API_KEY != "" - or SERPSTACK_API_KEY != "" - or SERPER_API_KEY != "" +ENABLE_RAG_WEB_SEARCH = PersistentConfig( + "ENABLE_RAG_WEB_SEARCH", + "rag.web.search.enable", + os.getenv("ENABLE_RAG_WEB_SEARCH", "False").lower() == "true", ) -RAG_WEB_SEARCH_RESULT_COUNT = int(os.getenv("RAG_WEB_SEARCH_RESULT_COUNT", "3")) -RAG_WEB_SEARCH_CONCURRENT_REQUESTS = int( - os.getenv("RAG_WEB_SEARCH_CONCURRENT_REQUESTS", "10") + +SEARXNG_QUERY_URL = PersistentConfig( + "SEARXNG_QUERY_URL", + "rag.web.search.searxng_query_url", + os.getenv("SEARXNG_QUERY_URL", ""), ) +GOOGLE_PSE_API_KEY = PersistentConfig( + "GOOGLE_PSE_API_KEY", + "rag.web.search.google_pse_api_key", + os.getenv("GOOGLE_PSE_API_KEY", ""), +) + +GOOGLE_PSE_ENGINE_ID = PersistentConfig( + "GOOGLE_PSE_ENGINE_ID", + "rag.web.search.google_pse_engine_id", + os.getenv("GOOGLE_PSE_ENGINE_ID", ""), +) + +BRAVE_SEARCH_API_KEY = PersistentConfig( + "BRAVE_SEARCH_API_KEY", + "rag.web.search.brave_search_api_key", + os.getenv("BRAVE_SEARCH_API_KEY", ""), +) + +SERPSTACK_API_KEY = PersistentConfig( + "SERPSTACK_API_KEY", + "rag.web.search.serpstack_api_key", + os.getenv("SERPSTACK_API_KEY", ""), +) + +SERPSTACK_HTTPS = PersistentConfig( + "SERPSTACK_HTTPS", + "rag.web.search.serpstack_https", + os.getenv("SERPSTACK_HTTPS", "True").lower() == "true", +) + +SERPER_API_KEY = PersistentConfig( + "SERPER_API_KEY", + "rag.web.search.serper_api_key", + os.getenv("SERPER_API_KEY", ""), +) + + +RAG_WEB_SEARCH_RESULT_COUNT = PersistentConfig( + "RAG_WEB_SEARCH_RESULT_COUNT", + "rag.web.search.result_count", + int(os.getenv("RAG_WEB_SEARCH_RESULT_COUNT", "3")), +) + +RAG_WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig( + "RAG_WEB_SEARCH_CONCURRENT_REQUESTS", + "rag.web.search.concurrent_requests", + int(os.getenv("RAG_WEB_SEARCH_CONCURRENT_REQUESTS", "10")), +) + + #################################### # Transcribe #################################### diff --git a/backend/main.py b/backend/main.py index a64543b41..44bf8ab74 100644 --- a/backend/main.py +++ b/backend/main.py @@ -60,7 +60,6 @@ from config import ( SRC_LOG_LEVELS, WEBHOOK_URL, ENABLE_ADMIN_EXPORT, - RAG_WEB_SEARCH_ENABLED, AppConfig, WEBUI_BUILD_HASH, ) @@ -825,7 +824,7 @@ async def get_app_config(): "auth": WEBUI_AUTH, "auth_trusted_header": bool(webui_app.state.AUTH_TRUSTED_EMAIL_HEADER), "enable_signup": webui_app.state.config.ENABLE_SIGNUP, - "enable_web_search": RAG_WEB_SEARCH_ENABLED, + "enable_web_search": rag_app.state.config.ENABLE_RAG_WEB_SEARCH, "enable_image_generation": images_app.state.config.ENABLED, "enable_community_sharing": webui_app.state.config.ENABLE_COMMUNITY_SHARING, "enable_admin_export": ENABLE_ADMIN_EXPORT,