From 48a23ce3fe06ca62faa7c1a19c95229126ad2b91 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sat, 12 Apr 2025 16:33:36 -0700 Subject: [PATCH] refac: web/rag config --- backend/open_webui/config.py | 120 +- backend/open_webui/main.py | 41 +- backend/open_webui/retrieval/web/utils.py | 48 +- backend/open_webui/routers/retrieval.py | 873 ++++++------ backend/start.sh | 4 +- backend/start_windows.bat | 4 +- docker-compose.playwright.yaml | 4 +- src/lib/apis/retrieval/index.ts | 33 +- .../admin/Settings/Documents.svelte | 1206 ++++++++--------- .../admin/Settings/WebSearch.svelte | 540 ++++---- src/lib/utils/rag/index.ts | 24 - 11 files changed, 1367 insertions(+), 1530 deletions(-) delete mode 100644 src/lib/utils/rag/index.ts diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 635151c7e..e13ace668 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -201,7 +201,10 @@ def save_config(config): T = TypeVar("T") -ENABLE_PERSISTENT_CONFIG = os.environ.get("ENABLE_PERSISTENT_CONFIG", "True").lower() == "true" +ENABLE_PERSISTENT_CONFIG = ( + os.environ.get("ENABLE_PERSISTENT_CONFIG", "True").lower() == "true" +) + class PersistentConfig(Generic[T]): def __init__(self, env_name: str, config_path: str, env_value: T): @@ -612,10 +615,16 @@ def load_oauth_providers(): "scope": OAUTH_SCOPES.value, } - if OAUTH_CODE_CHALLENGE_METHOD.value and OAUTH_CODE_CHALLENGE_METHOD.value == "S256": + if ( + OAUTH_CODE_CHALLENGE_METHOD.value + and OAUTH_CODE_CHALLENGE_METHOD.value == "S256" + ): client_kwargs["code_challenge_method"] = "S256" elif OAUTH_CODE_CHALLENGE_METHOD.value: - raise Exception('Code challenge methods other than "%s" not supported. Given: "%s"' % ("S256", OAUTH_CODE_CHALLENGE_METHOD.value)) + raise Exception( + 'Code challenge methods other than "%s" not supported. Given: "%s"' + % ("S256", OAUTH_CODE_CHALLENGE_METHOD.value) + ) client.register( name="oidc", @@ -1820,12 +1829,6 @@ RAG_FILE_MAX_SIZE = PersistentConfig( ), ) -ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = PersistentConfig( - "ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION", - "rag.enable_web_loader_ssl_verification", - os.environ.get("ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION", "True").lower() == "true", -) - RAG_EMBEDDING_ENGINE = PersistentConfig( "RAG_EMBEDDING_ENGINE", "rag.embedding_engine", @@ -1990,16 +1993,20 @@ YOUTUBE_LOADER_PROXY_URL = PersistentConfig( ) -ENABLE_RAG_WEB_SEARCH = PersistentConfig( - "ENABLE_RAG_WEB_SEARCH", +#################################### +# Web Search (RAG) +#################################### + +ENABLE_WEB_SEARCH = PersistentConfig( + "ENABLE_WEB_SEARCH", "rag.web.search.enable", - os.getenv("ENABLE_RAG_WEB_SEARCH", "False").lower() == "true", + os.getenv("ENABLE_WEB_SEARCH", "False").lower() == "true", ) -RAG_WEB_SEARCH_ENGINE = PersistentConfig( - "RAG_WEB_SEARCH_ENGINE", +WEB_SEARCH_ENGINE = PersistentConfig( + "WEB_SEARCH_ENGINE", "rag.web.search.engine", - os.getenv("RAG_WEB_SEARCH_ENGINE", ""), + os.getenv("WEB_SEARCH_ENGINE", ""), ) BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = PersistentConfig( @@ -2008,10 +2015,18 @@ BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = PersistentConfig( os.getenv("BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL", "False").lower() == "true", ) + +WEB_SEARCH_RESULT_COUNT = PersistentConfig( + "WEB_SEARCH_RESULT_COUNT", + "rag.web.search.result_count", + int(os.getenv("WEB_SEARCH_RESULT_COUNT", "3")), +) + + # You can provide a list of your own websites to filter after performing a web search. # This ensures the highest level of safety and reliability of the information sources. -RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig( - "RAG_WEB_SEARCH_DOMAIN_FILTER_LIST", +WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig( + "WEB_SEARCH_DOMAIN_FILTER_LIST", "rag.web.search.domain.filter_list", [ # "wikipedia.com", @@ -2020,6 +2035,30 @@ RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig( ], ) +WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig( + "WEB_SEARCH_CONCURRENT_REQUESTS", + "rag.web.search.concurrent_requests", + int(os.getenv("WEB_SEARCH_CONCURRENT_REQUESTS", "10")), +) + +WEB_LOADER_ENGINE = PersistentConfig( + "WEB_LOADER_ENGINE", + "rag.web.loader.engine", + os.environ.get("WEB_LOADER_ENGINE", ""), +) + +ENABLE_WEB_LOADER_SSL_VERIFICATION = PersistentConfig( + "ENABLE_WEB_LOADER_SSL_VERIFICATION", + "rag.web.loader.ssl_verification", + os.environ.get("ENABLE_WEB_LOADER_SSL_VERIFICATION", "True").lower() == "true", +) + +WEB_SEARCH_TRUST_ENV = PersistentConfig( + "WEB_SEARCH_TRUST_ENV", + "rag.web.search.trust_env", + os.getenv("WEB_SEARCH_TRUST_ENV", "False").lower() == "true", +) + SEARXNG_QUERY_URL = PersistentConfig( "SEARXNG_QUERY_URL", @@ -2155,34 +2194,22 @@ SOUGOU_API_SK = PersistentConfig( os.getenv("SOUGOU_API_SK", ""), ) -RAG_WEB_SEARCH_RESULT_COUNT = PersistentConfig( - "RAG_WEB_SEARCH_RESULT_COUNT", - "rag.web.search.result_count", - int(os.getenv("RAG_WEB_SEARCH_RESULT_COUNT", "3")), +TAVILY_API_KEY = PersistentConfig( + "TAVILY_API_KEY", + "rag.web.search.tavily_api_key", + os.getenv("TAVILY_API_KEY", ""), ) -RAG_WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig( - "RAG_WEB_SEARCH_CONCURRENT_REQUESTS", - "rag.web.search.concurrent_requests", - int(os.getenv("RAG_WEB_SEARCH_CONCURRENT_REQUESTS", "10")), +TAVILY_EXTRACT_DEPTH = PersistentConfig( + "TAVILY_EXTRACT_DEPTH", + "rag.web.search.tavily_extract_depth", + os.getenv("TAVILY_EXTRACT_DEPTH", "basic"), ) -RAG_WEB_LOADER_ENGINE = PersistentConfig( - "RAG_WEB_LOADER_ENGINE", - "rag.web.loader.engine", - os.environ.get("RAG_WEB_LOADER_ENGINE", "safe_web"), -) - -RAG_WEB_SEARCH_TRUST_ENV = PersistentConfig( - "RAG_WEB_SEARCH_TRUST_ENV", - "rag.web.search.trust_env", - os.getenv("RAG_WEB_SEARCH_TRUST_ENV", "False").lower() == "true", -) - -PLAYWRIGHT_WS_URI = PersistentConfig( - "PLAYWRIGHT_WS_URI", - "rag.web.loader.playwright_ws_uri", - os.environ.get("PLAYWRIGHT_WS_URI", ""), +PLAYWRIGHT_WS_URL = PersistentConfig( + "PLAYWRIGHT_WS_URL", + "rag.web.loader.PLAYWRIGHT_WS_URL", + os.environ.get("PLAYWRIGHT_WS_URL", ""), ) PLAYWRIGHT_TIMEOUT = PersistentConfig( @@ -2203,17 +2230,6 @@ FIRECRAWL_API_BASE_URL = PersistentConfig( os.environ.get("FIRECRAWL_API_BASE_URL", "https://api.firecrawl.dev"), ) -TAVILY_API_KEY = PersistentConfig( - "TAVILY_API_KEY", - "rag.web.loader.tavily_api_key", - os.getenv("TAVILY_API_KEY", ""), -) - -TAVILY_EXTRACT_DEPTH = PersistentConfig( - "TAVILY_EXTRACT_DEPTH", - "rag.web.loader.tavily_extract_depth", - os.getenv("TAVILY_EXTRACT_DEPTH", "basic"), -) #################################### # Images diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 8095affac..e95de90a5 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -160,11 +160,11 @@ from open_webui.config import ( AUDIO_TTS_VOICE, AUDIO_TTS_AZURE_SPEECH_REGION, AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT, - PLAYWRIGHT_WS_URI, + PLAYWRIGHT_WS_URL, PLAYWRIGHT_TIMEOUT, FIRECRAWL_API_BASE_URL, FIRECRAWL_API_KEY, - RAG_WEB_LOADER_ENGINE, + WEB_LOADER_ENGINE, WHISPER_MODEL, DEEPGRAM_API_KEY, WHISPER_MODEL_AUTO_UPDATE, @@ -205,12 +205,13 @@ from open_webui.config import ( YOUTUBE_LOADER_LANGUAGE, YOUTUBE_LOADER_PROXY_URL, # Retrieval (Web Search) - RAG_WEB_SEARCH_ENGINE, + ENABLE_WEB_SEARCH, + WEB_SEARCH_ENGINE, BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, - RAG_WEB_SEARCH_RESULT_COUNT, - RAG_WEB_SEARCH_CONCURRENT_REQUESTS, - RAG_WEB_SEARCH_TRUST_ENV, - RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + WEB_SEARCH_RESULT_COUNT, + WEB_SEARCH_CONCURRENT_REQUESTS, + WEB_SEARCH_TRUST_ENV, + WEB_SEARCH_DOMAIN_FILTER_LIST, JINA_API_KEY, SEARCHAPI_API_KEY, SEARCHAPI_ENGINE, @@ -240,8 +241,7 @@ from open_webui.config import ( ONEDRIVE_CLIENT_ID, ENABLE_RAG_HYBRID_SEARCH, ENABLE_RAG_LOCAL_WEB_FETCH, - ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, - ENABLE_RAG_WEB_SEARCH, + ENABLE_WEB_LOADER_SSL_VERIFICATION, ENABLE_GOOGLE_DRIVE_INTEGRATION, ENABLE_ONEDRIVE_INTEGRATION, UPLOAD_DIR, @@ -594,9 +594,7 @@ app.state.config.FILE_MAX_COUNT = RAG_FILE_MAX_COUNT app.state.config.RAG_FULL_CONTEXT = RAG_FULL_CONTEXT app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL = BYPASS_EMBEDDING_AND_RETRIEVAL app.state.config.ENABLE_RAG_HYBRID_SEARCH = ENABLE_RAG_HYBRID_SEARCH -app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = ( - ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION -) +app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION = ENABLE_WEB_LOADER_SSL_VERIFICATION app.state.config.CONTENT_EXTRACTION_ENGINE = CONTENT_EXTRACTION_ENGINE app.state.config.TIKA_SERVER_URL = TIKA_SERVER_URL @@ -629,12 +627,16 @@ app.state.config.YOUTUBE_LOADER_LANGUAGE = YOUTUBE_LOADER_LANGUAGE app.state.config.YOUTUBE_LOADER_PROXY_URL = YOUTUBE_LOADER_PROXY_URL -app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH -app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE +app.state.config.ENABLE_WEB_SEARCH = ENABLE_WEB_SEARCH +app.state.config.WEB_SEARCH_ENGINE = WEB_SEARCH_ENGINE +app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST = WEB_SEARCH_DOMAIN_FILTER_LIST +app.state.config.WEB_SEARCH_RESULT_COUNT = WEB_SEARCH_RESULT_COUNT +app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS = WEB_SEARCH_CONCURRENT_REQUESTS +app.state.config.WEB_LOADER_ENGINE = WEB_LOADER_ENGINE +app.state.config.WEB_SEARCH_TRUST_ENV = WEB_SEARCH_TRUST_ENV app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = ( BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL ) -app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION app.state.config.ENABLE_ONEDRIVE_INTEGRATION = ENABLE_ONEDRIVE_INTEGRATION @@ -662,11 +664,8 @@ app.state.config.PERPLEXITY_API_KEY = PERPLEXITY_API_KEY app.state.config.SOUGOU_API_SID = SOUGOU_API_SID app.state.config.SOUGOU_API_SK = SOUGOU_API_SK -app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT -app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS -app.state.config.RAG_WEB_LOADER_ENGINE = RAG_WEB_LOADER_ENGINE -app.state.config.RAG_WEB_SEARCH_TRUST_ENV = RAG_WEB_SEARCH_TRUST_ENV -app.state.config.PLAYWRIGHT_WS_URI = PLAYWRIGHT_WS_URI + +app.state.config.PLAYWRIGHT_WS_URL = PLAYWRIGHT_WS_URL app.state.config.PLAYWRIGHT_TIMEOUT = PLAYWRIGHT_TIMEOUT app.state.config.FIRECRAWL_API_BASE_URL = FIRECRAWL_API_BASE_URL app.state.config.FIRECRAWL_API_KEY = FIRECRAWL_API_KEY @@ -1261,7 +1260,7 @@ async def get_app_config(request: Request): { "enable_direct_connections": app.state.config.ENABLE_DIRECT_CONNECTIONS, "enable_channels": app.state.config.ENABLE_CHANNELS, - "enable_web_search": app.state.config.ENABLE_RAG_WEB_SEARCH, + "enable_web_search": app.state.config.ENABLE_WEB_SEARCH, "enable_code_execution": app.state.config.ENABLE_CODE_EXECUTION, "enable_code_interpreter": app.state.config.ENABLE_CODE_INTERPRETER, "enable_image_generation": app.state.config.ENABLE_IMAGE_GENERATION, diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index 942cb8483..718cfe52f 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -28,9 +28,9 @@ from open_webui.retrieval.loaders.tavily import TavilyLoader from open_webui.constants import ERROR_MESSAGES from open_webui.config import ( ENABLE_RAG_LOCAL_WEB_FETCH, - PLAYWRIGHT_WS_URI, + PLAYWRIGHT_WS_URL, PLAYWRIGHT_TIMEOUT, - RAG_WEB_LOADER_ENGINE, + WEB_LOADER_ENGINE, FIRECRAWL_API_BASE_URL, FIRECRAWL_API_KEY, TAVILY_API_KEY, @@ -584,13 +584,6 @@ class SafeWebBaseLoader(WebBaseLoader): return [document async for document in self.alazy_load()] -RAG_WEB_LOADER_ENGINES = defaultdict(lambda: SafeWebBaseLoader) -RAG_WEB_LOADER_ENGINES["playwright"] = SafePlaywrightURLLoader -RAG_WEB_LOADER_ENGINES["safe_web"] = SafeWebBaseLoader -RAG_WEB_LOADER_ENGINES["firecrawl"] = SafeFireCrawlLoader -RAG_WEB_LOADER_ENGINES["tavily"] = SafeTavilyLoader - - def get_web_loader( urls: Union[str, Sequence[str]], verify_ssl: bool = True, @@ -608,27 +601,36 @@ def get_web_loader( "trust_env": trust_env, } - if RAG_WEB_LOADER_ENGINE.value == "playwright": + if WEB_LOADER_ENGINE.value == "" or WEB_LOADER_ENGINE.value == "safe_web": + WebLoaderClass = SafeWebBaseLoader + if WEB_LOADER_ENGINE.value == "playwright": + WebLoaderClass = SafePlaywrightURLLoader web_loader_args["playwright_timeout"] = PLAYWRIGHT_TIMEOUT.value * 1000 - if PLAYWRIGHT_WS_URI.value: - web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URI.value + if PLAYWRIGHT_WS_URL.value: + web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URL.value - if RAG_WEB_LOADER_ENGINE.value == "firecrawl": + if WEB_LOADER_ENGINE.value == "firecrawl": + WebLoaderClass = SafeFireCrawlLoader web_loader_args["api_key"] = FIRECRAWL_API_KEY.value web_loader_args["api_url"] = FIRECRAWL_API_BASE_URL.value - if RAG_WEB_LOADER_ENGINE.value == "tavily": + if WEB_LOADER_ENGINE.value == "tavily": + WebLoaderClass = SafeTavilyLoader web_loader_args["api_key"] = TAVILY_API_KEY.value web_loader_args["extract_depth"] = TAVILY_EXTRACT_DEPTH.value - # Create the appropriate WebLoader based on the configuration - WebLoaderClass = RAG_WEB_LOADER_ENGINES[RAG_WEB_LOADER_ENGINE.value] - web_loader = WebLoaderClass(**web_loader_args) + if WebLoaderClass: + web_loader = WebLoaderClass(**web_loader_args) - log.debug( - "Using RAG_WEB_LOADER_ENGINE %s for %s URLs", - web_loader.__class__.__name__, - len(safe_urls), - ) + log.debug( + "Using WEB_LOADER_ENGINE %s for %s URLs", + web_loader.__class__.__name__, + len(safe_urls), + ) - return web_loader + return web_loader + else: + raise ValueError( + f"Invalid WEB_LOADER_ENGINE: {WEB_LOADER_ENGINE.value}. " + "Please set it to 'safe_web', 'playwright', 'firecrawl', or 'tavily'." + ) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index d00e303f1..accb21d32 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -352,482 +352,432 @@ async def update_reranking_config( async def get_rag_config(request: Request, user=Depends(get_admin_user)): return { "status": True, - "pdf_extract_images": request.app.state.config.PDF_EXTRACT_IMAGES, - "RAG_FULL_CONTEXT": request.app.state.config.RAG_FULL_CONTEXT, + # RAG settings + "TEMPLATE": request.app.state.config.RAG_TEMPLATE, + "TOP_K": request.app.state.config.TOP_K, "BYPASS_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL, - "enable_google_drive_integration": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION, - "enable_onedrive_integration": request.app.state.config.ENABLE_ONEDRIVE_INTEGRATION, - "content_extraction": { - "engine": request.app.state.config.CONTENT_EXTRACTION_ENGINE, - "tika_server_url": request.app.state.config.TIKA_SERVER_URL, - "docling_server_url": request.app.state.config.DOCLING_SERVER_URL, - "document_intelligence_config": { - "endpoint": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, - "key": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, - }, - "mistral_ocr_config": { - "api_key": request.app.state.config.MISTRAL_OCR_API_KEY, - }, - }, - "chunk": { - "text_splitter": request.app.state.config.TEXT_SPLITTER, - "chunk_size": request.app.state.config.CHUNK_SIZE, - "chunk_overlap": request.app.state.config.CHUNK_OVERLAP, - }, - "file": { - "max_size": request.app.state.config.FILE_MAX_SIZE, - "max_count": request.app.state.config.FILE_MAX_COUNT, - }, + "RAG_FULL_CONTEXT": request.app.state.config.RAG_FULL_CONTEXT, + # Hybrid search settings + "ENABLE_RAG_HYBRID_SEARCH": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, + "TOP_K_RERANKER": request.app.state.config.TOP_K_RERANKER, + "RELEVANCE_THRESHOLD": request.app.state.config.RELEVANCE_THRESHOLD, + # Content extraction settings + "CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE, + "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES, + "TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL, + "DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL, + "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, + "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, + "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY, + # Chunking settings + "TEXT_SPLITTER": request.app.state.config.TEXT_SPLITTER, + "CHUNK_SIZE": request.app.state.config.CHUNK_SIZE, + "CHUNK_OVERLAP": request.app.state.config.CHUNK_OVERLAP, + # File upload settings + "FILE_MAX_SIZE": request.app.state.config.FILE_MAX_SIZE, + "FILE_MAX_COUNT": request.app.state.config.FILE_MAX_COUNT, + # Integration settings + "ENABLE_GOOGLE_DRIVE_INTEGRATION": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION, + "ENABLE_ONEDRIVE_INTEGRATION": request.app.state.config.ENABLE_ONEDRIVE_INTEGRATION, + # Web search settings "web": { - "ENABLE_RAG_WEB_SEARCH": request.app.state.config.ENABLE_RAG_WEB_SEARCH, - "search": { - "engine": request.app.state.config.RAG_WEB_SEARCH_ENGINE, - "searxng_query_url": request.app.state.config.SEARXNG_QUERY_URL, - "google_pse_api_key": request.app.state.config.GOOGLE_PSE_API_KEY, - "google_pse_engine_id": request.app.state.config.GOOGLE_PSE_ENGINE_ID, - "brave_search_api_key": request.app.state.config.BRAVE_SEARCH_API_KEY, - "kagi_search_api_key": request.app.state.config.KAGI_SEARCH_API_KEY, - "mojeek_search_api_key": request.app.state.config.MOJEEK_SEARCH_API_KEY, - "bocha_search_api_key": request.app.state.config.BOCHA_SEARCH_API_KEY, - "serpstack_api_key": request.app.state.config.SERPSTACK_API_KEY, - "serpstack_https": request.app.state.config.SERPSTACK_HTTPS, - "serper_api_key": request.app.state.config.SERPER_API_KEY, - "serply_api_key": request.app.state.config.SERPLY_API_KEY, - "tavily_api_key": request.app.state.config.TAVILY_API_KEY, - "searchapi_api_key": request.app.state.config.SEARCHAPI_API_KEY, - "searchapi_engine": request.app.state.config.SEARCHAPI_ENGINE, - "serpapi_api_key": request.app.state.config.SERPAPI_API_KEY, - "serpapi_engine": request.app.state.config.SERPAPI_ENGINE, - "jina_api_key": request.app.state.config.JINA_API_KEY, - "bing_search_v7_endpoint": request.app.state.config.BING_SEARCH_V7_ENDPOINT, - "bing_search_v7_subscription_key": request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY, - "exa_api_key": request.app.state.config.EXA_API_KEY, - "perplexity_api_key": request.app.state.config.PERPLEXITY_API_KEY, - "sougou_api_sid": request.app.state.config.SOUGOU_API_SID, - "sougou_api_sk": request.app.state.config.SOUGOU_API_SK, - "result_count": request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - "concurrent_requests": request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, - "domain_filter_list": request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, - }, - "loader": { - "engine": request.app.state.config.RAG_WEB_LOADER_ENGINE, - "enable_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, - "trust_env": request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV, - "bypass_embedding_and_retrieval": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, - "playwright_ws_uri": request.app.state.config.PLAYWRIGHT_WS_URI, - "playwright_timeout": request.app.state.config.PLAYWRIGHT_TIMEOUT, - "firecrawl_api_key": request.app.state.config.FIRECRAWL_API_KEY, - "firecrawl_api_base_url": request.app.state.config.FIRECRAWL_API_BASE_URL, - "tavily_api_key": request.app.state.config.TAVILY_API_KEY, - "tavily_extract_depth": request.app.state.config.TAVILY_EXTRACT_DEPTH, - "youtube": { - "language": request.app.state.config.YOUTUBE_LOADER_LANGUAGE, - "proxy_url": request.app.state.config.YOUTUBE_LOADER_PROXY_URL, - "translation": request.app.state.YOUTUBE_LOADER_TRANSLATION, - }, - }, + "ENABLE_WEB_SEARCH": request.app.state.config.ENABLE_WEB_SEARCH, + "WEB_SEARCH_ENGINE": request.app.state.config.WEB_SEARCH_ENGINE, + "WEB_SEARCH_TRUST_ENV": request.app.state.config.WEB_SEARCH_TRUST_ENV, + "WEB_SEARCH_RESULT_COUNT": request.app.state.config.WEB_SEARCH_RESULT_COUNT, + "WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, + "WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, + "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, + "SEARXNG_QUERY_URL": request.app.state.config.SEARXNG_QUERY_URL, + "GOOGLE_PSE_API_KEY": request.app.state.config.GOOGLE_PSE_API_KEY, + "GOOGLE_PSE_ENGINE_ID": request.app.state.config.GOOGLE_PSE_ENGINE_ID, + "BRAVE_SEARCH_API_KEY": request.app.state.config.BRAVE_SEARCH_API_KEY, + "KAGI_SEARCH_API_KEY": request.app.state.config.KAGI_SEARCH_API_KEY, + "MOJEEK_SEARCH_API_KEY": request.app.state.config.MOJEEK_SEARCH_API_KEY, + "BOCHA_SEARCH_API_KEY": request.app.state.config.BOCHA_SEARCH_API_KEY, + "SERPSTACK_API_KEY": request.app.state.config.SERPSTACK_API_KEY, + "SERPSTACK_HTTPS": request.app.state.config.SERPSTACK_HTTPS, + "SERPER_API_KEY": request.app.state.config.SERPER_API_KEY, + "SERPLY_API_KEY": request.app.state.config.SERPLY_API_KEY, + "TAVILY_API_KEY": request.app.state.config.TAVILY_API_KEY, + "SEARCHAPI_API_KEY": request.app.state.config.SEARCHAPI_API_KEY, + "SEARCHAPI_ENGINE": request.app.state.config.SEARCHAPI_ENGINE, + "SERPAPI_API_KEY": request.app.state.config.SERPAPI_API_KEY, + "SERPAPI_ENGINE": request.app.state.config.SERPAPI_ENGINE, + "JINA_API_KEY": request.app.state.config.JINA_API_KEY, + "BING_SEARCH_V7_ENDPOINT": request.app.state.config.BING_SEARCH_V7_ENDPOINT, + "BING_SEARCH_V7_SUBSCRIPTION_KEY": request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY, + "EXA_API_KEY": request.app.state.config.EXA_API_KEY, + "PERPLEXITY_API_KEY": request.app.state.config.PERPLEXITY_API_KEY, + "SOUGOU_API_SID": request.app.state.config.SOUGOU_API_SID, + "SOUGOU_API_SK": request.app.state.config.SOUGOU_API_SK, + "WEB_LOADER_ENGINE": request.app.state.config.WEB_LOADER_ENGINE, + "ENABLE_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, + "PLAYWRIGHT_WS_URL": request.app.state.config.PLAYWRIGHT_WS_URL, + "PLAYWRIGHT_TIMEOUT": request.app.state.config.PLAYWRIGHT_TIMEOUT, + "FIRECRAWL_API_KEY": request.app.state.config.FIRECRAWL_API_KEY, + "FIRECRAWL_API_BASE_URL": request.app.state.config.FIRECRAWL_API_BASE_URL, + "TAVILY_EXTRACT_DEPTH": request.app.state.config.TAVILY_EXTRACT_DEPTH, + "YOUTUBE_LOADER_LANGUAGE": request.app.state.config.YOUTUBE_LOADER_LANGUAGE, + "YOUTUBE_LOADER_PROXY_URL": request.app.state.config.YOUTUBE_LOADER_PROXY_URL, + "YOUTUBE_LOADER_TRANSLATION": request.app.state.YOUTUBE_LOADER_TRANSLATION, }, } -class FileConfig(BaseModel): - max_size: Optional[int] = None - max_count: Optional[int] = None - - -class DocumentIntelligenceConfigForm(BaseModel): - endpoint: str - key: str - - -class MistralOCRConfigForm(BaseModel): - api_key: str - - -class ContentExtractionConfig(BaseModel): - engine: str = "" - tika_server_url: Optional[str] = None - docling_server_url: Optional[str] = None - document_intelligence_config: Optional[DocumentIntelligenceConfigForm] = None - mistral_ocr_config: Optional[MistralOCRConfigForm] = None - - -class ChunkParamUpdateForm(BaseModel): - text_splitter: Optional[str] = None - chunk_size: int - chunk_overlap: int - - -class YoutubeLoaderConfig(BaseModel): - language: list[str] - translation: Optional[str] = None - proxy_url: str = "" - - -class WebSearchConfig(BaseModel): - engine: Optional[str] = None - searxng_query_url: Optional[str] = None - google_pse_api_key: Optional[str] = None - google_pse_engine_id: Optional[str] = None - brave_search_api_key: Optional[str] = None - kagi_search_api_key: Optional[str] = None - mojeek_search_api_key: Optional[str] = None - bocha_search_api_key: Optional[str] = None - serpstack_api_key: Optional[str] = None - serpstack_https: Optional[bool] = None - serper_api_key: Optional[str] = None - serply_api_key: Optional[str] = None - tavily_api_key: Optional[str] = None - searchapi_api_key: Optional[str] = None - searchapi_engine: Optional[str] = None - serpapi_api_key: Optional[str] = None - serpapi_engine: Optional[str] = None - jina_api_key: Optional[str] = None - bing_search_v7_endpoint: Optional[str] = None - bing_search_v7_subscription_key: Optional[str] = None - exa_api_key: Optional[str] = None - perplexity_api_key: Optional[str] = None - sougou_api_sid: Optional[str] = None - sougou_api_sk: Optional[str] = None - result_count: Optional[int] = None - concurrent_requests: Optional[int] = None - domain_filter_list: Optional[List[str]] = [] - - -class WebLoaderConfig(BaseModel): - engine: Optional[str] = None - enable_ssl_verification: Optional[bool] = None - trust_env: Optional[bool] = None - bypass_embedding_and_retrieval: Optional[bool] = None - playwright_ws_uri: Optional[str] = None - playwright_timeout: Optional[int] = None - firecrawl_api_key: Optional[str] = None - firecrawl_api_base_url: Optional[str] = None - tavily_api_key: Optional[str] = None - tavily_extract_depth: Optional[str] = None - youtube: Optional[YoutubeLoaderConfig] = None - - class WebConfig(BaseModel): - ENABLE_RAG_WEB_SEARCH: Optional[bool] = None - search: WebSearchConfig - loader: WebLoaderConfig + ENABLE_WEB_SEARCH: Optional[bool] = None + WEB_SEARCH_ENGINE: Optional[str] = None + WEB_SEARCH_TRUST_ENV: Optional[bool] = None + WEB_SEARCH_RESULT_COUNT: Optional[int] = None + WEB_SEARCH_CONCURRENT_REQUESTS: Optional[int] = None + WEB_SEARCH_DOMAIN_FILTER_LIST: Optional[List[str]] = [] + BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None + SEARXNG_QUERY_URL: Optional[str] = None + GOOGLE_PSE_API_KEY: Optional[str] = None + GOOGLE_PSE_ENGINE_ID: Optional[str] = None + BRAVE_SEARCH_API_KEY: Optional[str] = None + KAGI_SEARCH_API_KEY: Optional[str] = None + MOJEEK_SEARCH_API_KEY: Optional[str] = None + BOCHA_SEARCH_API_KEY: Optional[str] = None + SERPSTACK_API_KEY: Optional[str] = None + SERPSTACK_HTTPS: Optional[bool] = None + SERPER_API_KEY: Optional[str] = None + SERPLY_API_KEY: Optional[str] = None + TAVILY_API_KEY: Optional[str] = None + SEARCHAPI_API_KEY: Optional[str] = None + SEARCHAPI_ENGINE: Optional[str] = None + SERPAPI_API_KEY: Optional[str] = None + SERPAPI_ENGINE: Optional[str] = None + JINA_API_KEY: Optional[str] = None + BING_SEARCH_V7_ENDPOINT: Optional[str] = None + BING_SEARCH_V7_SUBSCRIPTION_KEY: Optional[str] = None + EXA_API_KEY: Optional[str] = None + PERPLEXITY_API_KEY: Optional[str] = None + SOUGOU_API_SID: Optional[str] = None + SOUGOU_API_SK: Optional[str] = None + WEB_LOADER_ENGINE: Optional[str] = None + ENABLE_WEB_LOADER_SSL_VERIFICATION: Optional[bool] = None + PLAYWRIGHT_WS_URL: Optional[str] = None + PLAYWRIGHT_TIMEOUT: Optional[int] = None + FIRECRAWL_API_KEY: Optional[str] = None + FIRECRAWL_API_BASE_URL: Optional[str] = None + TAVILY_EXTRACT_DEPTH: Optional[str] = None + YOUTUBE_LOADER_LANGUAGE: Optional[List[str]] = None + YOUTUBE_LOADER_PROXY_URL: Optional[str] = None + YOUTUBE_LOADER_TRANSLATION: Optional[str] = None -class ConfigUpdateForm(BaseModel): - RAG_FULL_CONTEXT: Optional[bool] = None +class ConfigForm(BaseModel): + # RAG settings + TEMPLATE: Optional[str] = None + TOP_K: Optional[int] = None BYPASS_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None - pdf_extract_images: Optional[bool] = None - enable_google_drive_integration: Optional[bool] = None - enable_onedrive_integration: Optional[bool] = None - file: Optional[FileConfig] = None - content_extraction: Optional[ContentExtractionConfig] = None - chunk: Optional[ChunkParamUpdateForm] = None + RAG_FULL_CONTEXT: Optional[bool] = None + + # Hybrid search settings + ENABLE_RAG_HYBRID_SEARCH: Optional[bool] = None + TOP_K_RERANKER: Optional[int] = None + RELEVANCE_THRESHOLD: Optional[float] = None + + # Content extraction settings + CONTENT_EXTRACTION_ENGINE: Optional[str] = None + PDF_EXTRACT_IMAGES: Optional[bool] = None + TIKA_SERVER_URL: Optional[str] = None + DOCLING_SERVER_URL: Optional[str] = None + DOCUMENT_INTELLIGENCE_ENDPOINT: Optional[str] = None + DOCUMENT_INTELLIGENCE_KEY: Optional[str] = None + MISTRAL_OCR_API_KEY: Optional[str] = None + + # Chunking settings + TEXT_SPLITTER: Optional[str] = None + CHUNK_SIZE: Optional[int] = None + CHUNK_OVERLAP: Optional[int] = None + + # File upload settings + FILE_MAX_SIZE: Optional[int] = None + FILE_MAX_COUNT: Optional[int] = None + + # Integration settings + ENABLE_GOOGLE_DRIVE_INTEGRATION: Optional[bool] = None + ENABLE_ONEDRIVE_INTEGRATION: Optional[bool] = None + + # Web search settings web: Optional[WebConfig] = None @router.post("/config/update") async def update_rag_config( - request: Request, form_data: ConfigUpdateForm, user=Depends(get_admin_user) + request: Request, form_data: ConfigForm, user=Depends(get_admin_user) ): - request.app.state.config.PDF_EXTRACT_IMAGES = ( - form_data.pdf_extract_images - if form_data.pdf_extract_images is not None - else request.app.state.config.PDF_EXTRACT_IMAGES + # RAG settings + request.app.state.config.RAG_TEMPLATE = ( + form_data.TEMPLATE + if form_data.TEMPLATE is not None + else request.app.state.config.RAG_TEMPLATE + ) + request.app.state.config.TOP_K = ( + form_data.TOP_K + if form_data.TOP_K is not None + else request.app.state.config.TOP_K + ) + request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL = ( + form_data.BYPASS_EMBEDDING_AND_RETRIEVAL + if form_data.BYPASS_EMBEDDING_AND_RETRIEVAL is not None + else request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL ) - request.app.state.config.RAG_FULL_CONTEXT = ( form_data.RAG_FULL_CONTEXT if form_data.RAG_FULL_CONTEXT is not None else request.app.state.config.RAG_FULL_CONTEXT ) - request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL = ( - form_data.BYPASS_EMBEDDING_AND_RETRIEVAL - if form_data.BYPASS_EMBEDDING_AND_RETRIEVAL is not None - else request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL + # Hybrid search settings + request.app.state.config.ENABLE_RAG_HYBRID_SEARCH = ( + form_data.ENABLE_RAG_HYBRID_SEARCH + if form_data.ENABLE_RAG_HYBRID_SEARCH is not None + else request.app.state.config.ENABLE_RAG_HYBRID_SEARCH + ) + # Free up memory if hybrid search is disabled + if not request.app.state.config.ENABLE_RAG_HYBRID_SEARCH: + request.app.state.rf = None + + request.app.state.config.TOP_K_RERANKER = ( + form_data.TOP_K_RERANKER + if form_data.TOP_K_RERANKER is not None + else request.app.state.config.TOP_K_RERANKER + ) + request.app.state.config.RELEVANCE_THRESHOLD = ( + form_data.RELEVANCE_THRESHOLD + if form_data.RELEVANCE_THRESHOLD is not None + else request.app.state.config.RELEVANCE_THRESHOLD ) + # Content extraction settings + request.app.state.config.CONTENT_EXTRACTION_ENGINE = ( + form_data.CONTENT_EXTRACTION_ENGINE + if form_data.CONTENT_EXTRACTION_ENGINE is not None + else request.app.state.config.CONTENT_EXTRACTION_ENGINE + ) + request.app.state.config.PDF_EXTRACT_IMAGES = ( + form_data.PDF_EXTRACT_IMAGES + if form_data.PDF_EXTRACT_IMAGES is not None + else request.app.state.config.PDF_EXTRACT_IMAGES + ) + request.app.state.config.TIKA_SERVER_URL = ( + form_data.TIKA_SERVER_URL + if form_data.TIKA_SERVER_URL is not None + else request.app.state.config.TIKA_SERVER_URL + ) + request.app.state.config.DOCLING_SERVER_URL = ( + form_data.DOCLING_SERVER_URL + if form_data.DOCLING_SERVER_URL is not None + else request.app.state.config.DOCLING_SERVER_URL + ) + request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = ( + form_data.DOCUMENT_INTELLIGENCE_ENDPOINT + if form_data.DOCUMENT_INTELLIGENCE_ENDPOINT is not None + else request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT + ) + request.app.state.config.DOCUMENT_INTELLIGENCE_KEY = ( + form_data.DOCUMENT_INTELLIGENCE_KEY + if form_data.DOCUMENT_INTELLIGENCE_KEY is not None + else request.app.state.config.DOCUMENT_INTELLIGENCE_KEY + ) + request.app.state.config.MISTRAL_OCR_API_KEY = ( + form_data.MISTRAL_OCR_API_KEY + if form_data.MISTRAL_OCR_API_KEY is not None + else request.app.state.config.MISTRAL_OCR_API_KEY + ) + + # Chunking settings + request.app.state.config.TEXT_SPLITTER = ( + form_data.TEXT_SPLITTER + if form_data.TEXT_SPLITTER is not None + else request.app.state.config.TEXT_SPLITTER + ) + request.app.state.config.CHUNK_SIZE = ( + form_data.CHUNK_SIZE + if form_data.CHUNK_SIZE is not None + else request.app.state.config.CHUNK_SIZE + ) + request.app.state.config.CHUNK_OVERLAP = ( + form_data.CHUNK_OVERLAP + if form_data.CHUNK_OVERLAP is not None + else request.app.state.config.CHUNK_OVERLAP + ) + + # File upload settings + request.app.state.config.FILE_MAX_SIZE = ( + form_data.FILE_MAX_SIZE + if form_data.FILE_MAX_SIZE is not None + else request.app.state.config.FILE_MAX_SIZE + ) + request.app.state.config.FILE_MAX_COUNT = ( + form_data.FILE_MAX_COUNT + if form_data.FILE_MAX_COUNT is not None + else request.app.state.config.FILE_MAX_COUNT + ) + + # Integration settings request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ( form_data.enable_google_drive_integration if form_data.enable_google_drive_integration is not None else request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION ) - request.app.state.config.ENABLE_ONEDRIVE_INTEGRATION = ( - form_data.enable_onedrive_integration - if form_data.enable_onedrive_integration is not None + form_data.ENABLE_ONEDRIVE_INTEGRATION + if form_data.ENABLE_ONEDRIVE_INTEGRATION is not None else request.app.state.config.ENABLE_ONEDRIVE_INTEGRATION ) - if form_data.file is not None: - request.app.state.config.FILE_MAX_SIZE = form_data.file.max_size - request.app.state.config.FILE_MAX_COUNT = form_data.file.max_count - - if form_data.content_extraction is not None: - log.info( - f"Updating content extraction: {request.app.state.config.CONTENT_EXTRACTION_ENGINE} to {form_data.content_extraction.engine}" - ) - request.app.state.config.CONTENT_EXTRACTION_ENGINE = ( - form_data.content_extraction.engine - ) - request.app.state.config.TIKA_SERVER_URL = ( - form_data.content_extraction.tika_server_url - ) - request.app.state.config.DOCLING_SERVER_URL = ( - form_data.content_extraction.docling_server_url - ) - if form_data.content_extraction.document_intelligence_config is not None: - request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = ( - form_data.content_extraction.document_intelligence_config.endpoint - ) - request.app.state.config.DOCUMENT_INTELLIGENCE_KEY = ( - form_data.content_extraction.document_intelligence_config.key - ) - if form_data.content_extraction.mistral_ocr_config is not None: - request.app.state.config.MISTRAL_OCR_API_KEY = ( - form_data.content_extraction.mistral_ocr_config.api_key - ) - - if form_data.chunk is not None: - request.app.state.config.TEXT_SPLITTER = form_data.chunk.text_splitter - request.app.state.config.CHUNK_SIZE = form_data.chunk.chunk_size - request.app.state.config.CHUNK_OVERLAP = form_data.chunk.chunk_overlap - if form_data.web is not None: - request.app.state.config.ENABLE_RAG_WEB_SEARCH = ( - form_data.web.ENABLE_RAG_WEB_SEARCH + # Web search settings + request.app.state.config.ENABLE_WEB_SEARCH = form_data.web.ENABLE_WEB_SEARCH + request.app.state.config.WEB_SEARCH_ENGINE = form_data.web.WEB_SEARCH_ENGINE + request.app.state.config.WEB_SEARCH_TRUST_ENV = ( + form_data.web.WEB_SEARCH_TRUST_ENV ) - - request.app.state.config.RAG_WEB_SEARCH_ENGINE = form_data.web.search.engine - request.app.state.config.SEARXNG_QUERY_URL = ( - form_data.web.search.searxng_query_url + request.app.state.config.WEB_SEARCH_RESULT_COUNT = ( + form_data.web.WEB_SEARCH_RESULT_COUNT ) - request.app.state.config.GOOGLE_PSE_API_KEY = ( - form_data.web.search.google_pse_api_key + request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS = ( + form_data.web.WEB_SEARCH_CONCURRENT_REQUESTS ) - request.app.state.config.GOOGLE_PSE_ENGINE_ID = ( - form_data.web.search.google_pse_engine_id - ) - request.app.state.config.BRAVE_SEARCH_API_KEY = ( - form_data.web.search.brave_search_api_key - ) - request.app.state.config.KAGI_SEARCH_API_KEY = ( - form_data.web.search.kagi_search_api_key - ) - request.app.state.config.MOJEEK_SEARCH_API_KEY = ( - form_data.web.search.mojeek_search_api_key - ) - request.app.state.config.BOCHA_SEARCH_API_KEY = ( - form_data.web.search.bocha_search_api_key - ) - request.app.state.config.SERPSTACK_API_KEY = ( - form_data.web.search.serpstack_api_key - ) - request.app.state.config.SERPSTACK_HTTPS = form_data.web.search.serpstack_https - request.app.state.config.SERPER_API_KEY = form_data.web.search.serper_api_key - request.app.state.config.SERPLY_API_KEY = form_data.web.search.serply_api_key - request.app.state.config.TAVILY_API_KEY = form_data.web.search.tavily_api_key - request.app.state.config.SEARCHAPI_API_KEY = ( - form_data.web.search.searchapi_api_key - ) - request.app.state.config.SEARCHAPI_ENGINE = ( - form_data.web.search.searchapi_engine - ) - request.app.state.config.SERPAPI_API_KEY = form_data.web.search.serpapi_api_key - request.app.state.config.SERPAPI_ENGINE = form_data.web.search.serpapi_engine - request.app.state.config.JINA_API_KEY = form_data.web.search.jina_api_key - request.app.state.config.BING_SEARCH_V7_ENDPOINT = ( - form_data.web.search.bing_search_v7_endpoint - ) - request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY = ( - form_data.web.search.bing_search_v7_subscription_key - ) - request.app.state.config.EXA_API_KEY = form_data.web.search.exa_api_key - request.app.state.config.PERPLEXITY_API_KEY = ( - form_data.web.search.perplexity_api_key - ) - request.app.state.config.SOUGOU_API_SID = form_data.web.search.sougou_api_sid - request.app.state.config.SOUGOU_API_SK = form_data.web.search.sougou_api_sk - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = ( - form_data.web.search.result_count - ) - request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = ( - form_data.web.search.concurrent_requests - ) - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = ( - form_data.web.search.domain_filter_list - ) - - request.app.state.config.RAG_WEB_LOADER_ENGINE = form_data.web.loader.engine - request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = ( - # Note: When UI "Bypass SSL verification for Websites"=True then ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION=False - form_data.web.loader.enable_ssl_verification - ) - request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV = ( - form_data.web.loader.trust_env + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST = ( + form_data.web.WEB_SEARCH_DOMAIN_FILTER_LIST ) request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = ( - form_data.web.loader.bypass_embedding_and_retrieval + form_data.web.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL ) - request.app.state.config.PLAYWRIGHT_WS_URI = ( - form_data.web.loader.playwright_ws_uri + request.app.state.config.SEARXNG_QUERY_URL = form_data.web.SEARXNG_QUERY_URL + request.app.state.config.GOOGLE_PSE_API_KEY = form_data.web.GOOGLE_PSE_API_KEY + request.app.state.config.GOOGLE_PSE_ENGINE_ID = ( + form_data.web.GOOGLE_PSE_ENGINE_ID ) - request.app.state.config.PLAYWRIGHT_TIMEOUT = ( - form_data.web.loader.playwright_timeout + request.app.state.config.BRAVE_SEARCH_API_KEY = ( + form_data.web.BRAVE_SEARCH_API_KEY ) - request.app.state.config.FIRECRAWL_API_KEY = ( - form_data.web.loader.firecrawl_api_key + request.app.state.config.KAGI_SEARCH_API_KEY = form_data.web.KAGI_SEARCH_API_KEY + request.app.state.config.MOJEEK_SEARCH_API_KEY = ( + form_data.web.MOJEEK_SEARCH_API_KEY ) + request.app.state.config.BOCHA_SEARCH_API_KEY = ( + form_data.web.BOCHA_SEARCH_API_KEY + ) + request.app.state.config.SERPSTACK_API_KEY = form_data.web.SERPSTACK_API_KEY + request.app.state.config.SERPSTACK_HTTPS = form_data.web.SERPSTACK_HTTPS + request.app.state.config.SERPER_API_KEY = form_data.web.SERPER_API_KEY + request.app.state.config.SERPLY_API_KEY = form_data.web.SERPLY_API_KEY + request.app.state.config.TAVILY_API_KEY = form_data.web.TAVILY_API_KEY + request.app.state.config.SEARCHAPI_API_KEY = form_data.web.SEARCHAPI_API_KEY + request.app.state.config.SEARCHAPI_ENGINE = form_data.web.SEARCHAPI_ENGINE + request.app.state.config.SERPAPI_API_KEY = form_data.web.SERPAPI_API_KEY + request.app.state.config.SERPAPI_ENGINE = form_data.web.SERPAPI_ENGINE + request.app.state.config.JINA_API_KEY = form_data.web.JINA_API_KEY + request.app.state.config.BING_SEARCH_V7_ENDPOINT = ( + form_data.web.BING_SEARCH_V7_ENDPOINT + ) + request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY = ( + form_data.web.BING_SEARCH_V7_SUBSCRIPTION_KEY + ) + request.app.state.config.EXA_API_KEY = form_data.web.EXA_API_KEY + request.app.state.config.PERPLEXITY_API_KEY = form_data.web.PERPLEXITY_API_KEY + request.app.state.config.SOUGOU_API_SID = form_data.web.SOUGOU_API_SID + request.app.state.config.SOUGOU_API_SK = form_data.web.SOUGOU_API_SK + + # Web loader settings + request.app.state.config.WEB_LOADER_ENGINE = form_data.web.WEB_LOADER_ENGINE + request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION = ( + form_data.web.ENABLE_WEB_LOADER_SSL_VERIFICATION + ) + request.app.state.config.PLAYWRIGHT_WS_URL = form_data.web.PLAYWRIGHT_WS_URL + request.app.state.config.PLAYWRIGHT_TIMEOUT = form_data.web.PLAYWRIGHT_TIMEOUT + request.app.state.config.FIRECRAWL_API_KEY = form_data.web.FIRECRAWL_API_KEY request.app.state.config.FIRECRAWL_API_BASE_URL = ( - form_data.web.loader.firecrawl_api_base_url + form_data.web.FIRECRAWL_API_BASE_URL ) - request.app.state.config.TAVILY_API_KEY = form_data.web.loader.tavily_api_key request.app.state.config.TAVILY_EXTRACT_DEPTH = ( - form_data.web.loader.tavily_extract_depth + form_data.web.TAVILY_EXTRACT_DEPTH ) request.app.state.config.YOUTUBE_LOADER_LANGUAGE = ( - form_data.web.loader.youtube.language + form_data.web.YOUTUBE_LOADER_LANGUAGE ) request.app.state.config.YOUTUBE_LOADER_PROXY_URL = ( - form_data.web.loader.youtube.proxy_url + form_data.web.YOUTUBE_LOADER_PROXY_URL ) - request.app.state.YOUTUBE_LOADER_TRANSLATION = ( - form_data.web.loader.youtube.translation + request.app.state.config.YOUTUBE_LOADER_TRANSLATION = ( + form_data.web.YOUTUBE_LOADER_TRANSLATION ) return { "status": True, - "pdf_extract_images": request.app.state.config.PDF_EXTRACT_IMAGES, - "RAG_FULL_CONTEXT": request.app.state.config.RAG_FULL_CONTEXT, "BYPASS_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL, - "file": { - "max_size": request.app.state.config.FILE_MAX_SIZE, - "max_count": request.app.state.config.FILE_MAX_COUNT, - }, - "content_extraction": { - "engine": request.app.state.config.CONTENT_EXTRACTION_ENGINE, - "tika_server_url": request.app.state.config.TIKA_SERVER_URL, - "docling_server_url": request.app.state.config.DOCLING_SERVER_URL, - "document_intelligence_config": { - "endpoint": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, - "key": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, - }, - "mistral_ocr_config": { - "api_key": request.app.state.config.MISTRAL_OCR_API_KEY, - }, - }, - "chunk": { - "text_splitter": request.app.state.config.TEXT_SPLITTER, - "chunk_size": request.app.state.config.CHUNK_SIZE, - "chunk_overlap": request.app.state.config.CHUNK_OVERLAP, - }, + "RAG_FULL_CONTEXT": request.app.state.config.RAG_FULL_CONTEXT, + # Content extraction settings + "CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE, + "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES, + "TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL, + "DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL, + "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, + "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, + "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY, + # Chunking settings + "TEXT_SPLITTER": request.app.state.config.TEXT_SPLITTER, + "CHUNK_SIZE": request.app.state.config.CHUNK_SIZE, + "CHUNK_OVERLAP": request.app.state.config.CHUNK_OVERLAP, + # File upload settings + "FILE_MAX_SIZE": request.app.state.config.FILE_MAX_SIZE, + "FILE_MAX_COUNT": request.app.state.config.FILE_MAX_COUNT, + # Integration settings + "ENABLE_GOOGLE_DRIVE_INTEGRATION": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION, + "ENABLE_ONEDRIVE_INTEGRATION": request.app.state.config.ENABLE_ONEDRIVE_INTEGRATION, + # Web search settings "web": { - "ENABLE_RAG_WEB_SEARCH": request.app.state.config.ENABLE_RAG_WEB_SEARCH, - "search": { - "engine": request.app.state.config.RAG_WEB_SEARCH_ENGINE, - "searxng_query_url": request.app.state.config.SEARXNG_QUERY_URL, - "google_pse_api_key": request.app.state.config.GOOGLE_PSE_API_KEY, - "google_pse_engine_id": request.app.state.config.GOOGLE_PSE_ENGINE_ID, - "brave_search_api_key": request.app.state.config.BRAVE_SEARCH_API_KEY, - "kagi_search_api_key": request.app.state.config.KAGI_SEARCH_API_KEY, - "mojeek_search_api_key": request.app.state.config.MOJEEK_SEARCH_API_KEY, - "bocha_search_api_key": request.app.state.config.BOCHA_SEARCH_API_KEY, - "serpstack_api_key": request.app.state.config.SERPSTACK_API_KEY, - "serpstack_https": request.app.state.config.SERPSTACK_HTTPS, - "serper_api_key": request.app.state.config.SERPER_API_KEY, - "serply_api_key": request.app.state.config.SERPLY_API_KEY, - "tavily_api_key": request.app.state.config.TAVILY_API_KEY, - "searchapi_api_key": request.app.state.config.SEARCHAPI_API_KEY, - "searchapi_engine": request.app.state.config.SEARCHAPI_ENGINE, - "serpapi_api_key": request.app.state.config.SERPAPI_API_KEY, - "serpapi_engine": request.app.state.config.SERPAPI_ENGINE, - "jina_api_key": request.app.state.config.JINA_API_KEY, - "bing_search_v7_endpoint": request.app.state.config.BING_SEARCH_V7_ENDPOINT, - "bing_search_v7_subscription_key": request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY, - "exa_api_key": request.app.state.config.EXA_API_KEY, - "perplexity_api_key": request.app.state.config.PERPLEXITY_API_KEY, - "sougou_api_sid": request.app.state.config.SOUGOU_API_SID, - "sougou_api_sk": request.app.state.config.SOUGOU_API_SK, - "result_count": request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - "concurrent_requests": request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, - "domain_filter_list": request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, - }, - "loader": { - "engine": request.app.state.config.RAG_WEB_LOADER_ENGINE, - "enable_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, - "trust_env": request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV, - "bypass_embedding_and_retrieval": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, - "playwright_ws_uri": request.app.state.config.PLAYWRIGHT_WS_URI, - "playwright_timeout": request.app.state.config.PLAYWRIGHT_TIMEOUT, - "firecrawl_api_key": request.app.state.config.FIRECRAWL_API_KEY, - "firecrawl_api_base_url": request.app.state.config.FIRECRAWL_API_BASE_URL, - "tavily_api_key": request.app.state.config.TAVILY_API_KEY, - "tavily_extract_depth": request.app.state.config.TAVILY_EXTRACT_DEPTH, - "youtube": { - "language": request.app.state.config.YOUTUBE_LOADER_LANGUAGE, - "proxy_url": request.app.state.config.YOUTUBE_LOADER_PROXY_URL, - "translation": request.app.state.YOUTUBE_LOADER_TRANSLATION, - }, - }, + "ENABLE_WEB_SEARCH": request.app.state.config.ENABLE_WEB_SEARCH, + "WEB_SEARCH_ENGINE": request.app.state.config.WEB_SEARCH_ENGINE, + "WEB_SEARCH_TRUST_ENV": request.app.state.config.WEB_SEARCH_TRUST_ENV, + "WEB_SEARCH_RESULT_COUNT": request.app.state.config.WEB_SEARCH_RESULT_COUNT, + "WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, + "WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, + "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, + "SEARXNG_QUERY_URL": request.app.state.config.SEARXNG_QUERY_URL, + "GOOGLE_PSE_API_KEY": request.app.state.config.GOOGLE_PSE_API_KEY, + "GOOGLE_PSE_ENGINE_ID": request.app.state.config.GOOGLE_PSE_ENGINE_ID, + "BRAVE_SEARCH_API_KEY": request.app.state.config.BRAVE_SEARCH_API_KEY, + "KAGI_SEARCH_API_KEY": request.app.state.config.KAGI_SEARCH_API_KEY, + "MOJEEK_SEARCH_API_KEY": request.app.state.config.MOJEEK_SEARCH_API_KEY, + "BOCHA_SEARCH_API_KEY": request.app.state.config.BOCHA_SEARCH_API_KEY, + "SERPSTACK_API_KEY": request.app.state.config.SERPSTACK_API_KEY, + "SERPSTACK_HTTPS": request.app.state.config.SERPSTACK_HTTPS, + "SERPER_API_KEY": request.app.state.config.SERPER_API_KEY, + "SERPLY_API_KEY": request.app.state.config.SERPLY_API_KEY, + "TAVILY_API_KEY": request.app.state.config.TAVILY_API_KEY, + "SEARCHAPI_API_KEY": request.app.state.config.SEARCHAPI_API_KEY, + "SEARCHAPI_ENGINE": request.app.state.config.SEARCHAPI_ENGINE, + "SERPAPI_API_KEY": request.app.state.config.SERPAPI_API_KEY, + "SERPAPI_ENGINE": request.app.state.config.SERPAPI_ENGINE, + "JINA_API_KEY": request.app.state.config.JINA_API_KEY, + "BING_SEARCH_V7_ENDPOINT": request.app.state.config.BING_SEARCH_V7_ENDPOINT, + "BING_SEARCH_V7_SUBSCRIPTION_KEY": request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY, + "EXA_API_KEY": request.app.state.config.EXA_API_KEY, + "PERPLEXITY_API_KEY": request.app.state.config.PERPLEXITY_API_KEY, + "SOUGOU_API_SID": request.app.state.config.SOUGOU_API_SID, + "SOUGOU_API_SK": request.app.state.config.SOUGOU_API_SK, + "WEB_LOADER_ENGINE": request.app.state.config.WEB_LOADER_ENGINE, + "ENABLE_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, + "PLAYWRIGHT_WS_URL": request.app.state.config.PLAYWRIGHT_WS_URL, + "PLAYWRIGHT_TIMEOUT": request.app.state.config.PLAYWRIGHT_TIMEOUT, + "FIRECRAWL_API_KEY": request.app.state.config.FIRECRAWL_API_KEY, + "FIRECRAWL_API_BASE_URL": request.app.state.config.FIRECRAWL_API_BASE_URL, + "TAVILY_EXTRACT_DEPTH": request.app.state.config.TAVILY_EXTRACT_DEPTH, + "YOUTUBE_LOADER_LANGUAGE": request.app.state.config.YOUTUBE_LOADER_LANGUAGE, + "YOUTUBE_LOADER_PROXY_URL": request.app.state.config.YOUTUBE_LOADER_PROXY_URL, + "YOUTUBE_LOADER_TRANSLATION": request.app.state.YOUTUBE_LOADER_TRANSLATION, }, } -@router.get("/template") -async def get_rag_template(request: Request, user=Depends(get_verified_user)): - return { - "status": True, - "template": request.app.state.config.RAG_TEMPLATE, - } - - -@router.get("/query/settings") -async def get_query_settings(request: Request, user=Depends(get_admin_user)): - return { - "status": True, - "template": request.app.state.config.RAG_TEMPLATE, - "k": request.app.state.config.TOP_K, - "k_reranker": request.app.state.config.TOP_K_RERANKER, - "r": request.app.state.config.RELEVANCE_THRESHOLD, - "hybrid": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, - } - - -class QuerySettingsForm(BaseModel): - k: Optional[int] = None - k_reranker: Optional[int] = None - r: Optional[float] = None - template: Optional[str] = None - hybrid: Optional[bool] = None - - -@router.post("/query/settings/update") -async def update_query_settings( - request: Request, form_data: QuerySettingsForm, user=Depends(get_admin_user) -): - request.app.state.config.RAG_TEMPLATE = form_data.template - request.app.state.config.TOP_K = form_data.k if form_data.k else 4 - request.app.state.config.TOP_K_RERANKER = form_data.k_reranker or 4 - request.app.state.config.RELEVANCE_THRESHOLD = form_data.r if form_data.r else 0.0 - - request.app.state.config.ENABLE_RAG_HYBRID_SEARCH = ( - form_data.hybrid if form_data.hybrid else False - ) - - if not request.app.state.config.ENABLE_RAG_HYBRID_SEARCH: - request.app.state.rf = None - - return { - "status": True, - "template": request.app.state.config.RAG_TEMPLATE, - "k": request.app.state.config.TOP_K, - "k_reranker": request.app.state.config.TOP_K_RERANKER, - "r": request.app.state.config.RELEVANCE_THRESHOLD, - "hybrid": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, - } - - #################################### # # Document process and retrieval @@ -1268,8 +1218,8 @@ def process_web( loader = get_web_loader( form_data.url, - verify_ssl=request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, - requests_per_second=request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, + verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, + requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, ) docs = loader.load() content = " ".join([doc.page_content for doc in docs]) @@ -1333,8 +1283,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_searxng( request.app.state.config.SEARXNG_QUERY_URL, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No SEARXNG_QUERY_URL found in environment variables") @@ -1347,8 +1297,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: request.app.state.config.GOOGLE_PSE_API_KEY, request.app.state.config.GOOGLE_PSE_ENGINE_ID, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception( @@ -1359,8 +1309,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_brave( request.app.state.config.BRAVE_SEARCH_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No BRAVE_SEARCH_API_KEY found in environment variables") @@ -1369,8 +1319,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_kagi( request.app.state.config.KAGI_SEARCH_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No KAGI_SEARCH_API_KEY found in environment variables") @@ -1379,8 +1329,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_mojeek( request.app.state.config.MOJEEK_SEARCH_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No MOJEEK_SEARCH_API_KEY found in environment variables") @@ -1389,8 +1339,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_bocha( request.app.state.config.BOCHA_SEARCH_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No BOCHA_SEARCH_API_KEY found in environment variables") @@ -1399,8 +1349,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_serpstack( request.app.state.config.SERPSTACK_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, https_enabled=request.app.state.config.SERPSTACK_HTTPS, ) else: @@ -1410,8 +1360,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_serper( request.app.state.config.SERPER_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No SERPER_API_KEY found in environment variables") @@ -1420,24 +1370,24 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_serply( request.app.state.config.SERPLY_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No SERPLY_API_KEY found in environment variables") elif engine == "duckduckgo": return search_duckduckgo( query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) elif engine == "tavily": if request.app.state.config.TAVILY_API_KEY: return search_tavily( request.app.state.config.TAVILY_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No TAVILY_API_KEY found in environment variables") @@ -1447,8 +1397,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: request.app.state.config.SEARCHAPI_API_KEY, request.app.state.config.SEARCHAPI_ENGINE, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No SEARCHAPI_API_KEY found in environment variables") @@ -1458,8 +1408,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: request.app.state.config.SERPAPI_API_KEY, request.app.state.config.SERPAPI_ENGINE, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No SERPAPI_API_KEY found in environment variables") @@ -1467,7 +1417,7 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_jina( request.app.state.config.JINA_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, ) elif engine == "bing": return search_bing( @@ -1475,34 +1425,39 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: request.app.state.config.BING_SEARCH_V7_ENDPOINT, str(DEFAULT_LOCALE), query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) elif engine == "exa": return search_exa( request.app.state.config.EXA_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) elif engine == "perplexity": return search_perplexity( request.app.state.config.PERPLEXITY_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) - elif engine == 'sougou': - if request.app.state.config.SOUGOU_API_SID and request.app.state.config.SOUGOU_API_SK: + elif engine == "sougou": + if ( + request.app.state.config.SOUGOU_API_SID + and request.app.state.config.SOUGOU_API_SK + ): return search_sougou( request.app.state.config.SOUGOU_API_SID, request.app.state.config.SOUGOU_API_SK, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: - raise Exception("No SOUGOU_API_SID or SOUGOU_API_SK found in environment variables") + raise Exception( + "No SOUGOU_API_SID or SOUGOU_API_SK found in environment variables" + ) else: raise Exception("No search engine API key found in environment variables") @@ -1513,10 +1468,10 @@ async def process_web_search( ): try: logging.info( - f"trying to web search with {request.app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query}" + f"trying to web search with {request.app.state.config.WEB_SEARCH_ENGINE, form_data.query}" ) web_results = search_web( - request, request.app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query + request, request.app.state.config.WEB_SEARCH_ENGINE, form_data.query ) except Exception as e: log.exception(e) @@ -1532,9 +1487,9 @@ async def process_web_search( urls = [result.link for result in web_results] loader = get_web_loader( urls, - verify_ssl=request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, - requests_per_second=request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, - trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV, + verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, + requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, + trust_env=request.app.state.config.WEB_SEARCH_TRUST_ENV, ) docs = await loader.aload() urls = [ diff --git a/backend/start.sh b/backend/start.sh index b9a30fd3d..4588e4c34 100755 --- a/backend/start.sh +++ b/backend/start.sh @@ -4,8 +4,8 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd "$SCRIPT_DIR" || exit # Add conditional Playwright browser installation -if [[ "${RAG_WEB_LOADER_ENGINE,,}" == "playwright" ]]; then - if [[ -z "${PLAYWRIGHT_WS_URI}" ]]; then +if [[ "${WEB_LOADER_ENGINE,,}" == "playwright" ]]; then + if [[ -z "${PLAYWRIGHT_WS_URL}" ]]; then echo "Installing Playwright browsers..." playwright install chromium playwright install-deps chromium diff --git a/backend/start_windows.bat b/backend/start_windows.bat index 661ecc494..8d9aae3ac 100644 --- a/backend/start_windows.bat +++ b/backend/start_windows.bat @@ -7,8 +7,8 @@ SET "SCRIPT_DIR=%~dp0" cd /d "%SCRIPT_DIR%" || exit /b :: Add conditional Playwright browser installation -IF /I "%RAG_WEB_LOADER_ENGINE%" == "playwright" ( - IF "%PLAYWRIGHT_WS_URI%" == "" ( +IF /I "%WEB_LOADER_ENGINE%" == "playwright" ( + IF "%PLAYWRIGHT_WS_URL%" == "" ( echo Installing Playwright browsers... playwright install chromium playwright install-deps chromium diff --git a/docker-compose.playwright.yaml b/docker-compose.playwright.yaml index fe570bed0..fa2b49ff9 100644 --- a/docker-compose.playwright.yaml +++ b/docker-compose.playwright.yaml @@ -6,5 +6,5 @@ services: open-webui: environment: - - 'RAG_WEB_LOADER_ENGINE=playwright' - - 'PLAYWRIGHT_WS_URI=ws://playwright:3000' \ No newline at end of file + - 'WEB_LOADER_ENGINE=playwright' + - 'PLAYWRIGHT_WS_URL=ws://playwright:3000' diff --git a/src/lib/apis/retrieval/index.ts b/src/lib/apis/retrieval/index.ts index 31317fe0b..f4b937b68 100644 --- a/src/lib/apis/retrieval/index.ts +++ b/src/lib/apis/retrieval/index.ts @@ -50,9 +50,9 @@ type YoutubeConfigForm = { }; type RAGConfigForm = { - pdf_extract_images?: boolean; - enable_google_drive_integration?: boolean; - enable_onedrive_integration?: boolean; + PDF_EXTRACT_IMAGES?: boolean; + ENABLE_GOOGLE_DRIVE_INTEGRATION?: boolean; + ENABLE_ONEDRIVE_INTEGRATION?: boolean; chunk?: ChunkConfigForm; content_extraction?: ContentExtractConfigForm; web_loader_ssl_verification?: boolean; @@ -89,33 +89,6 @@ export const updateRAGConfig = async (token: string, payload: RAGConfigForm) => return res; }; -export const getRAGTemplate = async (token: string) => { - let error = null; - - const res = await fetch(`${RETRIEVAL_API_BASE_URL}/template`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${token}` - } - }) - .then(async (res) => { - if (!res.ok) throw await res.json(); - return res.json(); - }) - .catch((err) => { - console.log(err); - error = err.detail; - return null; - }); - - if (error) { - throw error; - } - - return res?.template ?? ''; -}; - export const getQuerySettings = async (token: string) => { let error = null; diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index 97c62c6a4..2047a07e7 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -17,8 +17,8 @@ updateRAGConfig } from '$lib/apis/retrieval'; - import { reindexKnowledgeFiles} from '$lib/apis/knowledge'; - import { deleteAllFiles } from '$lib/apis/files'; + import { reindexKnowledgeFiles } from '$lib/apis/knowledge'; + import { deleteAllFiles } from '$lib/apis/files'; import ResetUploadDirConfirmDialog from '$lib/components/common/ConfirmDialog.svelte'; import ResetVectorDBConfirmDialog from '$lib/components/common/ConfirmDialog.svelte'; @@ -27,6 +27,7 @@ import Tooltip from '$lib/components/common/Tooltip.svelte'; import Switch from '$lib/components/common/Switch.svelte'; import Textarea from '$lib/components/common/Textarea.svelte'; + import Spinner from '$lib/components/common/Spinner.svelte'; const i18n = getContext('i18n'); @@ -42,31 +43,6 @@ let embeddingBatchSize = 1; let rerankingModel = ''; - let fileMaxSize = null; - let fileMaxCount = null; - - let contentExtractionEngine = 'default'; - let tikaServerUrl = ''; - let showTikaServerUrl = false; - let doclingServerUrl = ''; - let showDoclingServerUrl = false; - let documentIntelligenceEndpoint = ''; - let documentIntelligenceKey = ''; - let showDocumentIntelligenceConfig = false; - let mistralApiKey = ''; - let showMistralOcrConfig = false; - - let textSplitter = ''; - let chunkSize = 0; - let chunkOverlap = 0; - let pdfExtractImages = true; - - let RAG_FULL_CONTEXT = false; - let BYPASS_EMBEDDING_AND_RETRIEVAL = false; - - let enableGoogleDriveIntegration = false; - let enableOneDriveIntegration = false; - let OpenAIUrl = ''; let OpenAIKey = ''; @@ -81,6 +57,8 @@ hybrid: false }; + let RAGConfig = null; + const embeddingModelUpdateHandler = async () => { if (embeddingEngine === '' && embeddingModel.split('/').length - 1 > 1) { toast.error( @@ -175,65 +153,40 @@ }; const submitHandler = async () => { - if (contentExtractionEngine === 'tika' && tikaServerUrl === '') { + if (RAGConfig.CONTENT_EXTRACTION_ENGINE === 'tika' && RAGConfig.TIKA_SERVER_URL === '') { toast.error($i18n.t('Tika Server URL required.')); return; } - if (contentExtractionEngine === 'docling' && doclingServerUrl === '') { + if (RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling' && RAGConfig.DOCLING_SERVER_URL === '') { toast.error($i18n.t('Docling Server URL required.')); return; } + if ( - contentExtractionEngine === 'document_intelligence' && - (documentIntelligenceEndpoint === '' || documentIntelligenceKey === '') + RAGConfig.CONTENT_EXTRACTION_ENGINE === 'document_intelligence' && + (RAGConfig.DOCUMENT_INTELLIGENCE_ENDPOINT === '' || + RAGConfig.DOCUMENT_INTELLIGENCE_KEY === '') ) { toast.error($i18n.t('Document Intelligence endpoint and key required.')); return; } - if (contentExtractionEngine === 'mistral_ocr' && mistralApiKey === '') { + if ( + RAGConfig.CONTENT_EXTRACTION_ENGINE === 'mistral_ocr' && + RAGConfig.MISTRAL_OCR_API_KEY === '' + ) { toast.error($i18n.t('Mistral OCR API Key required.')); return; } - if (!BYPASS_EMBEDDING_AND_RETRIEVAL) { + if (!RAGConfig.BYPASS_EMBEDDING_AND_RETRIEVAL) { await embeddingModelUpdateHandler(); - if (querySettings.hybrid) { + if (RAGConfig.ENABLE_RAG_HYBRID_SEARCH) { await rerankingModelUpdateHandler(); } } - const res = await updateRAGConfig(localStorage.token, { - pdf_extract_images: pdfExtractImages, - enable_google_drive_integration: enableGoogleDriveIntegration, - enable_onedrive_integration: enableOneDriveIntegration, - file: { - max_size: fileMaxSize === '' ? null : fileMaxSize, - max_count: fileMaxCount === '' ? null : fileMaxCount - }, - RAG_FULL_CONTEXT: RAG_FULL_CONTEXT, - BYPASS_EMBEDDING_AND_RETRIEVAL: BYPASS_EMBEDDING_AND_RETRIEVAL, - chunk: { - text_splitter: textSplitter, - chunk_overlap: chunkOverlap, - chunk_size: chunkSize - }, - content_extraction: { - engine: contentExtractionEngine, - tika_server_url: tikaServerUrl, - docling_server_url: doclingServerUrl, - document_intelligence_config: { - key: documentIntelligenceKey, - endpoint: documentIntelligenceEndpoint - }, - mistral_ocr_config: { - api_key: mistralApiKey - } - } - }); - - await updateQuerySettings(localStorage.token, querySettings); - + const res = await updateRAGConfig(localStorage.token, RAGConfig); dispatch('save'); }; @@ -261,46 +214,11 @@ } }; - const toggleHybridSearch = async () => { - querySettings = await updateQuerySettings(localStorage.token, querySettings); - }; - onMount(async () => { await setEmbeddingConfig(); await setRerankingConfig(); - querySettings = await getQuerySettings(localStorage.token); - - const res = await getRAGConfig(localStorage.token); - - if (res) { - pdfExtractImages = res.pdf_extract_images; - - textSplitter = res.chunk.text_splitter; - chunkSize = res.chunk.chunk_size; - chunkOverlap = res.chunk.chunk_overlap; - - RAG_FULL_CONTEXT = res.RAG_FULL_CONTEXT; - BYPASS_EMBEDDING_AND_RETRIEVAL = res.BYPASS_EMBEDDING_AND_RETRIEVAL; - - contentExtractionEngine = res.content_extraction.engine; - tikaServerUrl = res.content_extraction.tika_server_url; - doclingServerUrl = res.content_extraction.docling_server_url; - - showTikaServerUrl = contentExtractionEngine === 'tika'; - showDoclingServerUrl = contentExtractionEngine === 'docling'; - documentIntelligenceEndpoint = res.content_extraction.document_intelligence_config.endpoint; - documentIntelligenceKey = res.content_extraction.document_intelligence_config.key; - showDocumentIntelligenceConfig = contentExtractionEngine === 'document_intelligence'; - mistralApiKey = res.content_extraction.mistral_ocr_config.api_key; - showMistralOcrConfig = contentExtractionEngine === 'mistral_ocr'; - - fileMaxSize = res?.file.max_size ?? ''; - fileMaxCount = res?.file.max_count ?? ''; - - enableGoogleDriveIntegration = res.enable_google_drive_integration; - enableOneDriveIntegration = res.enable_onedrive_integration; - } + RAGConfig = await getRAGConfig(localStorage.token); }); @@ -332,7 +250,6 @@ }} /> - { @@ -353,339 +270,93 @@ submitHandler(); }} > -
-
-
-
{$i18n.t('General')}
- -
- -
-
-
- {$i18n.t('Content Extraction Engine')} -
-
- -
-
- {#if contentExtractionEngine === 'tika'} -
-
- -
-
- {:else if contentExtractionEngine === 'docling'} -
- -
- {:else if contentExtractionEngine === 'document_intelligence'} -
- - -
- {:else if contentExtractionEngine === 'mistral_ocr'} -
- -
- {/if} -
- - {#if contentExtractionEngine === ''} -
-
- {$i18n.t('PDF Extract Images (OCR)')} -
-
- -
-
- {/if} - -
-
- - {$i18n.t('Bypass Embedding and Retrieval')} - -
-
- - - -
-
- - {#if !BYPASS_EMBEDDING_AND_RETRIEVAL} -
-
{$i18n.t('Text Splitter')}
-
- -
-
- -
-
-
-
- {$i18n.t('Chunk Size')} -
-
- -
-
- -
-
- {$i18n.t('Chunk Overlap')} -
- -
- -
-
-
-
- {/if} -
- - {#if !BYPASS_EMBEDDING_AND_RETRIEVAL} + {#if RAGConfig} +
+
-
{$i18n.t('Embedding')}
+
{$i18n.t('General')}

-
+
-
- {$i18n.t('Embedding Model Engine')} +
+ {$i18n.t('Content Extraction Engine')}
-
+
- {#if embeddingEngine === 'openai'} -
- - - + {#if RAGConfig.CONTENT_EXTRACTION_ENGINE === ''} +
+
+
+ {$i18n.t('PDF Extract Images (OCR)')} +
+
+ +
+
- {:else if embeddingEngine === 'ollama'} + {:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'tika'} +
+
+ +
+
+ {:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling'} +
+ +
+ {:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'document_intelligence'}
- +
+ {:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'mistral_ocr'} +
+
{/if}
-
-
{$i18n.t('Embedding Model')}
- -
- {#if embeddingEngine === 'ollama'} -
-
- -
-
- {:else} -
-
- -
- - {#if embeddingEngine === ''} - - {/if} -
- {/if} -
- -
- {$i18n.t( - 'Warning: If you update or change your embedding model, you will need to re-import all documents.' - )} -
-
- - {#if embeddingEngine === 'ollama' || embeddingEngine === 'openai'} -
-
{$i18n.t('Embedding Batch Size')}
- -
- -
-
- {/if} -
- -
-
{$i18n.t('Retrieval')}
- -
-
-
{$i18n.t('Full Context Mode')}
+
+ + {$i18n.t('Bypass Embedding and Retrieval')} + +
- +
- {#if !RAG_FULL_CONTEXT} + {#if !RAGConfig.BYPASS_EMBEDDING_AND_RETRIEVAL}
-
{$i18n.t('Hybrid Search')}
+
{$i18n.t('Text Splitter')}
- { - toggleHybridSearch(); - }} - /> +
- {#if querySettings.hybrid === true} -
-
{$i18n.t('Reranking Model')}
+
+
+
+
+ {$i18n.t('Chunk Size')} +
+
+ +
+
-
+
+
+ {$i18n.t('Chunk Overlap')} +
+ +
+ +
+
+
+
+ {/if} +
+ + {#if !RAGConfig.BYPASS_EMBEDDING_AND_RETRIEVAL} +
+
{$i18n.t('Embedding')}
+ +
+ +
+
+
+ {$i18n.t('Embedding Model Engine')} +
+
+ +
+
+ + {#if embeddingEngine === 'openai'} +
+ + + +
+ {:else if embeddingEngine === 'ollama'} +
+ + + +
+ {/if} +
+ +
+
{$i18n.t('Embedding Model')}
+ +
+ {#if embeddingEngine === 'ollama'}
+
+
+ {:else} +
+
+
- + {/if} + + {/if}
-
+ {/if}
- {/if} -
-
{$i18n.t('Top K')}
-
- +
+ {$i18n.t( + 'Warning: If you update or change your embedding model, you will need to re-import all documents.' + )}
- {#if querySettings.hybrid === true} -
-
{$i18n.t('Top K Reranker')}
+ {#if embeddingEngine === 'ollama' || embeddingEngine === 'openai'} +
+
+ {$i18n.t('Embedding Batch Size')} +
+ +
+ +
+
+ {/if} +
+ +
+
{$i18n.t('Retrieval')}
+ +
+ +
+
{$i18n.t('Full Context Mode')}
+
+ + + +
+
+ + {#if !RAGConfig.RAG_FULL_CONTEXT} +
+
{$i18n.t('Hybrid Search')}
+
+ { + submitHandler(); + }} + /> +
+
+ + {#if RAGConfig.ENABLE_RAG_HYBRID_SEARCH === true} +
+
{$i18n.t('Reranking Model')}
+ +
+
+
+ +
+ +
+
+
+ {/if} + +
+
{$i18n.t('Top K')}
- {/if} - {#if querySettings.hybrid === true} -
-
-
{$i18n.t('Minimum Score')}
+ {#if RAGConfig.ENABLE_RAG_HYBRID_SEARCH === true} +
+
{$i18n.t('Top K Reranker')}
-
- {$i18n.t( - 'Note: If you set a minimum score, the search will only return documents with a score greater than or equal to the minimum score.' - )} -
-
- {/if} - {/if} + {/if} -
-
{$i18n.t('RAG Template')}
-
- -