# Patch summary: adds a configurable BM25 weight for hybrid search (RAG_HYBRID_BM25_WEIGHT / "rag.hybrid_bm25_weight", env default 0.5).
# retrieval/utils.py: weight <= 0 builds the ensemble from the vector retriever only, weight >= 1 from BM25 only, otherwise both with weights [w, 1.0 - w]. The new parameter defaults to 0.5 (the previously hard-coded split) on query_doc_with_hybrid_search and query_collection_with_hybrid_search.
# routers/retrieval.py: the query handlers take the per-request weight whenever one is supplied (an explicit 0 included) and otherwise use the configured value.
# These summary lines precede the first "diff --git" header and belong to no hunk.
diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 85fac016a..441c99efb 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -1928,6 +1928,11 @@ RAG_RELEVANCE_THRESHOLD = PersistentConfig( "rag.relevance_threshold", float(os.environ.get("RAG_RELEVANCE_THRESHOLD", "0.0")), ) +RAG_HYBRID_BM25_WEIGHT = PersistentConfig( + "RAG_HYBRID_BM25_WEIGHT", + "rag.hybrid_bm25_weight", + float(os.environ.get("RAG_HYBRID_BM25_WEIGHT", "0.5")), +) ENABLE_RAG_HYBRID_SEARCH = PersistentConfig( "ENABLE_RAG_HYBRID_SEARCH", diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 1c3681c22..0a274d028 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -196,7 +196,10 @@ from open_webui.config import ( RAG_RERANKING_MODEL_TRUST_REMOTE_CODE, RAG_EMBEDDING_ENGINE, RAG_EMBEDDING_BATCH_SIZE, + RAG_TOP_K, + RAG_TOP_K_RERANKER, RAG_RELEVANCE_THRESHOLD, + RAG_HYBRID_BM25_WEIGHT, RAG_ALLOWED_FILE_EXTENSIONS, RAG_FILE_MAX_COUNT, RAG_FILE_MAX_SIZE, @@ -217,8 +220,6 @@ from open_webui.config import ( DOCUMENT_INTELLIGENCE_ENDPOINT, DOCUMENT_INTELLIGENCE_KEY, MISTRAL_OCR_API_KEY, - RAG_TOP_K, - RAG_TOP_K_RERANKER, RAG_TEXT_SPLITTER, TIKTOKEN_ENCODING_NAME, PDF_EXTRACT_IMAGES, @@ -647,6 +648,7 @@ app.state.FUNCTIONS = {} app.state.config.TOP_K = RAG_TOP_K app.state.config.TOP_K_RERANKER = RAG_TOP_K_RERANKER app.state.config.RELEVANCE_THRESHOLD = RAG_RELEVANCE_THRESHOLD +app.state.config.HYBRID_BM25_WEIGHT = RAG_HYBRID_BM25_WEIGHT app.state.config.ALLOWED_FILE_EXTENSIONS = RAG_ALLOWED_FILE_EXTENSIONS app.state.config.FILE_MAX_SIZE = RAG_FILE_MAX_SIZE app.state.config.FILE_MAX_COUNT = RAG_FILE_MAX_COUNT diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index a132d7201..b7b4912ac 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -116,6 +116,7 @@ def query_doc_with_hybrid_search( reranking_function, k_reranker: int, r: 
float, + hybrid_bm25_weight: float = 0.5, ) -> dict: try: log.debug(f"query_doc_with_hybrid_search:doc {collection_name}") @@ -131,9 +132,20 @@ def query_doc_with_hybrid_search( top_k=k, ) - ensemble_retriever = EnsembleRetriever( - retrievers=[bm25_retriever, vector_search_retriever], weights=[0.5, 0.5] - ) + if hybrid_bm25_weight <= 0: + ensemble_retriever = EnsembleRetriever( + retrievers=[vector_search_retriever], weights=[1.0] + ) + elif hybrid_bm25_weight >= 1: + ensemble_retriever = EnsembleRetriever( + retrievers=[bm25_retriever], weights=[1.0] + ) + else: + ensemble_retriever = EnsembleRetriever( + retrievers=[bm25_retriever, vector_search_retriever], + weights=[hybrid_bm25_weight, 1.0 - hybrid_bm25_weight], + ) + compressor = RerankCompressor( embedding_function=embedding_function, top_n=k_reranker, @@ -313,6 +325,7 @@ def query_collection_with_hybrid_search( reranking_function, k_reranker: int, r: float, + hybrid_bm25_weight: float = 0.5, ) -> dict: results = [] error = False @@ -346,6 +359,7 @@ def query_collection_with_hybrid_search( reranking_function=reranking_function, k_reranker=k_reranker, r=r, + hybrid_bm25_weight=hybrid_bm25_weight, ) return result, None except Exception as e: @@ -433,6 +447,7 @@ def get_sources_from_files( reranking_function, k_reranker, r, + hybrid_bm25_weight, hybrid_search, full_context=False, ): @@ -550,6 +565,7 @@ def get_sources_from_files( reranking_function=reranking_function, k_reranker=k_reranker, r=r, + hybrid_bm25_weight=hybrid_bm25_weight, ) except Exception as e: log.debug( diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 82114d755..98f79c7fe 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -349,6 +349,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "ENABLE_RAG_HYBRID_SEARCH": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, "TOP_K_RERANKER": request.app.state.config.TOP_K_RERANKER, 
"RELEVANCE_THRESHOLD": request.app.state.config.RELEVANCE_THRESHOLD, + "HYBRID_BM25_WEIGHT": request.app.state.config.HYBRID_BM25_WEIGHT, # Content extraction settings "CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE, "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES, @@ -494,6 +495,7 @@ class ConfigForm(BaseModel): ENABLE_RAG_HYBRID_SEARCH: Optional[bool] = None TOP_K_RERANKER: Optional[int] = None RELEVANCE_THRESHOLD: Optional[float] = None + HYBRID_BM25_WEIGHT: Optional[float] = None # Content extraction settings CONTENT_EXTRACTION_ENGINE: Optional[str] = None @@ -580,6 +582,11 @@ async def update_rag_config( if form_data.RELEVANCE_THRESHOLD is not None else request.app.state.config.RELEVANCE_THRESHOLD ) + request.app.state.config.HYBRID_BM25_WEIGHT = ( + form_data.HYBRID_BM25_WEIGHT + if form_data.HYBRID_BM25_WEIGHT is not None + else request.app.state.config.HYBRID_BM25_WEIGHT + ) # Content extraction settings request.app.state.config.CONTENT_EXTRACTION_ENGINE = ( @@ -842,6 +849,7 @@ async def update_rag_config( "ENABLE_RAG_HYBRID_SEARCH": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, "TOP_K_RERANKER": request.app.state.config.TOP_K_RERANKER, "RELEVANCE_THRESHOLD": request.app.state.config.RELEVANCE_THRESHOLD, + "HYBRID_BM25_WEIGHT": request.app.state.config.HYBRID_BM25_WEIGHT, # Content extraction settings "CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE, "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES, @@ -1796,6 +1804,11 @@ def query_doc_handler( if form_data.r else request.app.state.config.RELEVANCE_THRESHOLD ), + hybrid_bm25_weight=( + form_data.hybrid_bm25_weight + if getattr(form_data, "hybrid_bm25_weight", None) is not None + else request.app.state.config.HYBRID_BM25_WEIGHT + ), user=user, ) else: @@ -1847,6 +1860,11 @@ def query_collection_handler( if form_data.r else request.app.state.config.RELEVANCE_THRESHOLD ), + hybrid_bm25_weight=( + 
form_data.hybrid_bm25_weight + if 
getattr(form_data, "hybrid_bm25_weight", None) is not None + else request.app.state.config.HYBRID_BM25_WEIGHT + ), ) else: return query_collection( diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index 2d519ad9c..39b2a4c19 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -643,6 +643,7 @@ async def chat_completion_files_handler( reranking_function=request.app.state.rf, k_reranker=request.app.state.config.TOP_K_RERANKER, r=request.app.state.config.RELEVANCE_THRESHOLD, + hybrid_bm25_weight=request.app.state.config.HYBRID_BM25_WEIGHT, hybrid_search=request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, full_context=request.app.state.config.RAG_FULL_CONTEXT, ), diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index 0660dc7ae..1d4723081 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -771,6 +771,24 @@ {/if} + + {#if RAGConfig.ENABLE_RAG_HYBRID_SEARCH === true}
+
{$i18n.t('Weight of BM25 Retrieval')}
+
+ +
+
+ {/if} {/if}
diff --git a/src/lib/i18n/locales/en-US/translation.json b/src/lib/i18n/locales/en-US/translation.json index df367d5c8..7b2a39128 100644 --- a/src/lib/i18n/locales/en-US/translation.json +++ b/src/lib/i18n/locales/en-US/translation.json @@ -425,6 +425,7 @@ "Enter Application DN Password": "", "Enter Bing Search V7 Endpoint": "", "Enter Bing Search V7 Subscription Key": "", + "Enter BM25 Weight": "", "Enter Bocha Search API Key": "", "Enter Brave Search API Key": "", "Enter certificate path": "", @@ -1304,6 +1305,7 @@ "What’s New in": "", "When enabled, the model will respond to each chat message in real-time, generating a response as soon as the user sends a message. This mode is useful for live chat applications, but may impact performance on slower hardware.": "", "wherever you are": "", + "Weight of BM25 Retrieval": "", "Whisper (Local)": "", "Why?": "", "Widescreen Mode": "",