From e70dd3323390c9da5c9197efbbc50a07db7d79d2 Mon Sep 17 00:00:00 2001 From: Jan Kessler Date: Fri, 23 May 2025 22:06:44 +0200 Subject: [PATCH] rename BM25_WEIGHT -> HYBRID_BM25_WEIGHT --- backend/open_webui/config.py | 8 ++--- backend/open_webui/main.py | 4 +-- backend/open_webui/retrieval/utils.py | 16 +++++----- backend/open_webui/routers/retrieval.py | 30 +++++++++---------- backend/open_webui/utils/middleware.py | 2 +- .../admin/Settings/Documents.svelte | 4 +-- 6 files changed, 32 insertions(+), 32 deletions(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index eac03fd8a..d21832e3c 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -1928,10 +1928,10 @@ RAG_RELEVANCE_THRESHOLD = PersistentConfig( "rag.relevance_threshold", float(os.environ.get("RAG_RELEVANCE_THRESHOLD", "0.0")), ) -RAG_BM25_WEIGHT = PersistentConfig( - "RAG_BM25_WEIGHT", - "rag.bm25_weight", - float(os.environ.get("RAG_BM25_WEIGHT", "0.5")), +RAG_HYBRID_BM25_WEIGHT = PersistentConfig( + "RAG_HYBRID_BM25_WEIGHT", + "rag.hybrid_bm25_weight", + float(os.environ.get("RAG_HYBRID_BM25_WEIGHT", "0.5")), ) ENABLE_RAG_HYBRID_SEARCH = PersistentConfig( diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 01dd42cf3..c384f55e6 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -199,7 +199,7 @@ from open_webui.config import ( RAG_TOP_K, RAG_TOP_K_RERANKER, RAG_RELEVANCE_THRESHOLD, - RAG_BM25_WEIGHT, + RAG_HYBRID_BM25_WEIGHT, RAG_ALLOWED_FILE_EXTENSIONS, RAG_FILE_MAX_COUNT, RAG_FILE_MAX_SIZE, @@ -647,7 +647,7 @@ app.state.FUNCTIONS = {} app.state.config.TOP_K = RAG_TOP_K app.state.config.TOP_K_RERANKER = RAG_TOP_K_RERANKER app.state.config.RELEVANCE_THRESHOLD = RAG_RELEVANCE_THRESHOLD -app.state.config.BM25_WEIGHT = RAG_BM25_WEIGHT +app.state.config.HYBRID_BM25_WEIGHT = RAG_HYBRID_BM25_WEIGHT app.state.config.ALLOWED_FILE_EXTENSIONS = RAG_ALLOWED_FILE_EXTENSIONS app.state.config.FILE_MAX_SIZE = RAG_FILE_MAX_SIZE app.state.config.FILE_MAX_COUNT = RAG_FILE_MAX_COUNT diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 70c3f4115..b7b4912ac 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -116,7 +116,7 @@ def query_doc_with_hybrid_search( reranking_function, k_reranker: int, r: float, - bm25_weight: float, + hybrid_bm25_weight: float, ) -> dict: try: log.debug(f"query_doc_with_hybrid_search:doc {collection_name}") @@ -132,18 +132,18 @@ def query_doc_with_hybrid_search( top_k=k, ) - if bm25_weight <= 0: + if hybrid_bm25_weight <= 0: ensemble_retriever = EnsembleRetriever( retrievers=[vector_search_retriever], weights=[1.] ) - elif bm25_weight >= 1: + elif hybrid_bm25_weight >= 1: ensemble_retriever = EnsembleRetriever( retrievers=[bm25_retriever], weights=[1.] ) else: ensemble_retriever = EnsembleRetriever( retrievers=[bm25_retriever, vector_search_retriever], - weights=[bm25_weight, 1. - bm25_weight] + weights=[hybrid_bm25_weight, 1. - hybrid_bm25_weight] ) compressor = RerankCompressor( @@ -325,7 +325,7 @@ def query_collection_with_hybrid_search( reranking_function, k_reranker: int, r: float, - bm25_weight: float, + hybrid_bm25_weight: float, ) -> dict: results = [] error = False @@ -359,7 +359,7 @@ def query_collection_with_hybrid_search( reranking_function=reranking_function, k_reranker=k_reranker, r=r, - bm25_weight=bm25_weight, + hybrid_bm25_weight=hybrid_bm25_weight, ) return result, None except Exception as e: @@ -447,7 +447,7 @@ def get_sources_from_files( reranking_function, k_reranker, r, - bm25_weight, + hybrid_bm25_weight, hybrid_search, full_context=False, ): @@ -565,7 +565,7 @@ def get_sources_from_files( reranking_function=reranking_function, k_reranker=k_reranker, r=r, - bm25_weight=bm25_weight, + hybrid_bm25_weight=hybrid_bm25_weight, ) except Exception as e: log.debug( diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index e31dba299..5076ff8a6 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -349,7 +349,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "ENABLE_RAG_HYBRID_SEARCH": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, "TOP_K_RERANKER": request.app.state.config.TOP_K_RERANKER, "RELEVANCE_THRESHOLD": request.app.state.config.RELEVANCE_THRESHOLD, - "BM25_WEIGHT": request.app.state.config.BM25_WEIGHT, + "HYBRID_BM25_WEIGHT": request.app.state.config.HYBRID_BM25_WEIGHT, # Content extraction settings "CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE, "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES, @@ -493,7 +493,7 @@ class ConfigForm(BaseModel): ENABLE_RAG_HYBRID_SEARCH: Optional[bool] = None TOP_K_RERANKER: Optional[int] = None RELEVANCE_THRESHOLD: Optional[float] = None - BM25_WEIGHT: Optional[float] = None + HYBRID_BM25_WEIGHT: Optional[float] = None # Content extraction settings CONTENT_EXTRACTION_ENGINE: Optional[str] = None @@ -580,10 +580,10 @@ async def update_rag_config( if form_data.RELEVANCE_THRESHOLD is not None else request.app.state.config.RELEVANCE_THRESHOLD ) - request.app.state.config.BM25_WEIGHT = ( - form_data.BM25_WEIGHT - if form_data.BM25_WEIGHT is not None - else request.app.state.config.BM25_WEIGHT + request.app.state.config.HYBRID_BM25_WEIGHT = ( + form_data.HYBRID_BM25_WEIGHT + if form_data.HYBRID_BM25_WEIGHT is not None + else request.app.state.config.HYBRID_BM25_WEIGHT ) # Content extraction settings @@ -844,7 +844,7 @@ async def update_rag_config( "ENABLE_RAG_HYBRID_SEARCH": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, "TOP_K_RERANKER": request.app.state.config.TOP_K_RERANKER, "RELEVANCE_THRESHOLD": request.app.state.config.RELEVANCE_THRESHOLD, - "BM25_WEIGHT": request.app.state.config.BM25_WEIGHT, + "HYBRID_BM25_WEIGHT": request.app.state.config.HYBRID_BM25_WEIGHT, # Content extraction settings "CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE, "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES, @@ -1782,10 +1782,10 @@ def query_doc_handler( if form_data.r else request.app.state.config.RELEVANCE_THRESHOLD ), - bm25_weight=( - form_data.bm25_weight - if form_data.bm25_weight - else request.app.state.config.BM25_WEIGHT + hybrid_bm25_weight=( + form_data.hybrid_bm25_weight + if form_data.hybrid_bm25_weight + else request.app.state.config.HYBRID_BM25_WEIGHT ), user=user, ) @@ -1838,10 +1838,10 @@ def query_collection_handler( if form_data.r else request.app.state.config.RELEVANCE_THRESHOLD ), - bm25_weight=( - form_data.bm25_weight - if form_data.bm25_weight - else request.app.state.config.BM25_WEIGHT + hybrid_bm25_weight=( + form_data.hybrid_bm25_weight + if form_data.hybrid_bm25_weight + else request.app.state.config.HYBRID_BM25_WEIGHT ), ) else: diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index c0ce2f063..9fa513bf9 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -603,7 +603,7 @@ async def chat_completion_files_handler( reranking_function=request.app.state.rf, k_reranker=request.app.state.config.TOP_K_RERANKER, r=request.app.state.config.RELEVANCE_THRESHOLD, - bm25_weight=request.app.state.config.BM25_WEIGHT, + hybrid_bm25_weight=request.app.state.config.HYBRID_BM25_WEIGHT, hybrid_search=request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, full_context=request.app.state.config.RAG_FULL_CONTEXT, ), diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index 6dacb3257..7de9e8261 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -773,14 +773,14 @@ {#if RAGConfig.ENABLE_RAG_HYBRID_SEARCH === true}
-
{$i18n.t('BM25 Weight')}
+
{$i18n.t('Weight of BM25 Retrieval')}