rename BM25_WEIGHT -> HYBRID_BM25_WEIGHT

This commit is contained in:
Jan Kessler 2025-05-23 22:06:44 +02:00
parent 308d8ac04a
commit e70dd33233
No known key found for this signature in database
GPG Key ID: FCF0DCB4ADFC53E7
6 changed files with 32 additions and 32 deletions

View File

@@ -1928,10 +1928,10 @@ RAG_RELEVANCE_THRESHOLD = PersistentConfig(
"rag.relevance_threshold", "rag.relevance_threshold",
float(os.environ.get("RAG_RELEVANCE_THRESHOLD", "0.0")), float(os.environ.get("RAG_RELEVANCE_THRESHOLD", "0.0")),
) )
RAG_BM25_WEIGHT = PersistentConfig( RAG_HYBRID_BM25_WEIGHT = PersistentConfig(
"RAG_BM25_WEIGHT", "RAG_HYBRID_BM25_WEIGHT",
"rag.bm25_weight", "rag.hybrid_bm25_weight",
float(os.environ.get("RAG_BM25_WEIGHT", "0.5")), float(os.environ.get("RAG_HYBRID_BM25_WEIGHT", "0.5")),
) )
ENABLE_RAG_HYBRID_SEARCH = PersistentConfig( ENABLE_RAG_HYBRID_SEARCH = PersistentConfig(

View File

@@ -199,7 +199,7 @@ from open_webui.config import (
RAG_TOP_K, RAG_TOP_K,
RAG_TOP_K_RERANKER, RAG_TOP_K_RERANKER,
RAG_RELEVANCE_THRESHOLD, RAG_RELEVANCE_THRESHOLD,
RAG_BM25_WEIGHT, RAG_HYBRID_BM25_WEIGHT,
RAG_ALLOWED_FILE_EXTENSIONS, RAG_ALLOWED_FILE_EXTENSIONS,
RAG_FILE_MAX_COUNT, RAG_FILE_MAX_COUNT,
RAG_FILE_MAX_SIZE, RAG_FILE_MAX_SIZE,
@@ -647,7 +647,7 @@ app.state.FUNCTIONS = {}
app.state.config.TOP_K = RAG_TOP_K app.state.config.TOP_K = RAG_TOP_K
app.state.config.TOP_K_RERANKER = RAG_TOP_K_RERANKER app.state.config.TOP_K_RERANKER = RAG_TOP_K_RERANKER
app.state.config.RELEVANCE_THRESHOLD = RAG_RELEVANCE_THRESHOLD app.state.config.RELEVANCE_THRESHOLD = RAG_RELEVANCE_THRESHOLD
app.state.config.BM25_WEIGHT = RAG_BM25_WEIGHT app.state.config.HYBRID_BM25_WEIGHT = RAG_HYBRID_BM25_WEIGHT
app.state.config.ALLOWED_FILE_EXTENSIONS = RAG_ALLOWED_FILE_EXTENSIONS app.state.config.ALLOWED_FILE_EXTENSIONS = RAG_ALLOWED_FILE_EXTENSIONS
app.state.config.FILE_MAX_SIZE = RAG_FILE_MAX_SIZE app.state.config.FILE_MAX_SIZE = RAG_FILE_MAX_SIZE
app.state.config.FILE_MAX_COUNT = RAG_FILE_MAX_COUNT app.state.config.FILE_MAX_COUNT = RAG_FILE_MAX_COUNT

View File

@@ -116,7 +116,7 @@ def query_doc_with_hybrid_search(
reranking_function, reranking_function,
k_reranker: int, k_reranker: int,
r: float, r: float,
bm25_weight: float, hybrid_bm25_weight: float,
) -> dict: ) -> dict:
try: try:
log.debug(f"query_doc_with_hybrid_search:doc {collection_name}") log.debug(f"query_doc_with_hybrid_search:doc {collection_name}")
@@ -132,18 +132,18 @@ def query_doc_with_hybrid_search(
top_k=k, top_k=k,
) )
if bm25_weight <= 0: if hybrid_bm25_weight <= 0:
ensemble_retriever = EnsembleRetriever( ensemble_retriever = EnsembleRetriever(
retrievers=[vector_search_retriever], weights=[1.] retrievers=[vector_search_retriever], weights=[1.]
) )
elif bm25_weight >= 1: elif hybrid_bm25_weight >= 1:
ensemble_retriever = EnsembleRetriever( ensemble_retriever = EnsembleRetriever(
retrievers=[bm25_retriever], weights=[1.] retrievers=[bm25_retriever], weights=[1.]
) )
else: else:
ensemble_retriever = EnsembleRetriever( ensemble_retriever = EnsembleRetriever(
retrievers=[bm25_retriever, vector_search_retriever], retrievers=[bm25_retriever, vector_search_retriever],
weights=[bm25_weight, 1. - bm25_weight] weights=[hybrid_bm25_weight, 1. - hybrid_bm25_weight]
) )
compressor = RerankCompressor( compressor = RerankCompressor(
@@ -325,7 +325,7 @@ def query_collection_with_hybrid_search(
reranking_function, reranking_function,
k_reranker: int, k_reranker: int,
r: float, r: float,
bm25_weight: float, hybrid_bm25_weight: float,
) -> dict: ) -> dict:
results = [] results = []
error = False error = False
@@ -359,7 +359,7 @@ def query_collection_with_hybrid_search(
reranking_function=reranking_function, reranking_function=reranking_function,
k_reranker=k_reranker, k_reranker=k_reranker,
r=r, r=r,
bm25_weight=bm25_weight, hybrid_bm25_weight=hybrid_bm25_weight,
) )
return result, None return result, None
except Exception as e: except Exception as e:
@@ -447,7 +447,7 @@ def get_sources_from_files(
reranking_function, reranking_function,
k_reranker, k_reranker,
r, r,
bm25_weight, hybrid_bm25_weight,
hybrid_search, hybrid_search,
full_context=False, full_context=False,
): ):
@@ -565,7 +565,7 @@ def get_sources_from_files(
reranking_function=reranking_function, reranking_function=reranking_function,
k_reranker=k_reranker, k_reranker=k_reranker,
r=r, r=r,
bm25_weight=bm25_weight, hybrid_bm25_weight=hybrid_bm25_weight,
) )
except Exception as e: except Exception as e:
log.debug( log.debug(

View File

@@ -349,7 +349,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
"ENABLE_RAG_HYBRID_SEARCH": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, "ENABLE_RAG_HYBRID_SEARCH": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH,
"TOP_K_RERANKER": request.app.state.config.TOP_K_RERANKER, "TOP_K_RERANKER": request.app.state.config.TOP_K_RERANKER,
"RELEVANCE_THRESHOLD": request.app.state.config.RELEVANCE_THRESHOLD, "RELEVANCE_THRESHOLD": request.app.state.config.RELEVANCE_THRESHOLD,
"BM25_WEIGHT": request.app.state.config.BM25_WEIGHT, "HYBRID_BM25_WEIGHT": request.app.state.config.HYBRID_BM25_WEIGHT,
# Content extraction settings # Content extraction settings
"CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE, "CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE,
"PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES, "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES,
@@ -493,7 +493,7 @@ class ConfigForm(BaseModel):
ENABLE_RAG_HYBRID_SEARCH: Optional[bool] = None ENABLE_RAG_HYBRID_SEARCH: Optional[bool] = None
TOP_K_RERANKER: Optional[int] = None TOP_K_RERANKER: Optional[int] = None
RELEVANCE_THRESHOLD: Optional[float] = None RELEVANCE_THRESHOLD: Optional[float] = None
BM25_WEIGHT: Optional[float] = None HYBRID_BM25_WEIGHT: Optional[float] = None
# Content extraction settings # Content extraction settings
CONTENT_EXTRACTION_ENGINE: Optional[str] = None CONTENT_EXTRACTION_ENGINE: Optional[str] = None
@@ -580,10 +580,10 @@ async def update_rag_config(
if form_data.RELEVANCE_THRESHOLD is not None if form_data.RELEVANCE_THRESHOLD is not None
else request.app.state.config.RELEVANCE_THRESHOLD else request.app.state.config.RELEVANCE_THRESHOLD
) )
request.app.state.config.BM25_WEIGHT = ( request.app.state.config.HYBRID_BM25_WEIGHT = (
form_data.BM25_WEIGHT form_data.HYBRID_BM25_WEIGHT
if form_data.BM25_WEIGHT is not None if form_data.HYBRID_BM25_WEIGHT is not None
else request.app.state.config.BM25_WEIGHT else request.app.state.config.HYBRID_BM25_WEIGHT
) )
# Content extraction settings # Content extraction settings
@@ -844,7 +844,7 @@ async def update_rag_config(
"ENABLE_RAG_HYBRID_SEARCH": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, "ENABLE_RAG_HYBRID_SEARCH": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH,
"TOP_K_RERANKER": request.app.state.config.TOP_K_RERANKER, "TOP_K_RERANKER": request.app.state.config.TOP_K_RERANKER,
"RELEVANCE_THRESHOLD": request.app.state.config.RELEVANCE_THRESHOLD, "RELEVANCE_THRESHOLD": request.app.state.config.RELEVANCE_THRESHOLD,
"BM25_WEIGHT": request.app.state.config.BM25_WEIGHT, "HYBRID_BM25_WEIGHT": request.app.state.config.HYBRID_BM25_WEIGHT,
# Content extraction settings # Content extraction settings
"CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE, "CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE,
"PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES, "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES,
@@ -1782,10 +1782,10 @@ def query_doc_handler(
if form_data.r if form_data.r
else request.app.state.config.RELEVANCE_THRESHOLD else request.app.state.config.RELEVANCE_THRESHOLD
), ),
bm25_weight=( hybrid_bm25_weight=(
form_data.bm25_weight form_data.hybrid_bm25_weight
if form_data.bm25_weight if form_data.hybrid_bm25_weight
else request.app.state.config.BM25_WEIGHT else request.app.state.config.HYBRID_BM25_WEIGHT
), ),
user=user, user=user,
) )
@@ -1838,10 +1838,10 @@ def query_collection_handler(
if form_data.r if form_data.r
else request.app.state.config.RELEVANCE_THRESHOLD else request.app.state.config.RELEVANCE_THRESHOLD
), ),
bm25_weight=( hybrid_bm25_weight=(
form_data.bm25_weight form_data.hybrid_bm25_weight
if form_data.bm25_weight if form_data.hybrid_bm25_weight
else request.app.state.config.BM25_WEIGHT else request.app.state.config.HYBRID_BM25_WEIGHT
), ),
) )
else: else:

View File

@@ -603,7 +603,7 @@ async def chat_completion_files_handler(
reranking_function=request.app.state.rf, reranking_function=request.app.state.rf,
k_reranker=request.app.state.config.TOP_K_RERANKER, k_reranker=request.app.state.config.TOP_K_RERANKER,
r=request.app.state.config.RELEVANCE_THRESHOLD, r=request.app.state.config.RELEVANCE_THRESHOLD,
bm25_weight=request.app.state.config.BM25_WEIGHT, hybrid_bm25_weight=request.app.state.config.HYBRID_BM25_WEIGHT,
hybrid_search=request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, hybrid_search=request.app.state.config.ENABLE_RAG_HYBRID_SEARCH,
full_context=request.app.state.config.RAG_FULL_CONTEXT, full_context=request.app.state.config.RAG_FULL_CONTEXT,
), ),

View File

@@ -773,14 +773,14 @@
{#if RAGConfig.ENABLE_RAG_HYBRID_SEARCH === true} {#if RAGConfig.ENABLE_RAG_HYBRID_SEARCH === true}
<div class="mb-2.5 flex w-full justify-between"> <div class="mb-2.5 flex w-full justify-between">
<div class="self-center text-xs font-medium">{$i18n.t('BM25 Weight')}</div> <div class="self-center text-xs font-medium">{$i18n.t('Weight of BM25 Retrieval')}</div>
<div class="flex items-center relative"> <div class="flex items-center relative">
<input <input
class="flex-1 w-full text-sm bg-transparent outline-hidden" class="flex-1 w-full text-sm bg-transparent outline-hidden"
type="number" type="number"
step="0.01" step="0.01"
placeholder={$i18n.t('Enter BM25 Weight')} placeholder={$i18n.t('Enter BM25 Weight')}
bind:value={RAGConfig.BM25_WEIGHT} bind:value={RAGConfig.HYBRID_BM25_WEIGHT}
autocomplete="off" autocomplete="off"
min="0.0" min="0.0"
max="1.0" max="1.0"