Merge pull request #14069 from Ithanil/bm25_weight

feat: Configurable weight for BM25Retriever during hybrid search
This commit is contained in:
Tim Jaeryang Baek
2025-05-24 01:13:03 +04:00
committed by GitHub
7 changed files with 67 additions and 5 deletions

View File

@@ -349,6 +349,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
"ENABLE_RAG_HYBRID_SEARCH": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH,
"TOP_K_RERANKER": request.app.state.config.TOP_K_RERANKER,
"RELEVANCE_THRESHOLD": request.app.state.config.RELEVANCE_THRESHOLD,
"HYBRID_BM25_WEIGHT": request.app.state.config.HYBRID_BM25_WEIGHT,
# Content extraction settings
"CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE,
"PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES,
@@ -494,6 +495,7 @@ class ConfigForm(BaseModel):
ENABLE_RAG_HYBRID_SEARCH: Optional[bool] = None
TOP_K_RERANKER: Optional[int] = None
RELEVANCE_THRESHOLD: Optional[float] = None
HYBRID_BM25_WEIGHT: Optional[float] = None
# Content extraction settings
CONTENT_EXTRACTION_ENGINE: Optional[str] = None
@@ -580,6 +582,11 @@ async def update_rag_config(
if form_data.RELEVANCE_THRESHOLD is not None
else request.app.state.config.RELEVANCE_THRESHOLD
)
request.app.state.config.HYBRID_BM25_WEIGHT = (
form_data.HYBRID_BM25_WEIGHT
if form_data.HYBRID_BM25_WEIGHT is not None
else request.app.state.config.HYBRID_BM25_WEIGHT
)
# Content extraction settings
request.app.state.config.CONTENT_EXTRACTION_ENGINE = (
@@ -842,6 +849,7 @@ async def update_rag_config(
"ENABLE_RAG_HYBRID_SEARCH": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH,
"TOP_K_RERANKER": request.app.state.config.TOP_K_RERANKER,
"RELEVANCE_THRESHOLD": request.app.state.config.RELEVANCE_THRESHOLD,
"HYBRID_BM25_WEIGHT": request.app.state.config.HYBRID_BM25_WEIGHT,
# Content extraction settings
"CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE,
"PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES,
@@ -1796,6 +1804,11 @@ def query_doc_handler(
if form_data.r
else request.app.state.config.RELEVANCE_THRESHOLD
),
hybrid_bm25_weight=(
form_data.hybrid_bm25_weight
if form_data.hybrid_bm25_weight
else request.app.state.config.HYBRID_BM25_WEIGHT
),
user=user,
)
else:
@@ -1847,6 +1860,11 @@ def query_collection_handler(
if form_data.r
else request.app.state.config.RELEVANCE_THRESHOLD
),
hybrid_bm25_weight=(
form_data.hybrid_bm25_weight
if form_data.hybrid_bm25_weight
else request.app.state.config.HYBRID_BM25_WEIGHT
),
)
else:
return query_collection(