Merge pull request #14069 from Ithanil/bm25_weight

feat: Configurable weight for BM25Retriever during hybrid search
This commit is contained in:
Tim Jaeryang Baek
2025-05-24 01:13:03 +04:00
committed by GitHub
7 changed files with 67 additions and 5 deletions

View File

@@ -116,6 +116,7 @@ def query_doc_with_hybrid_search(
reranking_function,
k_reranker: int,
r: float,
hybrid_bm25_weight: float,
) -> dict:
try:
log.debug(f"query_doc_with_hybrid_search:doc {collection_name}")
@@ -131,9 +132,20 @@ def query_doc_with_hybrid_search(
top_k=k,
)
ensemble_retriever = EnsembleRetriever(
retrievers=[bm25_retriever, vector_search_retriever], weights=[0.5, 0.5]
)
if hybrid_bm25_weight <= 0:
ensemble_retriever = EnsembleRetriever(
retrievers=[vector_search_retriever], weights=[1.]
)
elif hybrid_bm25_weight >= 1:
ensemble_retriever = EnsembleRetriever(
retrievers=[bm25_retriever], weights=[1.]
)
else:
ensemble_retriever = EnsembleRetriever(
retrievers=[bm25_retriever, vector_search_retriever],
weights=[hybrid_bm25_weight, 1. - hybrid_bm25_weight]
)
compressor = RerankCompressor(
embedding_function=embedding_function,
top_n=k_reranker,
@@ -313,6 +325,7 @@ def query_collection_with_hybrid_search(
reranking_function,
k_reranker: int,
r: float,
hybrid_bm25_weight: float,
) -> dict:
results = []
error = False
@@ -346,6 +359,7 @@ def query_collection_with_hybrid_search(
reranking_function=reranking_function,
k_reranker=k_reranker,
r=r,
hybrid_bm25_weight=hybrid_bm25_weight,
)
return result, None
except Exception as e:
@@ -433,6 +447,7 @@ def get_sources_from_files(
reranking_function,
k_reranker,
r,
hybrid_bm25_weight,
hybrid_search,
full_context=False,
):
@@ -550,6 +565,7 @@ def get_sources_from_files(
reranking_function=reranking_function,
k_reranker=k_reranker,
r=r,
hybrid_bm25_weight=hybrid_bm25_weight,
)
except Exception as e:
log.debug(