Address edge case where k < k_reranker: sort results by score before truncating to k

This commit is contained in:
Marko Henning 2025-03-18 11:31:17 +01:00
parent 8b5b3f165a
commit c877b59cbc

View File

@@ -139,10 +139,20 @@ def query_doc_with_hybrid_search(
) )
result = compression_retriever.invoke(query) result = compression_retriever.invoke(query)
distances = [d.metadata.get("score") for d in result]
documents = [d.page_content for d in result]
metadatas = [d.metadata for d in result]
# retrieve only min(k, k_reranker) items, sort and cut by distance if k < k_reranker
if k < k_reranker:
sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=True)
sorted_items = sorted_items[:k]
distances, documents, metadatas = map(list, zip(*sorted_items))
result = { result = {
"distances": [[d.metadata.get("score") for d in result]], "distances": [distances],
"documents": [[d.page_content for d in result]], "documents": [documents]
"metadatas": [[d.metadata for d in result]], "metadatas": [metadatas],
} }
log.info( log.info(