diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py
index 965b49b88..df53da78d 100644
--- a/backend/open_webui/retrieval/utils.py
+++ b/backend/open_webui/retrieval/utils.py
@@ -139,10 +139,26 @@ def query_doc_with_hybrid_search(
         )
 
         result = compression_retriever.invoke(query)
+
+        distances = [d.metadata.get("score") for d in result]
+        documents = [d.page_content for d in result]
+        metadatas = [d.metadata for d in result]
+
+        # Retrieve only min(k, k_reranker) items: when k < k_reranker, sort by
+        # score (highest first) and keep the top k.
+        if k < k_reranker:
+            sorted_items = sorted(
+                zip(distances, documents, metadatas), key=lambda x: x[0], reverse=True
+            )[:k]
+            # Rebuild each list from the triples instead of unpacking
+            # zip(*sorted_items), which raises ValueError when nothing is left.
+            distances = [score for score, _, _ in sorted_items]
+            documents = [document for _, document, _ in sorted_items]
+            metadatas = [metadata for _, _, metadata in sorted_items]
         result = {
-            "distances": [[d.metadata.get("score") for d in result]],
-            "documents": [[d.page_content for d in result]],
-            "metadatas": [[d.metadata for d in result]],
+            "distances": [distances],
+            "documents": [documents],
+            "metadatas": [metadatas],
         }
 
         log.info(