This commit is contained in:
Timothy Jaeryang Baek 2025-02-18 19:35:22 -08:00
parent 8f7528a0bc
commit 4ef7aff663

View File

@ -138,7 +138,7 @@ def query_doc_with_hybrid_search(
def merge_and_sort_query_results( def merge_and_sort_query_results(
query_results: list[dict], k: int, reverse: bool = False query_results: list[dict], k: int, reverse: bool = False
) -> list[dict]: ) -> list[dict]:
# Initialize lists to store combined data # Initialize lists to store combined data
combined_distances = [] combined_distances = []
@ -151,10 +151,17 @@ def merge_and_sort_query_results(
combined_documents.extend(data["documents"][0]) combined_documents.extend(data["documents"][0])
combined_metadatas.extend(data["metadatas"][0]) combined_metadatas.extend(data["metadatas"][0])
# DISTINCT(chunk_id,file_id) - in case if id (chunk_ids) become ordinals # DISTINCT(chunk_id,file_id) - in case if id (chunk_ids) become ordinals
combined_ids.extend([id + meta["file_id"] for id, meta in zip(data["ids"][0], data["metadatas"][0])]) combined_ids.extend(
[
f"{id}-{meta['file_id']}"
for id, meta in zip(data["ids"][0], data["metadatas"][0])
]
)
# Create a list of tuples (distance, document, metadata, ids) # Create a list of tuples (distance, document, metadata, ids)
combined = list(zip(combined_distances, combined_documents, combined_metadatas, combined_ids)) combined = list(
zip(combined_distances, combined_documents, combined_metadatas, combined_ids)
)
# Sort the list based on distances # Sort the list based on distances
combined.sort(key=lambda x: x[0], reverse=reverse) combined.sort(key=lambda x: x[0], reverse=reverse)