mirror of
https://github.com/open-webui/open-webui
synced 2025-04-22 07:18:29 +00:00
Fix wrong order for chromadb
This commit is contained in:
parent
1173459eee
commit
ec8fc727b8
@ -178,8 +178,7 @@ def merge_and_sort_query_results(
|
|||||||
query_results: list[dict], k: int, reverse: bool = False
|
query_results: list[dict], k: int, reverse: bool = False
|
||||||
) -> dict:
|
) -> dict:
|
||||||
# Initialize lists to store combined data
|
# Initialize lists to store combined data
|
||||||
combined = []
|
combined = dict() # To store documents with unique document hashes
|
||||||
seen_hashes = set() # To store unique document hashes
|
|
||||||
|
|
||||||
for data in query_results:
|
for data in query_results:
|
||||||
distances = data["distances"][0]
|
distances = data["distances"][0]
|
||||||
@ -192,10 +191,19 @@ def merge_and_sort_query_results(
|
|||||||
document.encode()
|
document.encode()
|
||||||
).hexdigest() # Compute a hash for uniqueness
|
).hexdigest() # Compute a hash for uniqueness
|
||||||
|
|
||||||
if doc_hash not in seen_hashes:
|
if doc_hash not in combined.keys():
|
||||||
seen_hashes.add(doc_hash)
|
combined[doc_hash] = (distance, document, metadata)
|
||||||
combined.append((distance, document, metadata))
|
continue # if doc is new, no further comparison is needed
|
||||||
|
|
||||||
|
# if doc is already in, but new distance is better, update
|
||||||
|
if not reverse and distance < combined[doc_hash][0]:
|
||||||
|
# Chroma uses unconventional cosine similarity, so we don't need to reverse the results
|
||||||
|
# https://docs.trychroma.com/docs/collections/configure#configuring-chroma-collections
|
||||||
|
combined[doc_hash] = (distance, document, metadata)
|
||||||
|
if reverse and distance > combined[doc_hash][0]:
|
||||||
|
combined[doc_hash] = (distance, document, metadata)
|
||||||
|
|
||||||
|
combined = list(combined.values())
|
||||||
# Sort the list based on distances
|
# Sort the list based on distances
|
||||||
combined.sort(key=lambda x: x[0], reverse=reverse)
|
combined.sort(key=lambda x: x[0], reverse=reverse)
|
||||||
|
|
||||||
@ -204,6 +212,12 @@ def merge_and_sort_query_results(
|
|||||||
zip(*combined[:k]) if combined else ([], [], [])
|
zip(*combined[:k]) if combined else ([], [], [])
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# if chromaDB, the distance is 0 (best) to 2 (worst)
|
||||||
|
# rescale (1 - distance) to a relevance score from -1 (worst) to 1 (best)
|
||||||
|
if not reverse:
|
||||||
|
sorted_distances = tuple(-dist for dist in sorted_distances)
|
||||||
|
sorted_distances = tuple(dist + 1 for dist in sorted_distances)
|
||||||
|
|
||||||
# Create and return the output dictionary
|
# Create and return the output dictionary
|
||||||
return {
|
return {
|
||||||
"distances": [list(sorted_distances)],
|
"distances": [list(sorted_distances)],
|
||||||
@ -294,11 +308,6 @@ def query_collection_with_hybrid_search(
|
|||||||
"Hybrid search failed for all collections. Using Non hybrid search as fallback."
|
"Hybrid search failed for all collections. Using Non hybrid search as fallback."
|
||||||
)
|
)
|
||||||
|
|
||||||
if VECTOR_DB == "chroma":
|
|
||||||
# Chroma uses unconventional cosine similarity, so we don't need to reverse the results
|
|
||||||
# https://docs.trychroma.com/docs/collections/configure#configuring-chroma-collections
|
|
||||||
return merge_and_sort_query_results(results, k=k, reverse=False)
|
|
||||||
else:
|
|
||||||
return merge_and_sort_query_results(results, k=k, reverse=True)
|
return merge_and_sort_query_results(results, k=k, reverse=True)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user