mirror of
https://github.com/open-webui/open-webui
synced 2025-06-15 10:51:13 +00:00
revert: faulty dedup code
This commit is contained in:
parent
c882aacc23
commit
93d486d50e
@ -1714,7 +1714,7 @@ Respond to the user query using the provided context, incorporating inline citat
|
|||||||
- Respond in the same language as the user's query.
|
- Respond in the same language as the user's query.
|
||||||
- If the context is unreadable or of poor quality, inform the user and provide the best possible answer.
|
- If the context is unreadable or of poor quality, inform the user and provide the best possible answer.
|
||||||
- If the answer isn't present in the context but you possess the knowledge, explain this to the user and provide the answer using your own understanding.
|
- If the answer isn't present in the context but you possess the knowledge, explain this to the user and provide the answer using your own understanding.
|
||||||
- **Only include inline citations using [source_id] when a <source_id> tag is explicitly provided in the context.**
|
- **Only include inline citations using [source_id] (e.g., [1], [2]) when a `<source_id>` tag is explicitly provided in the context.**
|
||||||
- Do not cite if the <source_id> tag is not provided in the context.
|
- Do not cite if the <source_id> tag is not provided in the context.
|
||||||
- Do not use XML tags in your response.
|
- Do not use XML tags in your response.
|
||||||
- Ensure citations are concise and directly related to the information provided.
|
- Ensure citations are concise and directly related to the information provided.
|
||||||
|
@ -14,7 +14,8 @@ from langchain_core.documents import Document
|
|||||||
|
|
||||||
from open_webui.config import VECTOR_DB
|
from open_webui.config import VECTOR_DB
|
||||||
from open_webui.retrieval.vector.connector import VECTOR_DB_CLIENT
|
from open_webui.retrieval.vector.connector import VECTOR_DB_CLIENT
|
||||||
from open_webui.utils.misc import get_last_user_message
|
from open_webui.utils.misc import get_last_user_message, calculate_sha256_string
|
||||||
|
|
||||||
from open_webui.models.users import UserModel
|
from open_webui.models.users import UserModel
|
||||||
|
|
||||||
from open_webui.env import (
|
from open_webui.env import (
|
||||||
@ -178,45 +179,31 @@ def merge_and_sort_query_results(
|
|||||||
combined_distances = []
|
combined_distances = []
|
||||||
combined_documents = []
|
combined_documents = []
|
||||||
combined_metadatas = []
|
combined_metadatas = []
|
||||||
combined_ids = []
|
|
||||||
|
|
||||||
for data in query_results:
|
for data in query_results:
|
||||||
combined_distances.extend(data["distances"][0])
|
combined_distances.extend(data["distances"][0])
|
||||||
combined_documents.extend(data["documents"][0])
|
combined_documents.extend(data["documents"][0])
|
||||||
combined_metadatas.extend(data["metadatas"][0])
|
combined_metadatas.extend(data["metadatas"][0])
|
||||||
# DISTINCT(chunk_id,file_id) - in case if id (chunk_ids) become ordinals
|
|
||||||
combined_ids.extend(
|
|
||||||
[
|
|
||||||
f"{id}-{meta['file_id']}"
|
|
||||||
for id, meta in zip(data["ids"][0], data["metadatas"][0])
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create a list of tuples (distance, document, metadata, ids)
|
# Create a list of tuples (distance, document, metadata)
|
||||||
combined = list(
|
combined = list(zip(combined_distances, combined_documents, combined_metadatas))
|
||||||
zip(combined_distances, combined_documents, combined_metadatas, combined_ids)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Sort the list based on distances
|
# Sort the list based on distances
|
||||||
combined.sort(key=lambda x: x[0], reverse=reverse)
|
combined.sort(key=lambda x: x[0], reverse=reverse)
|
||||||
|
|
||||||
|
# We don't have anything :-(
|
||||||
|
if not combined:
|
||||||
sorted_distances = []
|
sorted_distances = []
|
||||||
sorted_documents = []
|
sorted_documents = []
|
||||||
sorted_metadatas = []
|
sorted_metadatas = []
|
||||||
# Otherwise we don't have anything :-(
|
else:
|
||||||
if combined:
|
|
||||||
# Unzip the sorted list
|
# Unzip the sorted list
|
||||||
all_distances, all_documents, all_metadatas, all_ids = zip(*combined)
|
sorted_distances, sorted_documents, sorted_metadatas = zip(*combined)
|
||||||
seen_ids = set()
|
|
||||||
# Slicing the lists to include only k elements
|
# Slicing the lists to include only k elements
|
||||||
for index, id in enumerate(all_ids):
|
sorted_distances = list(sorted_distances)[:k]
|
||||||
if id not in seen_ids:
|
sorted_documents = list(sorted_documents)[:k]
|
||||||
sorted_distances.append(all_distances[index])
|
sorted_metadatas = list(sorted_metadatas)[:k]
|
||||||
sorted_documents.append(all_documents[index])
|
|
||||||
sorted_metadatas.append(all_metadatas[index])
|
|
||||||
seen_ids.add(id)
|
|
||||||
if len(sorted_distances) >= k:
|
|
||||||
break
|
|
||||||
|
|
||||||
# Create the output dictionary
|
# Create the output dictionary
|
||||||
result = {
|
result = {
|
||||||
|
Loading…
Reference in New Issue
Block a user