mirror of
https://github.com/open-webui/open-webui
synced 2025-06-26 18:26:48 +00:00
Merge branch 'dev' into fix-db-order
This commit is contained in:
@@ -105,7 +105,7 @@ class TikaLoader:
|
||||
|
||||
if r.ok:
|
||||
raw_metadata = r.json()
|
||||
text = raw_metadata.get("X-TIKA:content", "<No text content found>")
|
||||
text = raw_metadata.get("X-TIKA:content", "<No text content found>").strip()
|
||||
|
||||
if "Content-Type" in raw_metadata:
|
||||
headers["Content-Type"] = raw_metadata["Content-Type"]
|
||||
|
||||
@@ -106,6 +106,7 @@ def query_doc_with_hybrid_search(
|
||||
embedding_function,
|
||||
k: int,
|
||||
reranking_function,
|
||||
k_reranker: int,
|
||||
r: float,
|
||||
) -> dict:
|
||||
try:
|
||||
@@ -128,7 +129,7 @@ def query_doc_with_hybrid_search(
|
||||
)
|
||||
compressor = RerankCompressor(
|
||||
embedding_function=embedding_function,
|
||||
top_n=k,
|
||||
top_n=k_reranker,
|
||||
reranking_function=reranking_function,
|
||||
r_score=r,
|
||||
)
|
||||
@@ -138,10 +139,20 @@ def query_doc_with_hybrid_search(
|
||||
)
|
||||
|
||||
result = compression_retriever.invoke(query)
|
||||
|
||||
distances = [d.metadata.get("score") for d in result]
|
||||
documents = [d.page_content for d in result]
|
||||
metadatas = [d.metadata for d in result]
|
||||
|
||||
# retrieve only min(k, k_reranker) items, sort and cut by distance if k < k_reranker
|
||||
if k < k_reranker:
|
||||
sorted_items = sorted(zip(distances, metadatas, documents), key=lambda x: x[0], reverse=True)
|
||||
sorted_items = sorted_items[:k]
|
||||
distances, documents, metadatas = map(list, zip(*sorted_items))
|
||||
result = {
|
||||
"distances": [[d.metadata.get("score") for d in result]],
|
||||
"documents": [[d.page_content for d in result]],
|
||||
"metadatas": [[d.metadata for d in result]],
|
||||
"distances": [distances],
|
||||
"documents": [documents],
|
||||
"metadatas": [metadatas],
|
||||
}
|
||||
|
||||
log.info(
|
||||
@@ -264,6 +275,7 @@ def query_collection_with_hybrid_search(
|
||||
embedding_function,
|
||||
k: int,
|
||||
reranking_function,
|
||||
k_reranker: int,
|
||||
r: float,
|
||||
) -> dict:
|
||||
results = []
|
||||
@@ -277,6 +289,7 @@ def query_collection_with_hybrid_search(
|
||||
embedding_function=embedding_function,
|
||||
k=k,
|
||||
reranking_function=reranking_function,
|
||||
k_reranker=k_reranker,
|
||||
r=r,
|
||||
)
|
||||
results.append(result)
|
||||
@@ -290,10 +303,8 @@ def query_collection_with_hybrid_search(
|
||||
raise Exception(
|
||||
"Hybrid search failed for all collections. Using Non hybrid search as fallback."
|
||||
)
|
||||
|
||||
return merge_and_sort_query_results(results, k=k)
|
||||
|
||||
|
||||
def get_embedding_function(
|
||||
embedding_engine,
|
||||
embedding_model,
|
||||
@@ -337,6 +348,7 @@ def get_sources_from_files(
|
||||
embedding_function,
|
||||
k,
|
||||
reranking_function,
|
||||
k_reranker,
|
||||
r,
|
||||
hybrid_search,
|
||||
full_context=False,
|
||||
@@ -453,6 +465,7 @@ def get_sources_from_files(
|
||||
embedding_function=embedding_function,
|
||||
k=k,
|
||||
reranking_function=reranking_function,
|
||||
k_reranker=k_reranker,
|
||||
r=r,
|
||||
)
|
||||
except Exception as e:
|
||||
|
||||
@@ -172,12 +172,19 @@ class ChromaClient:
|
||||
filter: Optional[dict] = None,
|
||||
):
|
||||
# Delete the items from the collection based on the ids.
|
||||
collection = self.client.get_collection(name=collection_name)
|
||||
if collection:
|
||||
if ids:
|
||||
collection.delete(ids=ids)
|
||||
elif filter:
|
||||
collection.delete(where=filter)
|
||||
try:
|
||||
collection = self.client.get_collection(name=collection_name)
|
||||
if collection:
|
||||
if ids:
|
||||
collection.delete(ids=ids)
|
||||
elif filter:
|
||||
collection.delete(where=filter)
|
||||
except Exception as e:
|
||||
# If collection doesn't exist, that's fine - nothing to delete
|
||||
log.debug(
|
||||
f"Attempted to delete from non-existent collection {collection_name}. Ignoring."
|
||||
)
|
||||
pass
|
||||
|
||||
def reset(self):
|
||||
# Resets the database. This will delete all collections and item entries.
|
||||
|
||||
Reference in New Issue
Block a user