refac

2025-06-26 18:26:48 +00:00 · 2024-10-03 23:06:47 -07:00
parent d834bd2a18
commit 05970157f6
6 changed files with 38 additions and 17 deletions
--- a/backend/open_webui/apps/retrieval/main.py
+++ b/backend/open_webui/apps/retrieval/main.py
@@ -643,13 +643,16 @@ def save_docs_to_vector_db(

    # Check if entries with the same hash (metadata.hash) already exist
    if metadata and "hash" in metadata:
-        existing_docs = VECTOR_DB_CLIENT.query(
+        result = VECTOR_DB_CLIENT.query(
            collection_name=collection_name,
            filter={"hash": metadata["hash"]},
        )
-        if existing_docs:
-            log.info(f"Document with hash {metadata['hash']} already exists")
-            raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT)
+
+        if result:
+            existing_doc_ids = result.ids[0]
+            if existing_doc_ids:
+                log.info(f"Document with hash {metadata['hash']} already exists")
+                raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT)

    if split:
        text_splitter = RecursiveCharacterTextSplitter(
--- a/backend/open_webui/apps/retrieval/utils.py
+++ b/backend/open_webui/apps/retrieval/utils.py
@@ -325,11 +325,16 @@ def get_rag_context(
        else:
            context = None

-            collection_names = (
-                file["collection_names"]
-                if file["type"] == "collection"
-                else [file["collection_name"]] if file["collection_name"] else []
-            )
+            collection_names = []
+            if file.get("type") == "collection":
+                if file.get("legacy"):
+                    collection_names = file.get("collection_names", [])
+                else:
+                    collection_names.append(file["id"])
+            elif file.get("collection_name"):
+                collection_names.append(file["collection_name"])
+            elif file.get("id"):
+                collection_names.append(f"file-{file['id']}")

            collection_names = set(collection_names).difference(extracted_collections)
            if not collection_names:
--- a/backend/open_webui/apps/retrieval/vector/dbs/chroma.py
+++ b/backend/open_webui/apps/retrieval/vector/dbs/chroma.py
@@ -70,7 +70,7 @@ class ChromaClient:
            return None

    def query(
-        self, collection_name: str, filter: dict, limit: int = 1
+        self, collection_name: str, filter: dict, limit: int = 2
    ) -> Optional[GetResult]:
        # Query the items from the collection based on the filter.

@@ -82,15 +82,18 @@ class ChromaClient:
                    limit=limit,
                )

+                print(result)
+
                return GetResult(
                    **{
-                        "ids": result["ids"],
-                        "documents": result["documents"],
-                        "metadatas": result["metadatas"],
+                        "ids": [result["ids"]],
+                        "documents": [result["documents"]],
+                        "metadatas": [result["metadatas"]],
                    }
                )
            return None
        except Exception as e:
+            print(e)
            return None

    def get(self, collection_name: str) -> Optional[GetResult]:
--- a/backend/open_webui/apps/webui/routers/knowledge.py
+++ b/backend/open_webui/apps/webui/routers/knowledge.py
@@ -152,7 +152,13 @@ def add_file_to_knowledge_by_id(
        )

    # Add content to the vector database
-    process_file(ProcessFileForm(file_id=form_data.file_id, collection_name=id))
+    try:
+        process_file(ProcessFileForm(file_id=form_data.file_id, collection_name=id))
+    except Exception as e:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=str(e),
+        )

    if knowledge:
        data = knowledge.data or {}
@@ -263,5 +269,6 @@ def remove_file_from_knowledge_by_id(

@router.delete("/{id}/delete", response_model=bool)
 async def delete_knowledge_by_id(id: str, user=Depends(get_admin_user)):
+    VECTOR_DB_CLIENT.delete_collection(collection_name=id)
    result = Knowledges.delete_knowledge_by_id(id=id)
    return result
--- a/backend/open_webui/constants.py
+++ b/backend/open_webui/constants.py
@@ -94,7 +94,9 @@ class ERROR_MESSAGES(str, Enum):
        lambda size="": f"Oops! The file you're trying to upload is too large. Please upload a file that is less than {size}."
    )

-    DUPLICATE_CONTENT = "The content provided is a duplicate. Please ensure that the content is unique before proceeding."
+    DUPLICATE_CONTENT = (
+        "Duplicate content detected. Please provide unique content to proceed."
+    )
    FILE_NOT_PROCESSED = "Extracted content is not available for this file. Please ensure that the file is processed before proceeding."


--- a/src/lib/components/workspace/Knowledge/Collection.svelte
+++ b/src/lib/components/workspace/Knowledge/Collection.svelte
@@ -94,7 +94,7 @@
 	const addFileHandler = async (fileId) => {
 		const updatedKnowledge = await addFileToKnowledgeById(localStorage.token, id, fileId).catch(
 			(e) => {
-				console.error(e);
+				toast.error(e);
 			}
 		);

@@ -110,7 +110,7 @@
 			id,
 			fileId
 		).catch((e) => {
-			console.error(e);
+			toast.error(e);
 		});

 		if (updatedKnowledge) {
@@ -341,6 +341,7 @@
 										on:delete={(e) => {
 											console.log(e.detail);

+											selectedFileId = null;
 											deleteFileHandler(e.detail);
 										}}
 									/>