From 325ca98773f73f6ac5c81f8bf7a51ad0a7f1b6a5 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek"
Date: Thu, 3 Oct 2024 06:43:50 -0700
Subject: [PATCH] enh: vector db delete filter support

---
 .../apps/retrieval/vector/dbs/chroma.py       | 12 ++++++--
 .../apps/retrieval/vector/dbs/milvus.py       | 33 ++++++++++++++++---
 2 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/backend/open_webui/apps/retrieval/vector/dbs/chroma.py b/backend/open_webui/apps/retrieval/vector/dbs/chroma.py
index fe065f868..a73eb92dc 100644
--- a/backend/open_webui/apps/retrieval/vector/dbs/chroma.py
+++ b/backend/open_webui/apps/retrieval/vector/dbs/chroma.py
@@ -111,11 +111,19 @@ class ChromaClient:
             ids=ids, documents=documents, embeddings=embeddings, metadatas=metadatas
         )
 
-    def delete(self, collection_name: str, ids: list[str]):
+    def delete(
+        self,
+        collection_name: str,
+        ids: Optional[list[str]] = None,
+        filter: Optional[dict] = None,
+    ):
         # Delete the items from the collection based on the ids.
         collection = self.client.get_collection(name=collection_name)
         if collection:
-            collection.delete(ids=ids)
+            if ids:
+                collection.delete(ids=ids)
+            elif filter:
+                collection.delete(where=filter)
 
     def reset(self):
         # Resets the database. This will delete all collections and item entries.
diff --git a/backend/open_webui/apps/retrieval/vector/dbs/milvus.py b/backend/open_webui/apps/retrieval/vector/dbs/milvus.py
index 77300acf2..4c8305ba8 100644
--- a/backend/open_webui/apps/retrieval/vector/dbs/milvus.py
+++ b/backend/open_webui/apps/retrieval/vector/dbs/milvus.py
@@ -187,13 +187,36 @@ class MilvusClient:
             ],
         )
 
-    def delete(self, collection_name: str, ids: list[str]):
+    def delete(
+        self,
+        collection_name: str,
+        ids: Optional[list[str]] = None,
+        filter: Optional[dict] = None,
+    ):
         # Delete the items from the collection based on the ids.
 
-        return self.client.delete(
-            collection_name=f"{self.collection_prefix}_{collection_name}",
-            ids=ids,
-        )
+        if ids:
+            return self.client.delete(
+                collection_name=f"{self.collection_prefix}_{collection_name}",
+                ids=ids,
+            )
+        elif filter:
+            import json
+
+            # Build one equality clause per metadata key, joined with "&&".
+            # The key is quoted inside the JSON path and the value is emitted
+            # as a JSON literal so strings are double-quoted and escaped.
+            filter_string = " && ".join(
+                [
+                    f'metadata["{key}"] == {json.dumps(value)}'
+                    for key, value in filter.items()
+                ]
+            )
+
+            return self.client.delete(
+                collection_name=f"{self.collection_prefix}_{collection_name}",
+                filter=filter_string,
+            )
 
     def reset(self):
         # Resets the database. This will delete all collections and item entries.