This commit is contained in:
Timothy J. Baek 2024-10-03 23:06:47 -07:00
parent d834bd2a18
commit 05970157f6
6 changed files with 38 additions and 17 deletions

View File

@ -643,13 +643,16 @@ def save_docs_to_vector_db(
# Check if entries with the same hash (metadata.hash) already exist
if metadata and "hash" in metadata:
existing_docs = VECTOR_DB_CLIENT.query(
result = VECTOR_DB_CLIENT.query(
collection_name=collection_name,
filter={"hash": metadata["hash"]},
)
if existing_docs:
log.info(f"Document with hash {metadata['hash']} already exists")
raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT)
if result:
existing_doc_ids = result.ids[0]
if existing_doc_ids:
log.info(f"Document with hash {metadata['hash']} already exists")
raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT)
if split:
text_splitter = RecursiveCharacterTextSplitter(

View File

@ -325,11 +325,16 @@ def get_rag_context(
else:
context = None
collection_names = (
file["collection_names"]
if file["type"] == "collection"
else [file["collection_name"]] if file["collection_name"] else []
)
collection_names = []
if file.get("type") == "collection":
if file.get("legacy"):
collection_names = file.get("collection_names", [])
else:
collection_names.append(file["id"])
elif file.get("collection_name"):
collection_names.append(file["collection_name"])
elif file.get("id"):
collection_names.append(f"file-{file['id']}")
collection_names = set(collection_names).difference(extracted_collections)
if not collection_names:

View File

@ -70,7 +70,7 @@ class ChromaClient:
return None
def query(
self, collection_name: str, filter: dict, limit: int = 1
self, collection_name: str, filter: dict, limit: int = 2
) -> Optional[GetResult]:
# Query the items from the collection based on the filter.
@ -82,15 +82,18 @@ class ChromaClient:
limit=limit,
)
print(result)
return GetResult(
**{
"ids": result["ids"],
"documents": result["documents"],
"metadatas": result["metadatas"],
"ids": [result["ids"]],
"documents": [result["documents"]],
"metadatas": [result["metadatas"]],
}
)
return None
except Exception as e:
print(e)
return None
def get(self, collection_name: str) -> Optional[GetResult]:

View File

@ -152,7 +152,13 @@ def add_file_to_knowledge_by_id(
)
# Add content to the vector database
process_file(ProcessFileForm(file_id=form_data.file_id, collection_name=id))
try:
process_file(ProcessFileForm(file_id=form_data.file_id, collection_name=id))
except Exception as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=str(e),
)
if knowledge:
data = knowledge.data or {}
@ -263,5 +269,6 @@ def remove_file_from_knowledge_by_id(
@router.delete("/{id}/delete", response_model=bool)
async def delete_knowledge_by_id(id: str, user=Depends(get_admin_user)):
    """Delete a knowledge base record and its vector collection.

    The vector collection named after the knowledge id is removed first, on a
    best-effort basis, and then the knowledge record itself is deleted.

    Args:
        id: Identifier of the knowledge base; also used as the name of the
            vector collection to drop.
        user: Resolved by the ``get_admin_user`` dependency (presumably
            restricts this route to admins -- confirm against that helper).

    Returns:
        Whatever ``Knowledges.delete_knowledge_by_id`` returns; the route
        declares ``response_model=bool``.
    """
    import logging

    try:
        VECTOR_DB_CLIENT.delete_collection(collection_name=id)
    except Exception:
        # The vector store may raise here (for example, when no collection was
        # ever created for this knowledge base). That must not prevent the
        # record itself from being deleted, otherwise the knowledge base can
        # never be removed. Log with traceback so orphaned collections can be
        # diagnosed.
        logging.getLogger(__name__).exception(
            "Failed to delete vector collection %s", id
        )

    return Knowledges.delete_knowledge_by_id(id=id)

View File

@ -94,7 +94,9 @@ class ERROR_MESSAGES(str, Enum):
lambda size="": f"Oops! The file you're trying to upload is too large. Please upload a file that is less than {size}."
)
DUPLICATE_CONTENT = "The content provided is a duplicate. Please ensure that the content is unique before proceeding."
DUPLICATE_CONTENT = (
"Duplicate content detected. Please provide unique content to proceed."
)
FILE_NOT_PROCESSED = "Extracted content is not available for this file. Please ensure that the file is processed before proceeding."

View File

@ -94,7 +94,7 @@
const addFileHandler = async (fileId) => {
const updatedKnowledge = await addFileToKnowledgeById(localStorage.token, id, fileId).catch(
(e) => {
console.error(e);
toast.error(e);
}
);
@ -110,7 +110,7 @@
id,
fileId
).catch((e) => {
console.error(e);
toast.error(e);
});
if (updatedKnowledge) {
@ -341,6 +341,7 @@
on:delete={(e) => {
console.log(e.detail);
selectedFileId = null;
deleteFileHandler(e.detail);
}}
/>