From 440894f8d3ead0ef438e89cbb5a1b46ff3cd58af Mon Sep 17 00:00:00 2001 From: Gabriel Ecegi Date: Sat, 14 Dec 2024 10:45:27 +0100 Subject: [PATCH] Fix process/files/batch --- backend/open_webui/apps/retrieval/main.py | 21 +++++++------------ .../apps/webui/routers/knowledge.py | 15 +++++++++---- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index 86ea6bf41..4da322e70 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -1062,30 +1062,24 @@ def process_files_batch( Process a batch of files and save them to the vector database. """ results: List[BatchProcessFilesResult] = [] - errors: List[BatchProcessFilesResult] = [] + errors: List[BatchProcessFilesResult] = [] collection_name = form_data.collection_name - # Prepare all documents first all_docs: List[Document] = [] - for file_request in form_data.files: + for file in form_data.files: try: - file = Files.get_file_by_id(file_request.file_id) - if not file: - log.error(f"process_files_batch: File {file_request.file_id} not found") - raise ValueError(f"File {file_request.file_id} not found") - - text_content = file_request.content + text_content = file.data.get("content", "") docs: List[Document] = [ Document( page_content=text_content.replace("
", "\n"), metadata={ **file.meta, - "name": file_request.filename, + "name": file.filename, "created_by": file.user_id, "file_id": file.id, - "source": file_request.filename, + "source": file.filename, }, ) ] @@ -1101,9 +1095,9 @@ def process_files_batch( )) except Exception as e: - log.error(f"process_files_batch: Error processing file {file_request.file_id}: {str(e)}") + log.error(f"process_files_batch: Error processing file {file.id}: {str(e)}") errors.append(BatchProcessFilesResult( - file_id=file_request.file_id, + file_id=file.id, status="failed", error=str(e) )) @@ -1139,7 +1133,6 @@ def process_files_batch( errors=errors ) - class ProcessTextForm(BaseModel): name: str content: str diff --git a/backend/open_webui/apps/webui/routers/knowledge.py b/backend/open_webui/apps/webui/routers/knowledge.py index ccc2251d1..21361c7f7 100644 --- a/backend/open_webui/apps/webui/routers/knowledge.py +++ b/backend/open_webui/apps/webui/routers/knowledge.py @@ -548,10 +548,17 @@ def add_files_to_knowledge_batch( files.append(file) # Process files - result = process_files_batch(BatchProcessFilesForm( - files=files, - collection_name=id - )) + try: + result = process_files_batch(BatchProcessFilesForm( + files=files, + collection_name=id + )) + except Exception as e: + log.error(f"add_files_to_knowledge_batch: Exception occurred: {e}", exc_info=True) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e) + ) # Add successful files to knowledge base data = knowledge.data or {}