Merge remote-tracking branch 'upstream/dev' into Individual-RAG-Config

This commit is contained in:
Maytown
2025-05-02 20:38:35 +02:00
161 changed files with 6412 additions and 2966 deletions

View File

@@ -10,7 +10,7 @@ from open_webui.models.knowledge import (
KnowledgeUserResponse,
RAGConfigForm
)
from open_webui.models.files import Files, FileModel
from open_webui.models.files import Files, FileModel, FileMetadataResponse
from open_webui.retrieval.vector.connector import VECTOR_DB_CLIENT
from open_webui.routers.retrieval import (
process_file,
@@ -179,10 +179,26 @@ async def reindex_knowledge_files(request: Request, user=Depends(get_verified_us
log.info(f"Starting reindexing for {len(knowledge_bases)} knowledge bases")
for knowledge_base in knowledge_bases:
try:
files = Files.get_files_by_ids(knowledge_base.data.get("file_ids", []))
deleted_knowledge_bases = []
for knowledge_base in knowledge_bases:
# -- Robust error handling for missing or invalid data
if not knowledge_base.data or not isinstance(knowledge_base.data, dict):
log.warning(
f"Knowledge base {knowledge_base.id} has no data or invalid data ({knowledge_base.data!r}). Deleting."
)
try:
Knowledges.delete_knowledge_by_id(id=knowledge_base.id)
deleted_knowledge_bases.append(knowledge_base.id)
except Exception as e:
log.error(
f"Failed to delete invalid knowledge base {knowledge_base.id}: {e}"
)
continue
try:
file_ids = knowledge_base.data.get("file_ids", [])
files = Files.get_files_by_ids(file_ids)
try:
if VECTOR_DB_CLIENT.has_collection(collection_name=knowledge_base.id):
VECTOR_DB_CLIENT.delete_collection(
@@ -190,10 +206,7 @@ async def reindex_knowledge_files(request: Request, user=Depends(get_verified_us
)
except Exception as e:
log.error(f"Error deleting collection {knowledge_base.id}: {str(e)}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error deleting vector DB collection",
)
continue # Skip, don't raise
failed_files = []
for file in files:
@@ -214,10 +227,8 @@ async def reindex_knowledge_files(request: Request, user=Depends(get_verified_us
except Exception as e:
log.error(f"Error processing knowledge base {knowledge_base.id}: {str(e)}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error processing knowledge base",
)
# Don't raise, just continue
continue
if failed_files:
log.warning(
@@ -226,7 +237,9 @@ async def reindex_knowledge_files(request: Request, user=Depends(get_verified_us
for failed in failed_files:
log.warning(f"File ID: {failed['file_id']}, Error: {failed['error']}")
log.info("Reindexing completed successfully")
log.info(
f"Reindexing completed. Deleted {len(deleted_knowledge_bases)} invalid knowledge bases: {deleted_knowledge_bases}"
)
return True
@@ -236,7 +249,7 @@ async def reindex_knowledge_files(request: Request, user=Depends(get_verified_us
class KnowledgeFilesResponse(KnowledgeResponse):
files: list[FileModel]
files: list[FileMetadataResponse]
@router.get("/{id}", response_model=Optional[KnowledgeFilesResponse])
@@ -252,7 +265,7 @@ async def get_knowledge_by_id(id: str, user=Depends(get_verified_user)):
):
file_ids = knowledge.data.get("file_ids", []) if knowledge.data else []
files = Files.get_files_by_ids(file_ids)
files = Files.get_file_metadatas_by_ids(file_ids)
return KnowledgeFilesResponse(
**knowledge.model_dump(),
@@ -380,7 +393,7 @@ def add_file_to_knowledge_by_id(
knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data)
if knowledge:
files = Files.get_files_by_ids(file_ids)
files = Files.get_file_metadatas_by_ids(file_ids)
return KnowledgeFilesResponse(
**knowledge.model_dump(),
@@ -457,7 +470,7 @@ def update_file_from_knowledge_by_id(
data = knowledge.data or {}
file_ids = data.get("file_ids", [])
files = Files.get_files_by_ids(file_ids)
files = Files.get_file_metadatas_by_ids(file_ids)
return KnowledgeFilesResponse(
**knowledge.model_dump(),
@@ -539,7 +552,7 @@ def remove_file_from_knowledge_by_id(
knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data)
if knowledge:
files = Files.get_files_by_ids(file_ids)
files = Files.get_file_metadatas_by_ids(file_ids)
return KnowledgeFilesResponse(
**knowledge.model_dump(),
@@ -735,7 +748,7 @@ def add_files_to_knowledge_batch(
error_details = [f"{err.file_id}: {err.error}" for err in result.errors]
return KnowledgeFilesResponse(
**knowledge.model_dump(),
files=Files.get_files_by_ids(existing_file_ids),
files=Files.get_file_metadatas_by_ids(existing_file_ids),
warnings={
"message": "Some files failed to process",
"errors": error_details,
@@ -743,5 +756,6 @@ def add_files_to_knowledge_batch(
)
return KnowledgeFilesResponse(
**knowledge.model_dump(), files=Files.get_files_by_ids(existing_file_ids)
**knowledge.model_dump(),
files=Files.get_file_metadatas_by_ids(existing_file_ids),
)