From 4e545d432ba8d5606b519db52b602cd73a8aa2a7 Mon Sep 17 00:00:00 2001 From: hurxxxx Date: Tue, 8 Apr 2025 00:44:10 +0900 Subject: [PATCH] feat: add new admin func - reindex knowledge files --- backend/open_webui/routers/knowledge.py | 69 +++++++++++++++++++ src/lib/apis/knowledge/index.ts | 29 ++++++++ .../admin/Settings/Documents.svelte | 38 ++++++++-- 3 files changed, 131 insertions(+), 5 deletions(-) diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index bc1e2429e..ab745cf84 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -159,6 +159,72 @@ async def create_new_knowledge( status_code=status.HTTP_400_BAD_REQUEST, detail=ERROR_MESSAGES.FILE_EXISTS, ) + + + +############################ +# ReindexKnowledgeFiles +############################ + + +@router.post("/reindex", response_model=bool) +async def reindex_knowledge_files( + request: Request, + user=Depends(get_verified_user) +): + if user.role != "admin": + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=ERROR_MESSAGES.UNAUTHORIZED, + ) + + knowledge_bases = Knowledges.get_knowledge_bases() + + log.info(f"Starting reindexing for {len(knowledge_bases)} knowledge bases") + + for knowledge_base in knowledge_bases: + try: + files = Files.get_files_by_ids(knowledge_base.data.get("file_ids", [])) + + try: + if VECTOR_DB_CLIENT.has_collection(collection_name=knowledge_base.id): + VECTOR_DB_CLIENT.delete_collection( + collection_name=knowledge_base.id + ) + except Exception as e: + log.error(f"Error deleting collection {knowledge_base.id}: {str(e)}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Error deleting vector DB collection" + ) + + failed_files = [] + for file in files: + try: + process_file( + request, + ProcessFileForm(file_id=file.id, collection_name=knowledge_base.id), + user=user, + ) + except Exception as e: + log.error(f"Error processing file {file.filename} (ID: {file.id}): {str(e)}") + failed_files.append({"file_id": file.id, "error": str(e)}) + continue + + except Exception as e: + log.error(f"Error processing knowledge base {knowledge_base.id}: {str(e)}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Error processing knowledge base" + ) + + if failed_files: + log.warning(f"Failed to process {len(failed_files)} files in knowledge base {knowledge_base.id}") + for failed in failed_files: + log.warning(f"File ID: {failed['file_id']}, Error: {failed['error']}") + + log.info("Reindexing completed successfully") + return True ############################ @@ -676,3 +742,6 @@ def add_files_to_knowledge_batch( return KnowledgeFilesResponse( **knowledge.model_dump(), files=Files.get_files_by_ids(existing_file_ids) ) + + + diff --git a/src/lib/apis/knowledge/index.ts b/src/lib/apis/knowledge/index.ts index c5fad1323..a1b80dbe4 100644 --- a/src/lib/apis/knowledge/index.ts +++ b/src/lib/apis/knowledge/index.ts @@ -345,3 +345,32 @@ export const deleteKnowledgeById = async (token: string, id: string) => { return res; }; + + +export const reindexKnowledgeFiles = async (token: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/reindex`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; \ No newline at end of file diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index b105ebdb9..294d8d0d7 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -13,14 +13,12 @@ updateEmbeddingConfig, getRerankingConfig, updateRerankingConfig, - resetUploadDir, getRAGConfig, updateRAGConfig } from '$lib/apis/retrieval'; - import { knowledge, models } from '$lib/stores'; - import { getKnowledgeBases } from '$lib/apis/knowledge'; - import { uploadDir, deleteAllFiles, deleteFileById } from '$lib/apis/files'; + import { reindexKnowledgeFiles} from '$lib/apis/knowledge'; + import { deleteAllFiles } from '$lib/apis/files'; import ResetUploadDirConfirmDialog from '$lib/components/common/ConfirmDialog.svelte'; import ResetVectorDBConfirmDialog from '$lib/components/common/ConfirmDialog.svelte'; @@ -31,12 +29,12 @@ const i18n = getContext('i18n'); - let scanDirLoading = false; let updateEmbeddingModelLoading = false; let updateRerankingModelLoading = false; let showResetConfirm = false; let showResetUploadDirConfirm = false; + let showReindexConfirm = false; let embeddingEngine = ''; let embeddingModel = ''; @@ -333,6 +331,21 @@ }} /> + + { + const res = await reindexKnowledgeFiles(localStorage.token).catch((error) => { + toast.error(`${error}`); + return null; + }); + + if (res) { + toast.success($i18n.t('Success')); + } + }} +/> +
{ @@ -950,6 +963,21 @@ +
+
+ {$i18n.t('Reindex Knowledge Base Vectors')} +
+
+ +
+