diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py
index 7b45ccff5..c9ba33211 100644
--- a/backend/open_webui/apps/retrieval/main.py
+++ b/backend/open_webui/apps/retrieval/main.py
@@ -731,7 +731,7 @@ def process_file(

         collection_name = form_data.collection_name
         if collection_name is None:
-            collection_name = file.id
+            collection_name = f"file-{file.id}"

         loader = Loader(
             engine=app.state.config.CONTENT_EXTRACTION_ENGINE,
@@ -758,12 +758,11 @@ def process_file(
         log.debug(f"text_content: {text_content}")
         hash = calculate_sha256_string(text_content)

-        res = Files.update_file_data_by_id(
+        Files.update_file_data_by_id(
             file.id,
             {"content": text_content},
         )
-        print(res)
-        Files.update_file_hash_by_id(form_data.file_id, hash)
+        Files.update_file_hash_by_id(file.id, hash)

         try:
             result = save_docs_to_vector_db(
@@ -778,6 +777,13 @@ def process_file(
             )

             if result:
+                Files.update_file_metadata_by_id(
+                    file.id,
+                    {
+                        "collection_name": collection_name,
+                    },
+                )
+
                 return {
                     "status": True,
                     "collection_name": collection_name,
diff --git a/backend/open_webui/apps/retrieval/utils.py b/backend/open_webui/apps/retrieval/utils.py
index 12c30edbb..c671b03b4 100644
--- a/backend/open_webui/apps/retrieval/utils.py
+++ b/backend/open_webui/apps/retrieval/utils.py
@@ -319,7 +319,8 @@ def get_rag_context(
     for file in files:
         if file.get("context") == "full":
             context = {
-                "documents": [[file.get("file").get("content")]],
+                # dict.get's default only covers a missing key; guard a null "data" too.
+                "documents": [[(file.get("file").get("data") or {}).get("content")]],
                 "metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]],
             }
         else:
diff --git a/backend/open_webui/apps/webui/routers/files.py b/backend/open_webui/apps/webui/routers/files.py
index 1204dea9d..17c656be5 100644
--- a/backend/open_webui/apps/webui/routers/files.py
+++ b/backend/open_webui/apps/webui/routers/files.py
@@ -6,7 +6,8 @@ from pathlib import Path
 from typing import Optional

 from open_webui.apps.webui.models.files import FileForm, FileModel, Files
-from open_webui.apps.webui.models.knowledge import Knowledges
+from open_webui.apps.retrieval.main import process_file, ProcessFileForm
+
 from open_webui.config import UPLOAD_DIR
 from open_webui.constants import ERROR_MESSAGES
 from open_webui.env import SRC_LOG_LEVELS
@@ -61,6 +62,14 @@ def upload_file(file: UploadFile = File(...), user=Depends(get_verified_user)):
             ),
         )

+        try:
+            process_file(ProcessFileForm(file_id=id))
+            file = Files.get_file_by_id(id=id)
+        except Exception as e:
+            log.exception(e)
+            # `file` may be falsy here (see the `if file:` check below); log the id passed to process_file.
+            log.error(f"Error processing file: {id}")
+
         if file:
             return file
         else:
diff --git a/backend/open_webui/apps/webui/routers/knowledge.py b/backend/open_webui/apps/webui/routers/knowledge.py
index 29316258d..88ca8c398 100644
--- a/backend/open_webui/apps/webui/routers/knowledge.py
+++ b/backend/open_webui/apps/webui/routers/knowledge.py
@@ -17,7 +17,6 @@ from open_webui.utils.utils import get_admin_user, get_verified_user

 from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT

-
 router = APIRouter()

 ############################
@@ -132,7 +131,7 @@ class KnowledgeFileIdForm(BaseModel):


 @router.post("/{id}/file/add", response_model=Optional[KnowledgeFilesResponse])
-async def add_file_to_knowledge_by_id(
+def add_file_to_knowledge_by_id(
     id: str,
     form_data: KnowledgeFileIdForm,
     user=Depends(get_admin_user),
@@ -144,6 +143,11 @@ async def add_file_to_knowledge_by_id(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=ERROR_MESSAGES.NOT_FOUND,
         )
+    if not file.data:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=ERROR_MESSAGES.FILE_NOT_PROCESSED,
+        )

     if knowledge:
         data = knowledge.data or {}
@@ -191,7 +195,7 @@ class KnowledgeFileIdForm(BaseModel):


 @router.post("/{id}/file/remove", response_model=Optional[KnowledgeFilesResponse])
-async def remove_file_from_knowledge_by_id(
+def remove_file_from_knowledge_by_id(
     id: str,
     form_data: KnowledgeFileIdForm,
     user=Depends(get_admin_user),
diff --git a/backend/open_webui/constants.py b/backend/open_webui/constants.py
index e8c456b9e..0326ae96e 100644
--- a/backend/open_webui/constants.py
+++ b/backend/open_webui/constants.py
@@ -95,6 +95,7 @@ class ERROR_MESSAGES(str, Enum):
     )

     DUPLICATE_CONTENT = "The content provided is a duplicate. Please ensure that the content is unique before proceeding."
+    FILE_NOT_PROCESSED = "Extracted content is not available for this file. Please ensure that the file is processed before proceeding."


 class TASKS(str, Enum):
diff --git a/src/lib/apis/knowledge/index.ts b/src/lib/apis/knowledge/index.ts
index 511924a04..a0ba83a0e 100644
--- a/src/lib/apis/knowledge/index.ts
+++ b/src/lib/apis/knowledge/index.ts
@@ -138,6 +138,76 @@ export const updateKnowledgeById = async (token: string, id: string, form: Knowl
 	return res;
 };

+export const addFileToKnowledgeById = async (token: string, id: string, fileId: string) => {
+	let error = null;
+
+	const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/add`, {
+		method: 'POST',
+		headers: {
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
+			authorization: `Bearer ${token}`
+		},
+		body: JSON.stringify({
+			file_id: fileId
+		})
+	})
+		.then(async (res) => {
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.then((json) => {
+			return json;
+		})
+		.catch((err) => {
+			error = err.detail;
+
+			console.log(err);
+			return null;
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
+
+export const removeFileFromKnowledgeById = async (token: string, id: string, fileId: string) => {
+	let error = null;
+
+	const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/remove`, {
+		method: 'POST',
+		headers: {
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
+			authorization: `Bearer ${token}`
+		},
+		body: JSON.stringify({
+			file_id: fileId
+		})
+	})
+		.then(async (res) => {
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.then((json) => {
+			return json;
+		})
+		.catch((err) => {
+			error = err.detail;
+
+			console.log(err);
+			return null;
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
+
 export const deleteKnowledgeById = async (token: string, id: string) => {
 	let error = null;

diff --git a/src/lib/components/chat/Controls/Controls.svelte b/src/lib/components/chat/Controls/Controls.svelte
index f5807b9b8..01118e8a1 100644
--- a/src/lib/components/chat/Controls/Controls.svelte
+++ b/src/lib/components/chat/Controls/Controls.svelte
@@ -35,7 +35,7 @@
 						{#each chatFiles as file, fileIdx}
 							<FileItem
 								className="w-full"
-								{file}
+								item={file}
 								edit={true}
 								url={`${file?.url}`}
 								name={file.name}
diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte
index 121a23205..3c3cf1593 100644
--- a/src/lib/components/chat/MessageInput.svelte
+++ b/src/lib/components/chat/MessageInput.svelte
@@ -125,16 +125,17 @@
 		}

 		try {
+			// During the file upload, file content is automatically extracted.
 			const uploadedFile = await uploadFile(localStorage.token, file);

 			if (uploadedFile) {
-				fileItem.status = 'uploaded';
+				fileItem.status = 'processed';
 				fileItem.file = uploadedFile;
 				fileItem.id = uploadedFile.id;
+				fileItem.collection_name = uploadedFile?.meta?.collection_name;
 				fileItem.url = `${WEBUI_API_BASE_URL}/files/${uploadedFile.id}`;

-				// Try to extract content of the file for retrieval, even non-supported file types
-				processFileItem(fileItem);
+				files = files;
 			} else {
 				files = files.filter((item) => item.status !== null);
 			}
@@ -143,27 +144,6 @@
 			files = files.filter((item) => item.status !== null);
 		}
 	};
-
-	const processFileItem = async (fileItem) => {
-		try {
-			const res = await processFile(localStorage.token, fileItem.id);
-			if (res) {
-				fileItem.status = 'processed';
-				fileItem.collection_name = res.collection_name;
-				fileItem.file = {
-					...fileItem.file,
-					content: res.content
-				};
-
-				files = files;
-			}
-		} catch (e) {
-			// We keep the file in the files list even if it fails to process
-			fileItem.status = 'processed';
-			files = files;
-		}
-	};
-
 	const inputFilesHandler = async (inputFiles) => {
 		inputFiles.forEach((file) => {
 			console.log(file, file.name.split('.').at(-1));
@@ -456,7 +436,7 @@
 									</div>
 								{:else}
 									<FileItem
-										{file}
+										item={file}
 										name={file.name}
 										type={file.type}
 										size={file?.size}
diff --git a/src/lib/components/chat/Messages/UserMessage.svelte b/src/lib/components/chat/Messages/UserMessage.svelte
index 8a087b2bc..2e4313464 100644
--- a/src/lib/components/chat/Messages/UserMessage.svelte
+++ b/src/lib/components/chat/Messages/UserMessage.svelte
@@ -127,7 +127,7 @@
 								<img src={file.url} alt="input" class=" max-h-96 rounded-lg" draggable="false" />
 							{:else}
 								<FileItem
-									{file}
+									item={file}
 									url={file.url}
 									name={file.name}
 									type={file.type}
diff --git a/src/lib/components/common/FileItem.svelte b/src/lib/components/common/FileItem.svelte
index 5ddc8c96b..d0d3b83d2 100644
--- a/src/lib/components/common/FileItem.svelte
+++ b/src/lib/components/common/FileItem.svelte
@@ -15,7 +15,7 @@
 	export let dismissible = false;
 	export let status = 'processed';

-	export let file = null;
+	export let item = null;
 	export let edit = false;

 	export let name: string;
@@ -25,15 +25,15 @@
 	let showModal = false;
 </script>

-{#if file}
-	<FileItemModal bind:show={showModal} bind:file {edit} />
+{#if item}
+	<FileItemModal bind:show={showModal} bind:item {edit} />
 {/if}

 <button
 	class="relative group p-1.5 {className} flex items-center {colorClassName} rounded-2xl text-left"
 	type="button"
 	on:click={async () => {
-		if (file?.file?.content) {
+		if (item?.file?.data?.content) {
 			showModal = !showModal;
 		} else {
 			if (url) {
diff --git a/src/lib/components/common/FileItemModal.svelte b/src/lib/components/common/FileItemModal.svelte
index f97e4f33d..6408ad05d 100644
--- a/src/lib/components/common/FileItemModal.svelte
+++ b/src/lib/components/common/FileItemModal.svelte
@@ -10,7 +10,7 @@
 	import Switch from './Switch.svelte';
 	import Tooltip from './Tooltip.svelte';

-	export let file;
+	export let item;
 	export let show = false;
 	export let edit = false;

@@ -18,9 +18,9 @@
 	let enableFullContent = false;

 	onMount(() => {
-		console.log(file);
+		console.log(item);

-		if (file?.context === 'full') {
+		if (item?.context === 'full') {
 			enableFullContent = true;
 		}
 	});
@@ -33,11 +33,11 @@
 			<div>
 				<div class=" font-medium text-lg dark:text-gray-100">
 					<a
-						href={file.url ? (file.type === 'file' ? `${file.url}/content` : `${file.url}`) : '#'}
+						href={item.url ? (item.type === 'file' ? `${item.url}/content` : `${item.url}`) : '#'}
 						target="_blank"
 						class="hover:underline line-clamp-1"
 					>
-						{file?.name ?? 'File'}
+						{item?.name ?? 'File'}
 					</a>
 				</div>
 			</div>
@@ -56,14 +56,14 @@
 		<div>
 			<div class="flex flex-col items-center md:flex-row gap-1 justify-between w-full">
 				<div class=" flex flex-wrap text-sm gap-1 text-gray-500">
-					{#if file.size}
-						<div class="capitalize shrink-0">{formatFileSize(file.size)}</div>
+					{#if item.size}
+						<div class="capitalize shrink-0">{formatFileSize(item.size)}</div>
 						•
 					{/if}

-					{#if file?.file?.content}
+					{#if item?.file?.data?.content}
 						<div class="capitalize shrink-0">
-							{getLineCount(file?.file?.content ?? '')} extracted lines
+							{getLineCount(item?.file?.data?.content ?? '')} extracted lines
 						</div>

 						<div class="flex items-center gap-1 shrink-0">
@@ -90,7 +90,7 @@
 								<Switch
 									bind:state={enableFullContent}
 									on:change={(e) => {
-										file.context = e.detail ? 'full' : undefined;
+										item.context = e.detail ? 'full' : undefined;
 									}}
 								/>
 							</div>
@@ -102,7 +102,7 @@
 		</div>

 		<div class="max-h-96 overflow-scroll scrollbar-hidden text-xs whitespace-pre-wrap">
-			{file?.file?.content ?? 'No content'}
+			{item?.file?.data?.content ?? 'No content'}
 		</div>
 	</div>
 </Modal>
diff --git a/src/lib/components/workspace/Knowledge/Collection.svelte b/src/lib/components/workspace/Knowledge/Collection.svelte
index bbc03f051..46bfafc36 100644
--- a/src/lib/components/workspace/Knowledge/Collection.svelte
+++ b/src/lib/components/workspace/Knowledge/Collection.svelte
@@ -9,7 +9,12 @@
 	import { mobile, showSidebar } from '$lib/stores';

 	import { uploadFile } from '$lib/apis/files';
-	import { getKnowledgeById, updateKnowledgeById } from '$lib/apis/knowledge';
+	import {
+		addFileToKnowledgeById,
+		getKnowledgeById,
+		removeFileFromKnowledgeById,
+		updateKnowledgeById
+	} from '$lib/apis/knowledge';

 	import Spinner from '$lib/components/common/Spinner.svelte';
 	import Tooltip from '$lib/components/common/Tooltip.svelte';
@@ -77,7 +82,7 @@

 			if (uploadedFile) {
 				console.log(uploadedFile);
-				processFileHandler(uploadedFile);
+				addFileHandler(uploadedFile.id);
 			} else {
 				toast.error($i18n.t('Failed to upload file.'));
 			}
@@ -86,34 +91,31 @@
 		}
 	};

-	const processFileHandler = async (uploadedFile) => {
-		const processedFile = await processFile(localStorage.token, uploadedFile.id, id).catch((e) => {
-			toast.error(e);
+	const addFileHandler = async (fileId) => {
+		const updatedKnowledge = await addFileToKnowledgeById(localStorage.token, id, fileId).catch(
+			(e) => {
+				toast.error(e);
+			}
+		);
+
+		if (updatedKnowledge) {
+			knowledge = updatedKnowledge;
+			toast.success($i18n.t('File added successfully.'));
+		}
+	};
+
+	const deleteFileHandler = async (fileId) => {
+		const updatedKnowledge = await removeFileFromKnowledgeById(
+			localStorage.token,
+			id,
+			fileId
+		).catch((e) => {
+			toast.error(e);
 		});

-		if (processedFile.status) {
-			console.log(processedFile);
-
-			if (!knowledge.data) {
-				knowledge.data = {};
-			}
-
-			knowledge.data.file_ids = [...(knowledge?.data?.file_ids ?? []), uploadedFile.id];
-
-			console.log(knowledge);
-
-			const updatedKnowledge = await updateKnowledgeById(localStorage.token, id, {
-				data: knowledge?.data ?? {}
-			}).catch((e) => {
-				console.error(e);
-			});
-
-			if (updatedKnowledge) {
-				knowledge = updatedKnowledge;
-				toast.success($i18n.t('File added successfully.'));
-			}
-		} else {
-			toast.error($i18n.t('Failed to process file.'));
+		if (updatedKnowledge) {
+			knowledge = updatedKnowledge;
+			toast.success($i18n.t('File removed successfully.'));
 		}
 	};

@@ -338,6 +340,8 @@
 								}}
 								on:delete={(e) => {
 									console.log(e.detail);
+
+									deleteFileHandler(e.detail);
 								}}
 							/>
 						</div>