diff --git a/backend/open_webui/models/files.py b/backend/open_webui/models/files.py
index 4097ae08e..c24b242bd 100644
--- a/backend/open_webui/models/files.py
+++ b/backend/open_webui/models/files.py
@@ -4,7 +4,7 @@ from typing import Optional
 
 from sqlalchemy.orm import Session
 from open_webui.internal.db import Base, JSONField, get_db, get_db_context
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, model_validator
 from sqlalchemy import BigInteger, Column, String, Text, JSON
 
 log = logging.getLogger(__name__)
@@ -63,6 +63,31 @@ class FileMeta(BaseModel):
 
     model_config = ConfigDict(extra="allow")
 
+    @model_validator(mode="before")
+    @classmethod
+    def sanitize_meta(cls, data):
+        """Sanitize metadata fields to handle malformed legacy data.
+
+        Returns a sanitized copy when a fix is needed; the input dict is
+        never mutated, so the caller's data is left exactly as it was passed.
+        """
+        if not isinstance(data, dict):
+            return data
+
+        # Handle content_type that may be a list like ['application/pdf', None]
+        content_type = data.get("content_type")
+        if content_type is None or isinstance(content_type, str):
+            return data
+
+        # Keep the first string entry of a list; any other type becomes None
+        candidates = content_type if isinstance(content_type, list) else []
+        return {
+            **data,
+            "content_type": next(
+                (item for item in candidates if isinstance(item, str)), None
+            ),
+        }
+
 
 class FileModelResponse(BaseModel):
     id: str
@@ -74,7 +99,7 @@ class FileModelResponse(BaseModel):
     meta: FileMeta
 
     created_at: int  # timestamp in epoch
-    updated_at: int  # timestamp in epoch
+    updated_at: Optional[int] = None  # timestamp in epoch, optional for legacy files
 
     model_config = ConfigDict(extra="allow")
 
diff --git a/backend/open_webui/routers/files.py b/backend/open_webui/routers/files.py
index 130d0486b..505564771 100644
--- a/backend/open_webui/routers/files.py
+++ b/backend/open_webui/routers/files.py
@@ -282,7 +282,7 @@ def upload_file_handler(
                     },
                     "meta": {
                         "name": name,
-                        "content_type": file.content_type,
+                        "content_type": file.content_type if isinstance(file.content_type, str) else None,
                         "size": len(contents),
                         "data": file_metadata,
                     },
@@ -827,6 +827,23 @@ async def delete_file_by_id(
         or has_access_to_file(id, "write", user, db=db)
     ):
 
+        # Clean up KB associations and embeddings before deleting
+        knowledges = Knowledges.get_knowledges_by_file_id(id, db=db)
+        for knowledge in knowledges:
+            # Remove KB-file relationship
+            Knowledges.remove_file_from_knowledge_by_id(knowledge.id, id, db=db)
+            # Clean KB embeddings (same logic as /knowledge/{id}/file/remove)
+            try:
+                VECTOR_DB_CLIENT.delete(
+                    collection_name=knowledge.id, filter={"file_id": id}
+                )
+                if file.hash:
+                    VECTOR_DB_CLIENT.delete(
+                        collection_name=knowledge.id, filter={"hash": file.hash}
+                    )
+            except Exception as e:
+                log.debug(f"KB embedding cleanup for {knowledge.id}: {e}")
+
         result = Files.delete_file_by_id(id, db=db)
         if result:
             try:
diff --git a/src/lib/apis/files/index.ts b/src/lib/apis/files/index.ts
index 44af669fa..15785b354 100644
--- a/src/lib/apis/files/index.ts
+++ b/src/lib/apis/files/index.ts
@@ -175,6 +175,44 @@ export const getFiles = async (token: string = '') => {
 	return res;
 };
 
+export const searchFiles = async (
+	token: string,
+	filename: string = '*',
+	skip: number = 0,
+	limit: number = 50
+) => {
+	let error = null;
+
+	const searchParams = new URLSearchParams();
+	searchParams.append('filename', filename);
+	searchParams.append('skip', String(skip));
+	searchParams.append('limit', String(limit));
+
+	const res = await fetch(`${WEBUI_API_BASE_URL}/files/search?${searchParams.toString()}`, {
+		method: 'GET',
+		headers: {
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
+			authorization: `Bearer ${token}`
+		}
+	})
+		.then(async (res) => {
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.catch((err) => {
+			error = err.detail;
+			console.error(err);
+			return [];
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
+
 export const getFileById = async (token: string, id: string) => {
 	let error = null;
 
diff --git a/src/lib/components/chat/Settings/DataControls.svelte b/src/lib/components/chat/Settings/DataControls.svelte
index 5eed8918e..e20aa72df 100644
--- a/src/lib/components/chat/Settings/DataControls.svelte
+++ b/src/lib/components/chat/Settings/DataControls.svelte
@@ -25,6 +25,7 @@
 	import { toast } from 'svelte-sonner';
 	import ArchivedChatsModal from '$lib/components/layout/ArchivedChatsModal.svelte';
 	import SharedChatsModal from '$lib/components/layout/SharedChatsModal.svelte';
+	import FilesModal from '$lib/components/layout/FilesModal.svelte';
 	import ConfirmDialog from '$lib/components/common/ConfirmDialog.svelte';
 
 	const i18n = getContext('i18n');
@@ -38,6 +39,7 @@
 	let showDeleteConfirmDialog = false;
 	let showArchivedChatsModal = false;
 	let showSharedChatsModal = false;
+	let showFilesModal = false;
 
 	let chatImportInputElement: HTMLInputElement;
 
@@ -139,6 +141,7 @@
+