diff --git a/backend/open_webui/models/files.py b/backend/open_webui/models/files.py
index 937a11768..4097ae08e 100644
--- a/backend/open_webui/models/files.py
+++ b/backend/open_webui/models/files.py
@@ -233,6 +233,76 @@ class FilesTable:
                 for file in db.query(File).filter_by(user_id=user_id).all()
             ]
 
+    @staticmethod
+    def _glob_to_like_pattern(glob: str) -> str:
+        """
+        Convert a glob/fnmatch pattern to a SQL LIKE pattern.
+
+        Escapes SQL special characters and converts glob wildcards:
+        - `*` becomes `%` (match any sequence of characters)
+        - `?` becomes `_` (match exactly one character)
+
+        Callers must declare backslash as the LIKE escape character.
+        fnmatch character classes (`[seq]`, `[!seq]`) are not translated;
+        brackets are passed through unchanged as literal characters.
+
+        Args:
+            glob: A glob pattern (e.g., "*.txt", "file?.doc")
+
+        Returns:
+            A SQL LIKE compatible pattern with proper escaping.
+        """
+        # Escape SQL special characters first, then convert glob wildcards
+        pattern = glob.replace("\\", "\\\\")
+        pattern = pattern.replace("%", "\\%")
+        pattern = pattern.replace("_", "\\_")
+        pattern = pattern.replace("*", "%")
+        pattern = pattern.replace("?", "_")
+        return pattern
+
+    def search_files(
+        self,
+        user_id: Optional[str] = None,
+        filename: str = "*",
+        skip: int = 0,
+        limit: int = 100,
+        db: Optional[Session] = None,
+    ) -> list[FileModel]:
+        """
+        Search files with glob pattern matching, optional user filter, and pagination.
+
+        Args:
+            user_id: Filter by user ID. If None, returns files for all users.
+            filename: Glob pattern to match filenames (e.g., "*.txt"). Default "*" matches all.
+            skip: Number of results to skip for pagination.
+            limit: Maximum number of results to return.
+            db: Optional database session.
+
+        Returns:
+            List of matching FileModel objects, ordered by updated_at descending
+            (ties broken by id so that pages are stable).
+        """
+        with get_db_context(db) as db:
+            query = db.query(File)
+
+            # Only None means "all users"; an empty ID must not widen the scope
+            if user_id is not None:
+                query = query.filter_by(user_id=user_id)
+
+            pattern = self._glob_to_like_pattern(filename)
+            if pattern != "%":
+                query = query.filter(File.filename.ilike(pattern, escape="\\"))
+
+            # updated_at is not unique; a unique tie-breaker keeps OFFSET/LIMIT
+            # pages deterministic (no repeated or skipped rows between pages)
+            return [
+                FileModel.model_validate(file)
+                for file in query.order_by(File.updated_at.desc(), File.id)
+                .offset(skip)
+                .limit(limit)
+                .all()
+            ]
+
     def update_file_by_id(
         self, id: str, form_data: FileUpdateForm, db: Optional[Session] = None
     ) -> Optional[FileModel]:
diff --git a/backend/open_webui/routers/files.py b/backend/open_webui/routers/files.py
index 311f5f7b8..15d2a503c 100644
--- a/backend/open_webui/routers/files.py
+++ b/backend/open_webui/routers/files.py
@@ -2,7 +2,6 @@ import logging
 import os
 import uuid
 import json
-from fnmatch import fnmatch
 from pathlib import Path
 from typing import Optional
 from urllib.parse import quote
@@ -377,35 +376,39 @@ async def search_files(
         description="Filename pattern to search for. Supports wildcards such as '*.txt'",
     ),
     content: bool = Query(True),
+    skip: int = Query(0, ge=0, description="Number of files to skip"),
+    limit: int = Query(100, ge=1, le=1000, description="Maximum number of files to return"),
     user=Depends(get_verified_user),
     db: Session = Depends(get_session),
 ):
     """
     Search for files by filename with support for wildcard patterns.
+    Uses SQL-based filtering with pagination for better performance.
     """
-    # Get files according to user role
-    if user.role == "admin":
-        files = Files.get_files(db=db)
-    else:
-        files = Files.get_files_by_user_id(user.id, db=db)
+    # Determine user_id: None for admin (search all), user.id for regular users
+    user_id = None if user.role == "admin" else user.id
 
-    # Get matching files
-    matching_files = [
-        file for file in files if fnmatch(file.filename.lower(), filename.lower())
-    ]
+    # Use optimized database query with pagination
+    files = Files.search_files(
+        user_id=user_id,
+        filename=filename,
+        skip=skip,
+        limit=limit,
+        db=db,
+    )
 
-    if not matching_files:
+    if not files:
         raise HTTPException(
             status_code=status.HTTP_404_NOT_FOUND,
             detail="No files found matching the pattern.",
         )
 
     if not content:
-        for file in matching_files:
-            if "content" in file.data:
+        for file in files:
+            if file.data and "content" in file.data:
                 del file.data["content"]
 
-    return matching_files
+    return files
 
 
 ############################