From 353b104c77e83b644b48382cefa48bd602d0c212 Mon Sep 17 00:00:00 2001 From: Juan Calderon-Perez <835733+gaby@users.noreply.github.com> Date: Tue, 8 Apr 2025 00:42:37 -0400 Subject: [PATCH 1/6] add support for searching files --- backend/open_webui/routers/files.py | 43 +++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/backend/open_webui/routers/files.py b/backend/open_webui/routers/files.py index c30366545..1c1c1657e 100644 --- a/backend/open_webui/routers/files.py +++ b/backend/open_webui/routers/files.py @@ -1,6 +1,8 @@ import logging import os import uuid +from fnmatch import fnmatch +from functools import lru_cache from pathlib import Path from typing import Optional from urllib.parse import quote @@ -72,6 +74,19 @@ def has_access_to_file( return has_access +############################ +# Get all files for user, with 1 cache +############################ + + +@lru_cache(maxsize=1) +def get_all_files_for_user(user_id: str, admin: bool): + if admin: + return Files.get_files() + else: + return Files.get_files_by_user_id(user_id) + + ############################ # Upload File ############################ @@ -177,6 +192,34 @@ async def list_files(user=Depends(get_verified_user), content: bool = Query(True return files +############################ +# Search Files +############################ + + +@router.get("/search", response_model=list[FileModelResponse]) +async def search_files( + filename: str = Query(..., description="Filename pattern to search for. Supports wildcards such as '*.pdf'"), + user=Depends(get_verified_user) +): + # Retrieve files from cache + files = get_all_files_for_user(user.id, user.role == "admin") + + # Normalize pattern and file names + normalized_pattern = normalize_text(filename).lower() + matching_files = [ + file for file in files + if fnmatch(normalize_text(file.filename).lower(), normalized_pattern) + ] + + if not matching_files: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="No files found matching the pattern." + ) + return matching_files + + ############################ # Delete All Files ############################ From 7c1b0046874ae7182c443ab91246c1e384eafb21 Mon Sep 17 00:00:00 2001 From: Juan Calderon-Perez <835733+gaby@users.noreply.github.com> Date: Tue, 8 Apr 2025 00:44:47 -0400 Subject: [PATCH 2/6] simplify logic --- backend/open_webui/routers/files.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/backend/open_webui/routers/files.py b/backend/open_webui/routers/files.py index 1c1c1657e..c16870e9b 100644 --- a/backend/open_webui/routers/files.py +++ b/backend/open_webui/routers/files.py @@ -205,12 +205,8 @@ async def search_files( # Retrieve files from cache files = get_all_files_for_user(user.id, user.role == "admin") - # Normalize pattern and file names - normalized_pattern = normalize_text(filename).lower() - matching_files = [ - file for file in files - if fnmatch(normalize_text(file.filename).lower(), normalized_pattern) - ] + # Get matching files + matching_files = [file for file in files if fnmatch(file.filename.lower(), filename.lower())] if not matching_files: raise HTTPException( From e06ff17a709cfa6e320abc2f43850c0fab4e9908 Mon Sep 17 00:00:00 2001 From: Juan Calderon-Perez <835733+gaby@users.noreply.github.com> Date: Tue, 8 Apr 2025 00:48:54 -0400 Subject: [PATCH 3/6] Fix formatting --- backend/open_webui/routers/files.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/backend/open_webui/routers/files.py b/backend/open_webui/routers/files.py index c16870e9b..eed622029 100644 --- a/backend/open_webui/routers/files.py +++ b/backend/open_webui/routers/files.py @@ -199,19 +199,24 @@ async def list_files(user=Depends(get_verified_user), content: bool = Query(True @router.get("/search", response_model=list[FileModelResponse]) async def search_files( - filename: str = Query(..., description="Filename pattern to search for. Supports wildcards such as '*.pdf'"), + filename: str = Query( + ..., + description="Filename pattern to search for. Supports wildcards such as '*.txt'" + ), user=Depends(get_verified_user) ): # Retrieve files from cache files = get_all_files_for_user(user.id, user.role == "admin") # Get matching files - matching_files = [file for file in files if fnmatch(file.filename.lower(), filename.lower())] + matching_files = [ + file for file in files if fnmatch(file.filename.lower(), filename.lower()) + ] if not matching_files: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, - detail="No files found matching the pattern." + detail="No files found matching the pattern.", ) return matching_files From 8609ca3657743c60a45fbefce02c8c0f48bd368b Mon Sep 17 00:00:00 2001 From: Juan Calderon-Perez <835733+gaby@users.noreply.github.com> Date: Tue, 8 Apr 2025 00:49:55 -0400 Subject: [PATCH 4/6] Fix formatting again --- backend/open_webui/routers/files.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/open_webui/routers/files.py b/backend/open_webui/routers/files.py index eed622029..e1113473d 100644 --- a/backend/open_webui/routers/files.py +++ b/backend/open_webui/routers/files.py @@ -201,9 +201,9 @@ async def list_files(user=Depends(get_verified_user), content: bool = Query(True async def search_files( filename: str = Query( ..., - description="Filename pattern to search for. Supports wildcards such as '*.txt'" + description="Filename pattern to search for. Supports wildcards such as '*.txt'", ), - user=Depends(get_verified_user) + user=Depends(get_verified_user), ): # Retrieve files from cache files = get_all_files_for_user(user.id, user.role == "admin") @@ -212,7 +212,7 @@ async def search_files( matching_files = [ file for file in files if fnmatch(file.filename.lower(), filename.lower()) ] - + if not matching_files: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, From 1c60b8d543d37fc56c71b2a7debaf3d9dda76570 Mon Sep 17 00:00:00 2001 From: Juan Calderon-Perez <835733+gaby@users.noreply.github.com> Date: Tue, 8 Apr 2025 00:56:21 -0400 Subject: [PATCH 5/6] Rewrite logic --- backend/open_webui/routers/files.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/backend/open_webui/routers/files.py b/backend/open_webui/routers/files.py index e1113473d..61ee89f43 100644 --- a/backend/open_webui/routers/files.py +++ b/backend/open_webui/routers/files.py @@ -2,7 +2,6 @@ import logging import os import uuid from fnmatch import fnmatch -from functools import lru_cache from pathlib import Path from typing import Optional from urllib.parse import quote @@ -74,19 +73,6 @@ def has_access_to_file( return has_access -############################ -# Get all files for user, with 1 cache -############################ - - -@lru_cache(maxsize=1) -def get_all_files_for_user(user_id: str, admin: bool): - if admin: - return Files.get_files() - else: - return Files.get_files_by_user_id(user_id) - - ############################ # Upload File ############################ @@ -205,8 +191,14 @@ async def search_files( ), user=Depends(get_verified_user), ): - # Retrieve files from cache - files = get_all_files_for_user(user.id, user.role == "admin") + """ + Search for files by filename with support for wildcard patterns. + """ + # Get files according to user role + if user.role == "admin": + files = Files.get_files() + else: + files = Files.get_files_by_user_id(user.id) # Get matching files matching_files = [ From fed47f2e2b2917f7fc033cec2e386dc03c8a2c99 Mon Sep 17 00:00:00 2001 From: Juan Calderon-Perez <835733+gaby@users.noreply.github.com> Date: Tue, 8 Apr 2025 01:00:06 -0400 Subject: [PATCH 6/6] Add content param to /search route --- backend/open_webui/routers/files.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/backend/open_webui/routers/files.py b/backend/open_webui/routers/files.py index 61ee89f43..8a2888d86 100644 --- a/backend/open_webui/routers/files.py +++ b/backend/open_webui/routers/files.py @@ -189,6 +189,7 @@ async def search_files( ..., description="Filename pattern to search for. Supports wildcards such as '*.txt'", ), + content: bool = Query(True), user=Depends(get_verified_user), ): """ @@ -210,6 +211,11 @@ async def search_files( status_code=status.HTTP_404_NOT_FOUND, detail="No files found matching the pattern.", ) + + if not content: + for file in matching_files: + del file.data["content"] + return matching_files