open-webui/backend/open_webui/routers/files.py

372 lines
11 KiB
Python
Raw Normal View History

2024-08-27 22:10:27 +00:00
import logging
2024-06-18 18:36:55 +00:00
import os
import uuid
2024-08-27 22:10:27 +00:00
from pathlib import Path
from typing import Optional
2024-10-04 07:23:14 +00:00
from pydantic import BaseModel
2024-10-05 00:22:00 +00:00
import mimetypes
2024-12-16 07:08:51 +00:00
from urllib.parse import quote
2024-10-05 00:22:00 +00:00
2024-10-21 06:38:26 +00:00
from open_webui.storage.provider import Storage
2024-06-18 18:36:55 +00:00
2024-12-10 08:54:13 +00:00
from open_webui.models.files import (
2024-10-21 06:38:26 +00:00
FileForm,
FileModel,
FileModelResponse,
Files,
)
2024-12-12 02:36:59 +00:00
from open_webui.routers.retrieval import process_file, ProcessFileForm
2024-10-04 05:22:22 +00:00
2024-10-05 08:45:22 +00:00
from open_webui.config import UPLOAD_DIR
from open_webui.env import SRC_LOG_LEVELS
2024-10-05 00:22:00 +00:00
from open_webui.constants import ERROR_MESSAGES
2024-10-03 03:42:10 +00:00
2024-12-13 06:32:28 +00:00
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status, Request
2024-10-02 13:19:09 +00:00
from fastapi.responses import FileResponse, StreamingResponse
2024-10-05 00:22:00 +00:00
2024-12-09 00:01:56 +00:00
from open_webui.utils.auth import get_admin_user, get_verified_user
2024-06-18 18:36:55 +00:00
# Module-level logger; its level comes from the "MODELS" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["MODELS"])
# Router on which the file endpoints in this module are registered.
router = APIRouter()
############################
# Upload File
############################
2024-10-26 19:56:37 +00:00
@router.post("/", response_model=FileModelResponse)
def upload_file(
    request: Request, file: UploadFile = File(...), user=Depends(get_verified_user)
):
    """Store an uploaded file, record it, and attempt to process it.

    The upload is saved through ``Storage`` under ``<uuid>_<basename>``, a
    ``Files`` record is inserted for ``user``, and ``process_file`` is then
    run for the new record. A processing failure does not fail the upload:
    the record is returned with an additional ``error`` field instead.

    Args:
        request: Incoming request, forwarded to ``process_file``.
        file: The multipart upload.
        user: Caller, supplied by the ``get_verified_user`` dependency.

    Returns:
        The stored file record (with ``error`` set if processing failed).

    Raises:
        HTTPException: 400 if no file record is available to return, or if
            storing/recording the upload raised.
    """
    log.info(f"file.content_type: {file.content_type}")
    try:
        unsanitized_filename = file.filename
        # Keep only the last path component of the client-supplied name.
        filename = os.path.basename(unsanitized_filename)

        # Prefix the stored name with a fresh uuid; the uuid is also the record id.
        id = str(uuid.uuid4())
        name = filename
        filename = f"{id}_{filename}"
        contents, file_path = Storage.upload_file(file.file, filename)

        file_item = Files.insert_new_file(
            user.id,
            FileForm(
                **{
                    "id": id,
                    "filename": name,
                    "path": file_path,
                    "meta": {
                        "name": name,
                        "content_type": file.content_type,
                        "size": len(contents),
                    },
                }
            ),
        )

        try:
            process_file(request, ProcessFileForm(file_id=id))
            # Re-read the record after processing.
            file_item = Files.get_file_by_id(id=id)
        except Exception as e:
            # Processing is best-effort: report the failure on the response
            # rather than rejecting the upload.
            log.exception(e)
            log.error(f"Error processing file: {file_item.id}")
            file_item = FileModelResponse(
                **{
                    **file_item.model_dump(),
                    "error": str(e.detail) if hasattr(e, "detail") else str(e),
                }
            )

        if file_item:
            return file_item
        else:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=ERROR_MESSAGES.DEFAULT("Error uploading file"),
            )
    except HTTPException:
        # Already a well-formed HTTP error; let it through unchanged instead
        # of re-wrapping it (and its message) in the generic handler below.
        raise
    except Exception as e:
        log.exception(e)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT(e),
        )
############################
# List Files
############################
2024-10-21 06:38:26 +00:00
@router.get("/", response_model=list[FileModelResponse])
async def list_files(user=Depends(get_verified_user)):
    """List files: all of them for an admin, otherwise only the caller's own."""
    if user.role != "admin":
        return Files.get_files_by_user_id(user.id)
    return Files.get_files()
2024-06-18 22:20:04 +00:00
############################
# Delete All Files
############################
@router.delete("/all")
async def delete_all_files(user=Depends(get_admin_user)):
    """Delete every file record, then clear the backing storage.

    Raises:
        HTTPException: 400 if the records could not be deleted, or if
            clearing storage raised.
    """
    # Records go first; storage is only touched once that succeeded.
    if not Files.delete_all_files():
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
        )

    try:
        Storage.delete_all_files()
    except Exception as err:
        log.exception(err)
        log.error("Error deleting files")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
        )

    return {"message": "All files deleted successfully"}
2024-06-18 18:36:55 +00:00
############################
# Get File By Id
############################
@router.get("/{id}", response_model=Optional[FileModel])
async def get_file_by_id(id: str, user=Depends(get_verified_user)):
    """Return the file record for ``id`` if the caller owns it or is an admin.

    Raises:
        HTTPException: 404 if the record is missing or belongs to another
            user and the caller is not an admin.
    """
    file = Files.get_file_by_id(id)

    # Guard clause: missing record, or neither owner nor admin.
    if not file or (file.user_id != user.id and user.role != "admin"):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    return file
2024-10-04 07:23:14 +00:00
############################
# Get File Data Content By Id
############################
@router.get("/{id}/data/content")
async def get_file_data_content_by_id(id: str, user=Depends(get_verified_user)):
    """Return the ``content`` entry of the file's ``data`` (empty string if absent).

    Raises:
        HTTPException: 404 if the record is missing or the caller is neither
            its owner nor an admin.
    """
    file = Files.get_file_by_id(id)

    if not file or (file.user_id != user.id and user.role != "admin"):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    return {"content": file.data.get("content", "")}
############################
# Update File Data Content By Id
############################
class ContentForm(BaseModel):
    """Request body carrying replacement text content for a file."""

    content: str
@router.post("/{id}/data/content/update")
async def update_file_data_content_by_id(
    request: Request, id: str, form_data: ContentForm, user=Depends(get_verified_user)
):
    """Re-process a file with caller-supplied content and return the stored content.

    ``process_file`` is invoked with the new content; if it raises, the error
    is logged and the content of the previously loaded record is returned.

    Raises:
        HTTPException: 404 if the record is missing or the caller is neither
            its owner nor an admin.
    """
    file = Files.get_file_by_id(id)

    if not file or (file.user_id != user.id and user.role != "admin"):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    try:
        process_file(
            request, ProcessFileForm(file_id=id, content=form_data.content)
        )
        # Reload so the response reflects what was stored by processing.
        file = Files.get_file_by_id(id=id)
    except Exception as err:
        log.exception(err)
        log.error(f"Error processing file: {file.id}")

    return {"content": file.data.get("content", "")}
2024-06-18 21:33:44 +00:00
############################
# Get File Content By Id
############################
@router.get("/{id}/content")
async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
    """Serve the stored file for ``id`` to its owner or an admin.

    Files whose recorded content type is neither ``application/pdf`` nor
    ``text/plain`` are sent with a ``Content-Disposition: attachment`` header
    carrying the percent-encoded original name.

    Raises:
        HTTPException: 404 if the record is missing, the caller is neither
            owner nor admin, or the resolved path is not a file; 400 if
            resolving or serving the file raised unexpectedly.
    """
    file = Files.get_file_by_id(id)

    if not file or (file.user_id != user.id and user.role != "admin"):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    try:
        file_path = Path(Storage.get_file(file.path))

        # The resolved path must point at an existing regular file.
        if not file_path.is_file():
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=ERROR_MESSAGES.NOT_FOUND,
            )

        # Handle Unicode filenames
        filename = file.meta.get("name", file.filename)
        encoded_filename = quote(filename)  # RFC5987 encoding

        headers = {}
        if file.meta.get("content_type") not in [
            "application/pdf",
            "text/plain",
        ]:
            headers = {
                **headers,
                "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}",
            }

        return FileResponse(file_path, headers=headers)
    except HTTPException:
        # Keep the 404 raised above; without this clause the generic handler
        # below would catch it and report a 400 instead.
        raise
    except Exception as e:
        log.exception(e)
        log.error("Error getting file content")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT("Error getting file content"),
        )
@router.get("/{id}/content/html")
async def get_html_file_content_by_id(id: str, user=Depends(get_verified_user)):
    """Serve the stored file for ``id`` without extra response headers.

    Raises:
        HTTPException: 404 if the record is missing, the caller is neither
            owner nor admin, or the resolved path is not a file; 400 if
            resolving or serving the file raised unexpectedly.
    """
    file = Files.get_file_by_id(id)

    if not file or (file.user_id != user.id and user.role != "admin"):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    try:
        file_path = Path(Storage.get_file(file.path))

        # The resolved path must point at an existing regular file.
        if not file_path.is_file():
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=ERROR_MESSAGES.NOT_FOUND,
            )

        # Use the module logger rather than writing to stdout.
        log.info(f"file_path: {file_path}")
        return FileResponse(file_path)
    except HTTPException:
        # Keep the 404 raised above; without this clause the generic handler
        # below would catch it and report a 400 instead.
        raise
    except Exception as e:
        log.exception(e)
        log.error("Error getting file content")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT("Error getting file content"),
        )
@router.get("/{id}/content/{file_name}")
async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
    """Serve a file as an attachment; fall back to its text content if it has no path.

    When the record has a ``path``, the stored file is returned. Otherwise the
    ``content`` entry of the record's ``data`` is streamed as ``text/plain``.
    Both responses carry a ``Content-Disposition: attachment`` header with the
    percent-encoded original name. The ``file_name`` path segment is not read
    by this handler.

    NOTE(review): this function shares its name with the handler registered
    for ``/{id}/content``; the name is kept as-is here.

    Raises:
        HTTPException: 404 if the record is missing, the caller is neither
            owner nor admin, or the resolved path is not a file.
    """
    file = Files.get_file_by_id(id)

    if not file or (file.user_id != user.id and user.role != "admin"):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    file_path = file.path

    # Handle Unicode filenames
    filename = file.meta.get("name", file.filename)
    encoded_filename = quote(filename)  # RFC5987 encoding
    headers = {
        "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"
    }

    if file_path:
        file_path = Path(Storage.get_file(file_path))

        # The resolved path must point at an existing regular file.
        if file_path.is_file():
            return FileResponse(file_path, headers=headers)

        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    # No stored path: return the extracted text as .txt if possible.
    # The text is read from ``file.data`` to match the other handlers in
    # this module, which read it from ``file.data`` rather than
    # ``file.content``; a missing ``data`` dict yields empty content.
    file_content = (file.data or {}).get("content", "")

    # Create a generator that encodes the file content
    def generator():
        yield file_content.encode("utf-8")

    return StreamingResponse(
        generator(),
        media_type="text/plain",
        headers=headers,
    )
2024-06-18 18:36:55 +00:00
############################
# Delete File By Id
############################
@router.delete("/{id}")
async def delete_file_by_id(id: str, user=Depends(get_verified_user)):
    """Delete a file record and then its stored file.

    Raises:
        HTTPException: 404 if the record is missing or the caller is neither
            its owner nor an admin; 400 if the record could not be deleted
            or removing the stored file raised.
    """
    file = Files.get_file_by_id(id)

    if not file or (file.user_id != user.id and user.role != "admin"):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    # We should add Chroma cleanup here

    if not Files.delete_file_by_id(id):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT("Error deleting file"),
        )

    # The record is gone; now remove the stored file it pointed at.
    try:
        Storage.delete_file(file.path)
    except Exception as err:
        log.exception(err)
        log.error("Error deleting files")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT("Error deleting files"),
        )

    return {"message": "File deleted successfully"}