From e936d7b53d4828eb3a8080c914f88f0f5cd7b0b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herm=C3=B3genes=20Oliveira?= Date: Sat, 8 Mar 2025 17:29:59 -0300 Subject: [PATCH] fix: audio api endpoint filetype check RFC2046 allows the Content-Type field to have additional parameters after the main type/subtype information (Section 1). Following RFC4281, many applications put codec information inside parameters in the Content-Type. This is especially common for formats that support many codecs, such as Ogg (RFC5334, Section 4). The `/api/audio/transcriptions` endpoint is currently rejecting files that contain parameters in the Content-Type field with a bad request error. This commit changes the current check in order to accept any Content-Type field that begins with a supported type/subtype as listed in the `supported_filetypes` tuple. Since Content-Type here is provided by the user, I believe this check is meant to prevent honest mistakes, like posting a PDF to an audio processing endpoint, not as a security measure against possibly malicious use. Therefore, I think it's OK not to validate the rest of the field. --- backend/open_webui/routers/audio.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/open_webui/routers/audio.py b/backend/open_webui/routers/audio.py index d6f74eac4..ea1372623 100644 --- a/backend/open_webui/routers/audio.py +++ b/backend/open_webui/routers/audio.py @@ -625,7 +625,9 @@ def transcription( ): log.info(f"file.content_type: {file.content_type}") - if file.content_type not in ["audio/mpeg", "audio/wav", "audio/ogg", "audio/x-m4a"]: + supported_filetypes = ("audio/mpeg", "audio/wav", "audio/ogg", "audio/x-m4a") + + if not file.content_type.startswith(supported_filetypes): raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,