refac: audio transcription issue

2025-06-26 18:26:48 +00:00 · 2025-05-08 22:57:48 +04:00 · 2025-05-08 22:57:48 +04:00 · 827326e1a2
commit 827326e1a2
parent bfa5550cc3
2 changed files with 25 additions and 16 deletions
--- a/backend/open_webui/routers/audio.py
+++ b/backend/open_webui/routers/audio.py
@ -71,13 +71,15 @@ from pydub import AudioSegment
 from pydub.utils import mediainfo
-def get_audio_format(file_path):
+def get_audio_convert_format(file_path):
    """Check if the given file needs to be converted to a different format."""
    if not os.path.isfile(file_path):
        log.error(f"File not found: {file_path}")
        return False
    try:
        info = mediainfo(file_path)
        if (
            info.get("codec_name") == "aac"
            and info.get("codec_type") == "audio"
@ -86,6 +88,9 @@ def get_audio_format(file_path):
            return "mp4"
        elif info.get("format_name") == "ogg":
            return "ogg"
    except Exception as e:
        log.error(f"Error getting audio format: {e}")
        return False
    return None
@ -537,14 +542,18 @@ def transcribe(request: Request, file_path):
        log.debug(data)
        return data
    elif request.app.state.config.STT_ENGINE == "openai":
-        audio_format = get_audio_format(file_path)
+        convert_format = get_audio_convert_format(file_path)
-        if audio_format:
+
-            os.rename(file_path, file_path.replace(".wav", f".{audio_format}"))
+        print(f"convert_format: {convert_format}")
        if convert_format:
            ext = convert_format.split(".")[-1]
            os.rename(file_path, file_path.replace(".{ext}", f".{convert_format}"))
            # Convert unsupported audio file to WAV format
            convert_audio_to_wav(
-                file_path.replace(".wav", f".{audio_format}"),
+                file_path.replace(".{ext}", f".{convert_format}"),
                file_path,
-                audio_format,
+                convert_format,
            )
        r = None
--- a/backend/open_webui/routers/files.py
+++ b/backend/open_webui/routers/files.py
@ -133,6 +133,7 @@ def upload_file(
                        "audio/ogg",
                        "audio/x-m4a",
                        "audio/webm",
                        "video/webm",
                    )
                ):
                    file_path = Storage.get_file(file_path)
@ -150,7 +151,6 @@ def upload_file(
                    "video/mp4",
                    "video/ogg",
                    "video/quicktime",
                    "video/webm",
                ]:
                    process_file(request, ProcessFileForm(file_id=id), user=user)