Merge pull request #12603 from alpha-pet/fix-convert-ogg-container-openai-transcription

fix: Convert ogg to wav for OpenAI transcription endpoint
This commit is contained in:
Tim Jaeryang Baek 2025-04-10 12:25:46 -07:00 committed by GitHub
commit f0efee5de4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -68,8 +68,8 @@ from pydub import AudioSegment
from pydub.utils import mediainfo
def is_mp4_audio(file_path):
"""Check if the given file is an MP4 audio file."""
def audio_needs_conversion(file_path):
"""Check if the given file needs to be converted to a different format."""
if not os.path.isfile(file_path):
log.error(f"File not found: {file_path}")
return False
@ -80,13 +80,17 @@ def is_mp4_audio(file_path):
and info.get("codec_type") == "audio"
and info.get("codec_tag_string") == "mp4a"
):
return True
return False
return "mp4"
elif info.get("format_name") == "ogg":
return "ogg"
elif info.get("format_name") == "matroska,webm":
return "webm"
return None
def convert_mp4_to_wav(file_path, output_path):
"""Convert MP4 audio file to WAV format."""
audio = AudioSegment.from_file(file_path, format="mp4")
def convert_audio_to_wav(file_path, output_path, conversion_type):
"""Convert an audio file to WAV format.

Args:
file_path: Path of the source audio file to read.
output_path: Path the WAV output is written to.
conversion_type: Input format name handed to pydub as ``format``.
The values returned by ``audio_needs_conversion`` are "mp4",
"ogg" and "webm".
"""
# Load the source using the explicitly supplied input format.
audio = AudioSegment.from_file(file_path, format=conversion_type)
# Re-encode and write the result as WAV.
audio.export(output_path, format="wav")
log.info(f"Converted {file_path} to {output_path}")
@ -496,10 +500,15 @@ def transcribe(request: Request, file_path):
log.debug(data)
return data
elif request.app.state.config.STT_ENGINE == "openai":
if is_mp4_audio(file_path):
os.rename(file_path, file_path.replace(".wav", ".mp4"))
# Convert MP4 audio file to WAV format
convert_mp4_to_wav(file_path.replace(".wav", ".mp4"), file_path)
conversion_type = audio_needs_conversion(file_path)
if conversion_type:
os.rename(file_path, file_path.replace(".wav", f".{conversion_type}"))
# Convert unsupported audio file to WAV format
convert_audio_to_wav(
file_path.replace(".wav", f".{conversion_type}"),
file_path,
conversion_type,
)
r = None
try: