refac: audio transcription issue
Some checks are pending
Deploy to HuggingFace Spaces / check-secret (push) Waiting to run
Deploy to HuggingFace Spaces / deploy (push) Blocked by required conditions
Create and publish Docker images with specific build args / build-main-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-main-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / merge-main-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-cuda-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-ollama-images (push) Blocked by required conditions
Python CI / Format Backend (3.11.x) (push) Waiting to run
Python CI / Format Backend (3.12.x) (push) Waiting to run
Frontend Build / Format & Build Frontend (push) Waiting to run
Frontend Build / Frontend Unit Tests (push) Waiting to run

This commit is contained in:
Timothy Jaeryang Baek 2025-05-08 22:57:48 +04:00
parent bfa5550cc3
commit 827326e1a2
2 changed files with 25 additions and 16 deletions

View File

@@ -71,13 +71,15 @@ from pydub import AudioSegment
from pydub.utils import mediainfo from pydub.utils import mediainfo
def get_audio_format(file_path): def get_audio_convert_format(file_path):
"""Check if the given file needs to be converted to a different format.""" """Check if the given file needs to be converted to a different format."""
if not os.path.isfile(file_path): if not os.path.isfile(file_path):
log.error(f"File not found: {file_path}") log.error(f"File not found: {file_path}")
return False return False
try:
info = mediainfo(file_path) info = mediainfo(file_path)
if ( if (
info.get("codec_name") == "aac" info.get("codec_name") == "aac"
and info.get("codec_type") == "audio" and info.get("codec_type") == "audio"
@@ -86,6 +88,9 @@ def get_audio_format(file_path):
return "mp4" return "mp4"
elif info.get("format_name") == "ogg": elif info.get("format_name") == "ogg":
return "ogg" return "ogg"
except Exception as e:
log.error(f"Error getting audio format: {e}")
return False
return None return None
@@ -537,14 +542,18 @@ def transcribe(request: Request, file_path):
log.debug(data) log.debug(data)
return data return data
elif request.app.state.config.STT_ENGINE == "openai": elif request.app.state.config.STT_ENGINE == "openai":
audio_format = get_audio_format(file_path) convert_format = get_audio_convert_format(file_path)
if audio_format:
os.rename(file_path, file_path.replace(".wav", f".{audio_format}")) print(f"convert_format: {convert_format}")
if convert_format:
ext = convert_format.split(".")[-1]
os.rename(file_path, file_path.replace(".{ext}", f".{convert_format}"))
# Convert unsupported audio file to WAV format # Convert unsupported audio file to WAV format
convert_audio_to_wav( convert_audio_to_wav(
file_path.replace(".wav", f".{audio_format}"), file_path.replace(".{ext}", f".{convert_format}"),
file_path, file_path,
audio_format, convert_format,
) )
r = None r = None

View File

@@ -133,6 +133,7 @@ def upload_file(
"audio/ogg", "audio/ogg",
"audio/x-m4a", "audio/x-m4a",
"audio/webm", "audio/webm",
"video/webm",
) )
): ):
file_path = Storage.get_file(file_path) file_path = Storage.get_file(file_path)
@@ -150,7 +151,6 @@ def upload_file(
"video/mp4", "video/mp4",
"video/ogg", "video/ogg",
"video/quicktime", "video/quicktime",
"video/webm",
]: ]:
process_file(request, ProcessFileForm(file_id=id), user=user) process_file(request, ProcessFileForm(file_id=id), user=user)