mirror of
https://github.com/open-webui/open-webui
synced 2025-06-26 18:26:48 +00:00
refac: audio transcription issue
Some checks are pending
Deploy to HuggingFace Spaces / check-secret (push) Waiting to run
Deploy to HuggingFace Spaces / deploy (push) Blocked by required conditions
Create and publish Docker images with specific build args / build-main-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-main-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / merge-main-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-cuda-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-ollama-images (push) Blocked by required conditions
Python CI / Format Backend (3.11.x) (push) Waiting to run
Python CI / Format Backend (3.12.x) (push) Waiting to run
Frontend Build / Format & Build Frontend (push) Waiting to run
Frontend Build / Frontend Unit Tests (push) Waiting to run
Some checks are pending
Deploy to HuggingFace Spaces / check-secret (push) Waiting to run
Deploy to HuggingFace Spaces / deploy (push) Blocked by required conditions
Create and publish Docker images with specific build args / build-main-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-main-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / merge-main-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-cuda-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-ollama-images (push) Blocked by required conditions
Python CI / Format Backend (3.11.x) (push) Waiting to run
Python CI / Format Backend (3.12.x) (push) Waiting to run
Frontend Build / Format & Build Frontend (push) Waiting to run
Frontend Build / Frontend Unit Tests (push) Waiting to run
This commit is contained in:
parent
bfa5550cc3
commit
827326e1a2
@ -71,21 +71,26 @@ from pydub import AudioSegment
|
||||
from pydub.utils import mediainfo
|
||||
|
||||
|
||||
def get_audio_format(file_path):
|
||||
def get_audio_convert_format(file_path):
|
||||
"""Check if the given file needs to be converted to a different format."""
|
||||
if not os.path.isfile(file_path):
|
||||
log.error(f"File not found: {file_path}")
|
||||
return False
|
||||
|
||||
info = mediainfo(file_path)
|
||||
if (
|
||||
info.get("codec_name") == "aac"
|
||||
and info.get("codec_type") == "audio"
|
||||
and info.get("codec_tag_string") == "mp4a"
|
||||
):
|
||||
return "mp4"
|
||||
elif info.get("format_name") == "ogg":
|
||||
return "ogg"
|
||||
try:
|
||||
info = mediainfo(file_path)
|
||||
|
||||
if (
|
||||
info.get("codec_name") == "aac"
|
||||
and info.get("codec_type") == "audio"
|
||||
and info.get("codec_tag_string") == "mp4a"
|
||||
):
|
||||
return "mp4"
|
||||
elif info.get("format_name") == "ogg":
|
||||
return "ogg"
|
||||
except Exception as e:
|
||||
log.error(f"Error getting audio format: {e}")
|
||||
return False
|
||||
|
||||
return None
|
||||
|
||||
@ -537,14 +542,18 @@ def transcribe(request: Request, file_path):
|
||||
log.debug(data)
|
||||
return data
|
||||
elif request.app.state.config.STT_ENGINE == "openai":
|
||||
audio_format = get_audio_format(file_path)
|
||||
if audio_format:
|
||||
os.rename(file_path, file_path.replace(".wav", f".{audio_format}"))
|
||||
convert_format = get_audio_convert_format(file_path)
|
||||
|
||||
print(f"convert_format: {convert_format}")
|
||||
if convert_format:
|
||||
ext = convert_format.split(".")[-1]
|
||||
|
||||
os.rename(file_path, file_path.replace(".{ext}", f".{convert_format}"))
|
||||
# Convert unsupported audio file to WAV format
|
||||
convert_audio_to_wav(
|
||||
file_path.replace(".wav", f".{audio_format}"),
|
||||
file_path.replace(".{ext}", f".{convert_format}"),
|
||||
file_path,
|
||||
audio_format,
|
||||
convert_format,
|
||||
)
|
||||
|
||||
r = None
|
||||
|
@ -133,6 +133,7 @@ def upload_file(
|
||||
"audio/ogg",
|
||||
"audio/x-m4a",
|
||||
"audio/webm",
|
||||
"video/webm",
|
||||
)
|
||||
):
|
||||
file_path = Storage.get_file(file_path)
|
||||
@ -150,7 +151,6 @@ def upload_file(
|
||||
"video/mp4",
|
||||
"video/ogg",
|
||||
"video/quicktime",
|
||||
"video/webm",
|
||||
]:
|
||||
process_file(request, ProcessFileForm(file_id=id), user=user)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user