mirror of
https://github.com/open-webui/open-webui
synced 2025-06-26 18:26:48 +00:00
feat: custom stt content type
Co-Authored-By: Bryan Berns <berns@uwalumni.com>
This commit is contained in:
@@ -2906,6 +2906,12 @@ AUDIO_STT_MODEL = PersistentConfig(
|
||||
os.getenv("AUDIO_STT_MODEL", ""),
|
||||
)
|
||||
|
||||
AUDIO_STT_SUPPORTED_CONTENT_TYPES = PersistentConfig(
|
||||
"AUDIO_STT_SUPPORTED_CONTENT_TYPES",
|
||||
"audio.stt.supported_content_types",
|
||||
os.getenv("AUDIO_STT_SUPPORTED_CONTENT_TYPES", "").split(","),
|
||||
)
|
||||
|
||||
AUDIO_STT_AZURE_API_KEY = PersistentConfig(
|
||||
"AUDIO_STT_AZURE_API_KEY",
|
||||
"audio.stt.azure.api_key",
|
||||
|
||||
@@ -159,6 +159,7 @@ from open_webui.config import (
|
||||
# Audio
|
||||
AUDIO_STT_ENGINE,
|
||||
AUDIO_STT_MODEL,
|
||||
AUDIO_STT_SUPPORTED_CONTENT_TYPES,
|
||||
AUDIO_STT_OPENAI_API_BASE_URL,
|
||||
AUDIO_STT_OPENAI_API_KEY,
|
||||
AUDIO_STT_AZURE_API_KEY,
|
||||
@@ -959,10 +960,12 @@ app.state.config.IMAGE_STEPS = IMAGE_STEPS
|
||||
#
|
||||
########################################
|
||||
|
||||
app.state.config.STT_OPENAI_API_BASE_URL = AUDIO_STT_OPENAI_API_BASE_URL
|
||||
app.state.config.STT_OPENAI_API_KEY = AUDIO_STT_OPENAI_API_KEY
|
||||
app.state.config.STT_ENGINE = AUDIO_STT_ENGINE
|
||||
app.state.config.STT_MODEL = AUDIO_STT_MODEL
|
||||
app.state.config.STT_SUPPORTED_CONTENT_TYPES = AUDIO_STT_SUPPORTED_CONTENT_TYPES
|
||||
|
||||
app.state.config.STT_OPENAI_API_BASE_URL = AUDIO_STT_OPENAI_API_BASE_URL
|
||||
app.state.config.STT_OPENAI_API_KEY = AUDIO_STT_OPENAI_API_KEY
|
||||
|
||||
app.state.config.WHISPER_MODEL = WHISPER_MODEL
|
||||
app.state.config.WHISPER_VAD_FILTER = WHISPER_VAD_FILTER
|
||||
|
||||
@@ -10,7 +10,7 @@ from pydub.silence import split_on_silence
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Optional
|
||||
|
||||
|
||||
from fnmatch import fnmatch
|
||||
import aiohttp
|
||||
import aiofiles
|
||||
import requests
|
||||
@@ -168,6 +168,7 @@ class STTConfigForm(BaseModel):
|
||||
OPENAI_API_KEY: str
|
||||
ENGINE: str
|
||||
MODEL: str
|
||||
SUPPORTED_CONTENT_TYPES: list[str] = []
|
||||
WHISPER_MODEL: str
|
||||
DEEPGRAM_API_KEY: str
|
||||
AZURE_API_KEY: str
|
||||
@@ -202,6 +203,7 @@ async def get_audio_config(request: Request, user=Depends(get_admin_user)):
|
||||
"OPENAI_API_KEY": request.app.state.config.STT_OPENAI_API_KEY,
|
||||
"ENGINE": request.app.state.config.STT_ENGINE,
|
||||
"MODEL": request.app.state.config.STT_MODEL,
|
||||
"SUPPORTED_CONTENT_TYPES": request.app.state.config.STT_SUPPORTED_CONTENT_TYPES,
|
||||
"WHISPER_MODEL": request.app.state.config.WHISPER_MODEL,
|
||||
"DEEPGRAM_API_KEY": request.app.state.config.DEEPGRAM_API_KEY,
|
||||
"AZURE_API_KEY": request.app.state.config.AUDIO_STT_AZURE_API_KEY,
|
||||
@@ -236,6 +238,10 @@ async def update_audio_config(
|
||||
request.app.state.config.STT_OPENAI_API_KEY = form_data.stt.OPENAI_API_KEY
|
||||
request.app.state.config.STT_ENGINE = form_data.stt.ENGINE
|
||||
request.app.state.config.STT_MODEL = form_data.stt.MODEL
|
||||
request.app.state.config.STT_SUPPORTED_CONTENT_TYPES = (
|
||||
form_data.stt.SUPPORTED_CONTENT_TYPES
|
||||
)
|
||||
|
||||
request.app.state.config.WHISPER_MODEL = form_data.stt.WHISPER_MODEL
|
||||
request.app.state.config.DEEPGRAM_API_KEY = form_data.stt.DEEPGRAM_API_KEY
|
||||
request.app.state.config.AUDIO_STT_AZURE_API_KEY = form_data.stt.AZURE_API_KEY
|
||||
@@ -269,6 +275,7 @@ async def update_audio_config(
|
||||
"OPENAI_API_KEY": request.app.state.config.STT_OPENAI_API_KEY,
|
||||
"ENGINE": request.app.state.config.STT_ENGINE,
|
||||
"MODEL": request.app.state.config.STT_MODEL,
|
||||
"SUPPORTED_CONTENT_TYPES": request.app.state.config.STT_SUPPORTED_CONTENT_TYPES,
|
||||
"WHISPER_MODEL": request.app.state.config.WHISPER_MODEL,
|
||||
"DEEPGRAM_API_KEY": request.app.state.config.DEEPGRAM_API_KEY,
|
||||
"AZURE_API_KEY": request.app.state.config.AUDIO_STT_AZURE_API_KEY,
|
||||
@@ -910,10 +917,14 @@ def transcription(
|
||||
):
|
||||
log.info(f"file.content_type: {file.content_type}")
|
||||
|
||||
SUPPORTED_CONTENT_TYPES = {"video/webm"} # Extend if you add more video types!
|
||||
if not (
|
||||
file.content_type.startswith("audio/")
|
||||
or file.content_type in SUPPORTED_CONTENT_TYPES
|
||||
supported_content_types = request.app.state.config.STT_SUPPORTED_CONTENT_TYPES or [
|
||||
"audio/*",
|
||||
"video/webm",
|
||||
]
|
||||
|
||||
if not any(
|
||||
fnmatch(file.content_type, content_type)
|
||||
for content_type in supported_content_types
|
||||
):
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
|
||||
@@ -155,9 +155,18 @@ def upload_file(
|
||||
if process:
|
||||
try:
|
||||
if file.content_type:
|
||||
if file.content_type.startswith("audio/") or file.content_type in {
|
||||
"video/webm"
|
||||
}:
|
||||
stt_supported_content_types = (
|
||||
request.app.state.config.STT_SUPPORTED_CONTENT_TYPES
|
||||
or [
|
||||
"audio/*",
|
||||
"video/webm",
|
||||
]
|
||||
)
|
||||
|
||||
if any(
|
||||
fnmatch(file.content_type, content_type)
|
||||
for content_type in stt_supported_content_types
|
||||
):
|
||||
file_path = Storage.get_file(file_path)
|
||||
result = transcribe(request, file_path, file_metadata)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user