feat: custom stt content type

Co-Authored-By: Bryan Berns <berns@uwalumni.com>
This commit is contained in:
Timothy Jaeryang Baek
2025-06-16 16:13:40 +04:00
parent 6a5aac43df
commit 7a1afa9c66
5 changed files with 232 additions and 187 deletions

View File

@@ -10,7 +10,7 @@ from pydub.silence import split_on_silence
from concurrent.futures import ThreadPoolExecutor
from typing import Optional
from fnmatch import fnmatch
import aiohttp
import aiofiles
import requests
@@ -168,6 +168,7 @@ class STTConfigForm(BaseModel):
OPENAI_API_KEY: str
ENGINE: str
MODEL: str
SUPPORTED_CONTENT_TYPES: list[str] = []
WHISPER_MODEL: str
DEEPGRAM_API_KEY: str
AZURE_API_KEY: str
@@ -202,6 +203,7 @@ async def get_audio_config(request: Request, user=Depends(get_admin_user)):
"OPENAI_API_KEY": request.app.state.config.STT_OPENAI_API_KEY,
"ENGINE": request.app.state.config.STT_ENGINE,
"MODEL": request.app.state.config.STT_MODEL,
"SUPPORTED_CONTENT_TYPES": request.app.state.config.STT_SUPPORTED_CONTENT_TYPES,
"WHISPER_MODEL": request.app.state.config.WHISPER_MODEL,
"DEEPGRAM_API_KEY": request.app.state.config.DEEPGRAM_API_KEY,
"AZURE_API_KEY": request.app.state.config.AUDIO_STT_AZURE_API_KEY,
@@ -236,6 +238,10 @@ async def update_audio_config(
request.app.state.config.STT_OPENAI_API_KEY = form_data.stt.OPENAI_API_KEY
request.app.state.config.STT_ENGINE = form_data.stt.ENGINE
request.app.state.config.STT_MODEL = form_data.stt.MODEL
request.app.state.config.STT_SUPPORTED_CONTENT_TYPES = (
form_data.stt.SUPPORTED_CONTENT_TYPES
)
request.app.state.config.WHISPER_MODEL = form_data.stt.WHISPER_MODEL
request.app.state.config.DEEPGRAM_API_KEY = form_data.stt.DEEPGRAM_API_KEY
request.app.state.config.AUDIO_STT_AZURE_API_KEY = form_data.stt.AZURE_API_KEY
@@ -269,6 +275,7 @@ async def update_audio_config(
"OPENAI_API_KEY": request.app.state.config.STT_OPENAI_API_KEY,
"ENGINE": request.app.state.config.STT_ENGINE,
"MODEL": request.app.state.config.STT_MODEL,
"SUPPORTED_CONTENT_TYPES": request.app.state.config.STT_SUPPORTED_CONTENT_TYPES,
"WHISPER_MODEL": request.app.state.config.WHISPER_MODEL,
"DEEPGRAM_API_KEY": request.app.state.config.DEEPGRAM_API_KEY,
"AZURE_API_KEY": request.app.state.config.AUDIO_STT_AZURE_API_KEY,
@@ -910,10 +917,14 @@ def transcription(
):
log.info(f"file.content_type: {file.content_type}")
SUPPORTED_CONTENT_TYPES = {"video/webm"} # Extend if you add more video types!
if not (
file.content_type.startswith("audio/")
or file.content_type in SUPPORTED_CONTENT_TYPES
supported_content_types = request.app.state.config.STT_SUPPORTED_CONTENT_TYPES or [
"audio/*",
"video/webm",
]
if not any(
fnmatch(file.content_type, content_type)
for content_type in supported_content_types
):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,

View File

@@ -155,9 +155,18 @@ def upload_file(
if process:
try:
if file.content_type:
if file.content_type.startswith("audio/") or file.content_type in {
"video/webm"
}:
stt_supported_content_types = (
request.app.state.config.STT_SUPPORTED_CONTENT_TYPES
or [
"audio/*",
"video/webm",
]
)
if any(
fnmatch(file.content_type, content_type)
for content_type in stt_supported_content_types
):
file_path = Storage.get_file(file_path)
result = transcribe(request, file_path, file_metadata)