Make the Whisper VAD filter configurable.

Summary of what this patch does:
  * config.py: adds the WHISPER_VAD_FILTER PersistentConfig entry (config path
    "audio.stt.whisper_vad_filter"), read from the WHISPER_VAD_FILTER
    environment variable; it is true only when the value is "true"
    (case-insensitive).
  * main.py: imports WHISPER_VAD_FILTER and stores it on app.state.config.
  * routers/audio.py: transcribe() passes the configured value as vad_filter
    to model.transcribe() instead of the hard-coded vad_filter=True.

NOTE(review): the old code always passed vad_filter=True; with this patch the
value is False unless WHISPER_VAD_FILTER=true is set. Confirm that this change
of default is intended.
NOTE(review): line breaks and indentation of the context lines were
reconstructed from a whitespace-collapsed copy of the patch; verify them
against the target tree before applying.

diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py
index bd822d06d..45a5dfbdb 100644
--- a/backend/open_webui/config.py
+++ b/backend/open_webui/config.py
@@ -2489,6 +2489,13 @@ WHISPER_MODEL_AUTO_UPDATE = (
     and os.environ.get("WHISPER_MODEL_AUTO_UPDATE", "").lower() == "true"
 )
 
+WHISPER_VAD_FILTER = PersistentConfig(
+    "WHISPER_VAD_FILTER",
+    "audio.stt.whisper_vad_filter",
+    os.getenv("WHISPER_VAD_FILTER", "False").lower() == "true",
+)
+
+
 # Add Deepgram configuration
 DEEPGRAM_API_KEY = PersistentConfig(
     "DEEPGRAM_API_KEY",
@@ -2496,6 +2503,7 @@ DEEPGRAM_API_KEY = PersistentConfig(
     os.getenv("DEEPGRAM_API_KEY", ""),
 )
 
+
 AUDIO_STT_OPENAI_API_BASE_URL = PersistentConfig(
     "AUDIO_STT_OPENAI_API_BASE_URL",
     "audio.stt.openai.api_base_url",
diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py
index f8958c4b5..56ea17fa1 100644
--- a/backend/open_webui/main.py
+++ b/backend/open_webui/main.py
@@ -166,6 +166,7 @@ from open_webui.config import (
     FIRECRAWL_API_KEY,
     WEB_LOADER_ENGINE,
     WHISPER_MODEL,
+    WHISPER_VAD_FILTER,
     DEEPGRAM_API_KEY,
     WHISPER_MODEL_AUTO_UPDATE,
     WHISPER_MODEL_DIR,
@@ -789,6 +790,7 @@ app.state.config.STT_ENGINE = AUDIO_STT_ENGINE
 app.state.config.STT_MODEL = AUDIO_STT_MODEL
 
 app.state.config.WHISPER_MODEL = WHISPER_MODEL
+app.state.config.WHISPER_VAD_FILTER = WHISPER_VAD_FILTER
 app.state.config.DEEPGRAM_API_KEY = DEEPGRAM_API_KEY
 
 app.state.config.AUDIO_STT_AZURE_API_KEY = AUDIO_STT_AZURE_API_KEY
diff --git a/backend/open_webui/routers/audio.py b/backend/open_webui/routers/audio.py
index c5d5f99af..da51d1ecf 100644
--- a/backend/open_webui/routers/audio.py
+++ b/backend/open_webui/routers/audio.py
@@ -497,7 +497,11 @@ def transcribe(request: Request, file_path):
             )
 
         model = request.app.state.faster_whisper_model
-        segments, info = model.transcribe(file_path, beam_size=5, vad_filter=True)
+        segments, info = model.transcribe(
+            file_path,
+            beam_size=5,
+            vad_filter=request.app.state.config.WHISPER_VAD_FILTER,
+        )
         log.info(
             "Detected language '%s' with probability %f"
             % (info.language, info.language_probability)