From 1efa708f8325e77165f403c0a60e717752c5d7d5 Mon Sep 17 00:00:00 2001 From: nathaniel Date: Sun, 27 Apr 2025 05:58:06 +0100 Subject: [PATCH 1/3] Added WHISPER_LANGUAGE env variable. If set to an ISO 639-1 language code (e.g. "en"), constrains Whisper's STT to that language. Detects the language as normal if unset --- backend/open_webui/config.py | 5 +++++ backend/open_webui/main.py | 2 ++ backend/open_webui/routers/audio.py | 1 + 3 files changed, 8 insertions(+) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 3b40977f2..04500411f 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -2510,6 +2510,11 @@ WHISPER_VAD_FILTER = PersistentConfig( os.getenv("WHISPER_VAD_FILTER", "False").lower() == "true", ) +WHISPER_LANGUAGE = PersistentConfig( + "WHISPER_LANGUAGE", + "audio.stt.whisper_language", + os.getenv("WHISPER_LANGUAGE", None).lower(), +) # Add Deepgram configuration DEEPGRAM_API_KEY = PersistentConfig( diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 56ea17fa1..3462661dd 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -167,6 +167,7 @@ from open_webui.config import ( WEB_LOADER_ENGINE, WHISPER_MODEL, WHISPER_VAD_FILTER, + WHISPER_LANGUAGE, DEEPGRAM_API_KEY, WHISPER_MODEL_AUTO_UPDATE, WHISPER_MODEL_DIR, @@ -791,6 +792,7 @@ app.state.config.STT_MODEL = AUDIO_STT_MODEL app.state.config.WHISPER_MODEL = WHISPER_MODEL app.state.config.WHISPER_VAD_FILTER = WHISPER_VAD_FILTER +app.state.config.WHISPER_LANGUAGE = WHISPER_LANGUAGE app.state.config.DEEPGRAM_API_KEY = DEEPGRAM_API_KEY app.state.config.AUDIO_STT_AZURE_API_KEY = AUDIO_STT_AZURE_API_KEY diff --git a/backend/open_webui/routers/audio.py b/backend/open_webui/routers/audio.py index da51d1ecf..c086aea00 100644 --- a/backend/open_webui/routers/audio.py +++ b/backend/open_webui/routers/audio.py @@ -501,6 +501,7 @@ def transcribe(request: Request, file_path): file_path, beam_size=5, 
vad_filter=request.app.state.config.WHISPER_VAD_FILTER, + language=request.app.state.config.WHISPER_LANGUAGE ) log.info( "Detected language '%s' with probability %f" From ef7acfbf3d78447b89cc0a4336286835367fb5b0 Mon Sep 17 00:00:00 2001 From: nathaniel Date: Thu, 1 May 2025 21:33:57 +0100 Subject: [PATCH 2/3] WHISPER_LANGUAGE no longer a "PersistentConfig" variable (Was not appropriate with how WHISPER_LANGUAGE is currently configured). --- backend/open_webui/config.py | 6 +----- backend/open_webui/main.py | 1 - backend/open_webui/routers/audio.py | 3 ++- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 04500411f..b5d37cc8e 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -2510,11 +2510,7 @@ WHISPER_VAD_FILTER = PersistentConfig( os.getenv("WHISPER_VAD_FILTER", "False").lower() == "true", ) -WHISPER_LANGUAGE = PersistentConfig( - "WHISPER_LANGUAGE", - "audio.stt.whisper_language", - os.getenv("WHISPER_LANGUAGE", None).lower(), -) +WHISPER_LANGUAGE = os.getenv("WHISPER_LANGUAGE", None).lower() # Add Deepgram configuration DEEPGRAM_API_KEY = PersistentConfig( diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 3462661dd..26d9c29c5 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -792,7 +792,6 @@ app.state.config.STT_MODEL = AUDIO_STT_MODEL app.state.config.WHISPER_MODEL = WHISPER_MODEL app.state.config.WHISPER_VAD_FILTER = WHISPER_VAD_FILTER -app.state.config.WHISPER_LANGUAGE = WHISPER_LANGUAGE app.state.config.DEEPGRAM_API_KEY = DEEPGRAM_API_KEY app.state.config.AUDIO_STT_AZURE_API_KEY = AUDIO_STT_AZURE_API_KEY diff --git a/backend/open_webui/routers/audio.py b/backend/open_webui/routers/audio.py index c086aea00..513a8323a 100644 --- a/backend/open_webui/routers/audio.py +++ b/backend/open_webui/routers/audio.py @@ -33,6 +33,7 @@ from open_webui.config import ( WHISPER_MODEL_AUTO_UPDATE, WHISPER_MODEL_DIR, 
CACHE_DIR, + WHISPER_LANGUAGE ) from open_webui.constants import ERROR_MESSAGES @@ -501,7 +502,7 @@ def transcribe(request: Request, file_path): file_path, beam_size=5, vad_filter=request.app.state.config.WHISPER_VAD_FILTER, - language=request.app.state.config.WHISPER_LANGUAGE + language=WHISPER_LANGUAGE ) log.info( "Detected language '%s' with probability %f" From cc14aacaaa6248426aa4bd1c3b2ee984b4ba850b Mon Sep 17 00:00:00 2001 From: nathaniel Date: Thu, 1 May 2025 22:03:49 +0100 Subject: [PATCH 3/3] Improvements to parsing of WHISPER_LANGUAGE environment variable (Setting as empty string now equivalent to unsetting/removing WHISPER_LANGUAGE). Resolved crash caused when unset --- backend/open_webui/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index b5d37cc8e..6d4ddc7e6 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -2510,7 +2510,7 @@ WHISPER_VAD_FILTER = PersistentConfig( os.getenv("WHISPER_VAD_FILTER", "False").lower() == "true", ) -WHISPER_LANGUAGE = os.getenv("WHISPER_LANGUAGE", None).lower() +WHISPER_LANGUAGE = os.getenv("WHISPER_LANGUAGE", "").lower() or None # Add Deepgram configuration DEEPGRAM_API_KEY = PersistentConfig(