From 429242b4d3bb6e24badfc403f690540b3b44cd6f Mon Sep 17 00:00:00 2001 From: Self Denial Date: Wed, 10 Apr 2024 20:30:00 -0600 Subject: [PATCH 1/3] Introduce Whisper model auto-update control. * Introduce WHISPER_MODEL_AUTO_UPDATE env var * Pass local_files_only to WhisperModel() * Handle cases where auto-update is disabled but model is non-existent --- backend/apps/audio/main.py | 23 +++++++++++++++++------ backend/config.py | 3 +++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py index 02d1f5e8e..4f65b5f59 100644 --- a/backend/apps/audio/main.py +++ b/backend/apps/audio/main.py @@ -28,6 +28,7 @@ from config import ( UPLOAD_DIR, WHISPER_MODEL, WHISPER_MODEL_DIR, + WHISPER_MODEL_AUTO_UPDATE, DEVICE_TYPE, ) @@ -69,12 +70,22 @@ def transcribe( f.write(contents) f.close() - model = WhisperModel( - WHISPER_MODEL, - device=whisper_device_type, - compute_type="int8", - download_root=WHISPER_MODEL_DIR, - ) + whisper_kwargs = { + "model_size_or_path": WHISPER_MODEL, + "device": whisper_device_type, + "compute_type": "int8", + "download_root": WHISPER_MODEL_DIR, + "local_files_only": not WHISPER_MODEL_AUTO_UPDATE, + } + + log.debug(f"whisper_kwargs: {whisper_kwargs}") + + try: + model = WhisperModel(**whisper_kwargs) + except: + log.debug("WhisperModel initialization failed, attempting download with local_files_only=False") + whisper_kwargs["local_files_only"] = False + model = WhisperModel(**whisper_kwargs) segments, info = model.transcribe(file_path, beam_size=5) log.info( diff --git a/backend/config.py b/backend/config.py index 6e3cf92a9..4436a5a06 100644 --- a/backend/config.py +++ b/backend/config.py @@ -446,6 +446,9 @@ Query: [query]""" WHISPER_MODEL = os.getenv("WHISPER_MODEL", "base") WHISPER_MODEL_DIR = os.getenv("WHISPER_MODEL_DIR", f"{CACHE_DIR}/whisper/models") +WHISPER_MODEL_AUTO_UPDATE = ( + os.environ.get("WHISPER_MODEL_AUTO_UPDATE", "").lower() == "true" +) #################################### From 81c8717d75170a5d5a8526dea35894060105d222 Mon Sep 17 00:00:00 2001 From: Self Denial Date: Wed, 10 Apr 2024 20:44:44 -0600 Subject: [PATCH 2/3] Format fix --- backend/apps/audio/main.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py index 4f65b5f59..0111ebaf8 100644 --- a/backend/apps/audio/main.py +++ b/backend/apps/audio/main.py @@ -80,10 +80,12 @@ def transcribe( log.debug(f"whisper_kwargs: {whisper_kwargs}") - try: + try: model = WhisperModel(**whisper_kwargs) except: - log.debug("WhisperModel initialization failed, attempting download with local_files_only=False") + log.debug( + "WhisperModel initialization failed, attempting download with local_files_only=False" + ) whisper_kwargs["local_files_only"] = False model = WhisperModel(**whisper_kwargs) From faa58841508c181a08e816c1a90d487910adf20c Mon Sep 17 00:00:00 2001 From: Self Denial Date: Sat, 13 Apr 2024 03:18:13 -0600 Subject: [PATCH 3/3] Use log.warning instead of log.debug --- backend/apps/audio/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py index 0111ebaf8..f93b50f6e 100644 --- a/backend/apps/audio/main.py +++ b/backend/apps/audio/main.py @@ -83,7 +83,7 @@ def transcribe( try: model = WhisperModel(**whisper_kwargs) except: - log.debug( + log.warning( "WhisperModel initialization failed, attempting download with local_files_only=False" ) whisper_kwargs["local_files_only"] = False