feat: external stt

2025-06-26 18:26:48 +00:00 · 2024-06-07 20:31:52 -07:00 · 2024-06-07 20:31:52 -07:00 · e516374d54
commit e516374d54
parent 55dc6c1b3b
1 changed file with 60 additions and 26 deletions
--- a/backend/apps/audio/main.py
+++ b/backend/apps/audio/main.py
@@ -240,39 +240,73 @@ def transcribe(
            f.write(contents)
            f.close()
-        whisper_kwargs = {
+        if app.state.config.STT_ENGINE == "":
-            "model_size_or_path": WHISPER_MODEL,
+            whisper_kwargs = {
-            "device": whisper_device_type,
+                "model_size_or_path": WHISPER_MODEL,
-            "compute_type": "int8",
+                "device": whisper_device_type,
-            "download_root": WHISPER_MODEL_DIR,
+                "compute_type": "int8",
-            "local_files_only": not WHISPER_MODEL_AUTO_UPDATE,
+                "download_root": WHISPER_MODEL_DIR,
-        }
+                "local_files_only": not WHISPER_MODEL_AUTO_UPDATE,
            }
-        log.debug(f"whisper_kwargs: {whisper_kwargs}")
+            log.debug(f"whisper_kwargs: {whisper_kwargs}")
-        try:
+            try:
-            model = WhisperModel(**whisper_kwargs)
+                model = WhisperModel(**whisper_kwargs)
-        except:
+            except:
-            log.warning(
+                log.warning(
-                "WhisperModel initialization failed, attempting download with local_files_only=False"
+                    "WhisperModel initialization failed, attempting download with local_files_only=False"
                )
                whisper_kwargs["local_files_only"] = False
                model = WhisperModel(**whisper_kwargs)
            segments, info = model.transcribe(file_path, beam_size=5)
            log.info(
                "Detected language '%s' with probability %f"
                % (info.language, info.language_probability)
            )
            whisper_kwargs["local_files_only"] = False
            model = WhisperModel(**whisper_kwargs)
-        segments, info = model.transcribe(file_path, beam_size=5)
+            transcript = "".join([segment.text for segment in list(segments)])
        log.info(
            "Detected language '%s' with probability %f"
            % (info.language, info.language_probability)
        )
-        transcript = "".join([segment.text for segment in list(segments)])
+            # save the transcript to a json file
            transcript_file = f"{file_dir}/{id}.json"
            with open(transcript_file, "w") as f:
                json.dump({"transcript": transcript}, f)
-        # save the transcript to a json file
+            return {"text": transcript.strip()}
        transcript_file = f"{file_dir}/{id}.json"
        with open(transcript_file, "w") as f:
            json.dump({"transcript": transcript}, f)
-        return {"text": transcript.strip()}
+        elif app.state.config.STT_ENGINE == "openai":
            headers = {"Authorization": f"Bearer {app.state.config.STT_OPENAI_API_KEY}"}
            files = {"file": (filename, open(file_path, "rb"))}
            data = {"model": "whisper-1"}
            r = None
            try:
                r = requests.post(
                    url=f"{app.state.config.STT_OPENAI_API_BASE_URL}/audio/transcriptions",
                    headers=headers,
                    files=files,
                    data=data,
                )
                r.raise_for_status()
                return r.json()
            except Exception as e:
                log.exception(e)
                error_detail = "Open WebUI: Server Connection Error"
                if r is not None:
                    try:
                        res = r.json()
                        if "error" in res:
                            error_detail = f"External: {res['error']['message']}"
                    except:
                        error_detail = f"External: {e}"
                raise HTTPException(
                    status_code=r.status_code if r != None else 500,
                    detail=error_detail,
                )
    except Exception as e:
        log.exception(e)