Added Azure Speech Service support

2025-04-29 10:42:55 +00:00 · 2024-09-17 09:13:10 +01:00 · 2024-09-17 09:13:10 +01:00 · d6b68f405e
commit d6b68f405e
parent 351bbdb36c
2 changed files with 26 additions and 0 deletions
--- a/backend/open_webui/apps/audio/main.py
+++ b/backend/open_webui/apps/audio/main.py
@ -301,6 +301,30 @@ async def speech(request: Request, user=Depends(get_verified_user)):
                detail=error_detail,
            )

+    elif app.state.config.TTS_ENGINE == "azurespeechservice":
+        payload = None
+        try:
+            payload = json.loads(body.decode("utf-8"))
+        except Exception as e:
+            log.exception(e)
+            raise HTTPException(status_code=400, detail="Invalid JSON payload")
+
+        import azure.cognitiveservices.speech as speechsdk
+
+        config = speechsdk.SpeechConfig(subscription=app.state.config.TTS_API_KEY, region="uksouth")
+        speaker_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=False, filename=str(file_path))
+
+        client = speechsdk.SpeechSynthesizer(speech_config=config, audio_config=speaker_config)
+        result = client.speak_text(payload["input"])
+
+        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
+            return FileResponse(file_path)
+        else:
+            raise HTTPException(
+                status_code=500,
+                detail=f"Error synthesizing speech - {result.reason}")
+
+

@app.post("/transcriptions")
 def transcribe(
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@ -79,6 +79,8 @@ extract_msg
 pydub
 duckduckgo-search~=6.2.11

+azure-cognitiveservices-speech==1.40.0
+
 ## Tests
 docker~=7.1.0
 pytest~=8.3.2