Merge pull request #4299 from open-webui/dev-elevenlabs

feat: Fetch ElevenLabs voice ID by name
2025-03-28 19:24:29 +00:00 · 2024-08-02 19:29:24 +02:00 · 2024-08-02 19:29:24 +02:00 · 9d5e3e2a91
commit 9d5e3e2a91
parent 67efd0dd39 7f260938db
4 changed files with 211 additions and 74 deletions
--- a/backend/apps/audio/main.py
+++ b/backend/apps/audio/main.py
@ -10,12 +10,12 @@ from fastapi import (
    File,
    Form,
 )
 from fastapi.responses import StreamingResponse, JSONResponse, FileResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from typing import List
 import uuid
 import requests
 import hashlib
@ -31,6 +31,7 @@ from utils.utils import (
 )
 from utils.misc import calculate_sha256
 from config import (
    SRC_LOG_LEVELS,
    CACHE_DIR,
@ -252,15 +253,15 @@ async def speech(request: Request, user=Depends(get_verified_user)):
            )
    elif app.state.config.TTS_ENGINE == "elevenlabs":
        payload = None
        try:
            payload = json.loads(body.decode("utf-8"))
        except Exception as e:
            log.exception(e)
-            pass
+            raise HTTPException(status_code=400, detail="Invalid JSON payload")
-        url = f"https://api.elevenlabs.io/v1/text-to-speech/{payload['voice']}"
+        voice_id = payload.get("voice", "")
        url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
        headers = {
            "Accept": "audio/mpeg",
@ -435,3 +436,69 @@ def transcribe(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT(e),
        )
 def get_available_models() -> List[dict]:
    """Return the TTS models available for the configured engine.

    - ``openai``: a fixed list of model ids (``tts-1`` and ``tts-1-hd``).
    - ``elevenlabs``: the result of ``GET https://api.elevenlabs.io/v1/models``,
      authenticated with ``app.state.config.TTS_API_KEY`` and mapped to
      ``{"name", "id"}`` dicts.
    - any other engine: an empty list.

    A ``requests.RequestException`` raised while querying ElevenLabs is
    logged and an empty list is returned instead of propagating the error.
    """
    if app.state.config.TTS_ENGINE == "openai":
        return [{"id": "tts-1"}, {"id": "tts-1-hd"}]
    elif app.state.config.TTS_ENGINE == "elevenlabs":
        headers = {
            "xi-api-key": app.state.config.TTS_API_KEY,
            "Content-Type": "application/json",
        }
        try:
            response = requests.get(
                "https://api.elevenlabs.io/v1/models", headers=headers
            )
            response.raise_for_status()
            models = response.json()
            return [
                {"name": model["name"], "id": model["model_id"]} for model in models
            ]
        except requests.RequestException as e:
            # Name the resource actually requested so a failure here is not
            # mistaken for a failure of the voices lookup.
            log.error(f"Error fetching models: {str(e)}")
    return []
@app.get("/models")
 async def get_models(user=Depends(get_verified_user)):
    return {"models": get_available_models()}
 def get_available_voices() -> List[dict]:
    """Return the TTS voices available for the configured engine.

    For ``openai`` a fixed list is returned in which each voice's ``name``
    and ``id`` are the same string. For ``elevenlabs`` the voices are read
    from ``GET https://api.elevenlabs.io/v1/voices`` (authenticated with
    ``app.state.config.TTS_API_KEY``) and mapped to ``{"name", "id"}`` dicts.
    Any other engine, or a ``requests.RequestException`` during the
    ElevenLabs call (which is logged), yields an empty list.
    """
    engine = app.state.config.TTS_ENGINE
    if engine == "openai":
        openai_voice_names = ("alloy", "echo", "fable", "onyx", "nova", "shimmer")
        return [{"name": label, "id": label} for label in openai_voice_names]
    if engine != "elevenlabs":
        return []
    request_headers = {
        "xi-api-key": app.state.config.TTS_API_KEY,
        "Content-Type": "application/json",
    }
    try:
        response = requests.get(
            "https://api.elevenlabs.io/v1/voices", headers=request_headers
        )
        response.raise_for_status()
        listed = response.json().get("voices", [])
        return [{"name": entry["name"], "id": entry["voice_id"]} for entry in listed]
    except requests.RequestException as e:
        log.error(f"Error fetching voices: {str(e)}")
    return []
@app.get("/voices")
 async def get_voices(user=Depends(get_verified_user)):
    return {"voices": get_available_voices()}
--- a/src/lib/apis/audio/index.ts
+++ b/src/lib/apis/audio/index.ts
@ -131,3 +131,59 @@ export const synthesizeOpenAISpeech = async (
 	return res;
 };
 /**
  * Fetches the TTS models exposed by the audio backend (`GET /models`).
  *
  * @param token - Bearer token sent in the `Authorization` header.
  * @returns The parsed JSON body of a successful response.
  * @throws The `detail` field of a non-OK JSON response; when no `detail` is
  * available (for example the request itself failed), the error's `message`,
  * or a generic fallback string.
  */
 export const getModels = async (token: string = '') => {
 	let error: string | null = null;
 	const res = await fetch(`${AUDIO_API_BASE_URL}/models`, {
 		method: 'GET',
 		headers: {
 			'Content-Type': 'application/json',
 			Authorization: `Bearer ${token}`
 		}
 	})
 		.then(async (res) => {
 			if (!res.ok) throw await res.json();
 			return res.json();
 		})
 		.catch((err) => {
 			console.log(err);
 			// A rejected fetch or an unparsable body carries no `detail`;
 			// fall back so the failure still reaches the caller instead of
 			// being reported as a silent `null` result.
 			error = err?.detail ?? err?.message ?? 'Failed to fetch audio models';
 			return null;
 		});
 	if (error) {
 		throw error;
 	}
 	return res;
 };
 /**
  * Fetches the TTS voices exposed by the audio backend (`GET /voices`).
  *
  * @param token - Bearer token sent in the `Authorization` header.
  * @returns The parsed JSON body of a successful response.
  * @throws The `detail` field of a non-OK JSON response; when no `detail` is
  * available (for example the request itself failed), the error's `message`,
  * or a generic fallback string.
  */
 export const getVoices = async (token: string = '') => {
 	let error: string | null = null;
 	const res = await fetch(`${AUDIO_API_BASE_URL}/voices`, {
 		method: 'GET',
 		headers: {
 			'Content-Type': 'application/json',
 			Authorization: `Bearer ${token}`
 		}
 	})
 		.then(async (res) => {
 			if (!res.ok) throw await res.json();
 			return res.json();
 		})
 		.catch((err) => {
 			console.log(err);
 			// A rejected fetch or an unparsable body carries no `detail`;
 			// fall back so the failure still reaches the caller instead of
 			// being reported as a silent `null` result.
 			error = err?.detail ?? err?.message ?? 'Failed to fetch audio voices';
 			return null;
 		});
 	if (error) {
 		throw error;
 	}
 	return res;
 };
--- a/src/lib/components/admin/Settings/Audio.svelte
+++ b/src/lib/components/admin/Settings/Audio.svelte
@ -1,13 +1,19 @@
 <script lang="ts">
 	import { getAudioConfig, updateAudioConfig } from '$lib/apis/audio';
 	import { user, settings, config } from '$lib/stores';
 	import { createEventDispatcher, onMount, getContext } from 'svelte';
 	import { toast } from 'svelte-sonner';
-	import Switch from '$lib/components/common/Switch.svelte';
+	import { createEventDispatcher, onMount, getContext } from 'svelte';
 	import { getBackendConfig } from '$lib/apis';
 	import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
 	const dispatch = createEventDispatcher();
 	import { getBackendConfig } from '$lib/apis';
 	import {
 		getAudioConfig,
 		updateAudioConfig,
 		getModels as _getModels,
 		getVoices as _getVoices
 	} from '$lib/apis/audio';
 	import { user, settings, config } from '$lib/stores';
 	import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
 	const i18n = getContext('i18n');
 	export let saveHandler: Function;
@ -30,30 +36,41 @@
 	let models = [];
 	let nonLocalVoices = false;
-	const getOpenAIVoices = () => {
+	const getModels = async () => {
-		voices = [
+		if (TTS_ENGINE === '') {
-			{ name: 'alloy' },
+			models = [];
-			{ name: 'echo' },
+		} else {
-			{ name: 'fable' },
+			const res = await _getModels(localStorage.token).catch((e) => {
-			{ name: 'onyx' },
+				toast.error(e);
-			{ name: 'nova' },
+			});
 			{ name: 'shimmer' }
 		];
 	};
-	const getOpenAIModels = () => {
+			if (res) {
-		models = [{ name: 'tts-1' }, { name: 'tts-1-hd' }];
+				console.log(res);
-	};
+				models = res.models;
 	const getWebAPIVoices = () => {
 		const getVoicesLoop = setInterval(async () => {
 			voices = await speechSynthesis.getVoices();
 			// do your loop
 			if (voices.length > 0) {
 				clearInterval(getVoicesLoop);
 			}
-		}, 100);
+		}
 	};
 	// Refresh `voices`: from the audio backend when a TTS engine is set,
 	// otherwise by polling the browser's speechSynthesis every 100 ms until
 	// it reports at least one voice.
 	const getVoices = async () => {
 		if (TTS_ENGINE !== '') {
 			let res;
 			try {
 				res = await _getVoices(localStorage.token);
 			} catch (e) {
 				toast.error(e);
 			}
 			if (res) {
 				console.log(res);
 				voices = res.voices;
 			}
 			return;
 		}
 		const pollTimer = setInterval(async () => {
 			voices = await speechSynthesis.getVoices();
 			// Stop polling once the browser has reported its voices.
 			if (voices.length > 0) {
 				clearInterval(pollTimer);
 			}
 		}, 100);
 	};
 	const updateConfigHandler = async () => {
@ -101,12 +118,8 @@
 			STT_MODEL = res.stt.MODEL;
 		}
-		if (TTS_ENGINE === 'openai') {
+		await getVoices();
-			getOpenAIVoices();
+		await getModels();
 			getOpenAIModels();
 		} else {
 			getWebAPIVoices();
 		}
 	});
 </script>
@ -185,13 +198,15 @@
 							class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
 							bind:value={TTS_ENGINE}
 							placeholder="Select a mode"
-							on:change={(e) => {
+							on:change={async (e) => {
 								await updateConfigHandler();
 								await getVoices();
 								await getModels();
 								if (e.target.value === 'openai') {
 									getOpenAIVoices();
 									TTS_VOICE = 'alloy';
 									TTS_MODEL = 'tts-1';
 								} else {
 									getWebAPIVoices();
 									TTS_VOICE = '';
 									TTS_MODEL = '';
 								}
@ -268,7 +283,7 @@
 									<datalist id="voice-list">
 										{#each voices as voice}
-											<option value={voice.name} />
+											<option value={voice.id}>{voice.name}</option>
 										{/each}
 									</datalist>
 								</div>
@ -279,15 +294,15 @@
 							<div class="flex w-full">
 								<div class="flex-1">
 									<input
-										list="model-list"
+										list="tts-model-list"
 										class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
 										bind:value={TTS_MODEL}
 										placeholder="Select a model"
 									/>
-									<datalist id="model-list">
+									<datalist id="tts-model-list">
 										{#each models as model}
-											<option value={model.name} />
+											<option value={model.id} />
 										{/each}
 									</datalist>
 								</div>
@ -309,7 +324,7 @@
 									<datalist id="voice-list">
 										{#each voices as voice}
-											<option value={voice.name} />
+											<option value={voice.id}>{voice.name}</option>
 										{/each}
 									</datalist>
 								</div>
@ -320,15 +335,15 @@
 							<div class="flex w-full">
 								<div class="flex-1">
 									<input
-										list="model-list"
+										list="tts-model-list"
 										class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
 										bind:value={TTS_MODEL}
 										placeholder="Select a model"
 									/>
-									<datalist id="model-list">
+									<datalist id="tts-model-list">
 										{#each models as model}
-											<option value={model.name} />
+											<option value={model.id} />
 										{/each}
 									</datalist>
 								</div>
--- a/src/lib/components/chat/Settings/Audio.svelte
+++ b/src/lib/components/chat/Settings/Audio.svelte
@ -1,7 +1,10 @@
 <script lang="ts">
 	import { user, settings, config } from '$lib/stores';
 	import { createEventDispatcher, onMount, getContext } from 'svelte';
 	import { toast } from 'svelte-sonner';
 	import { createEventDispatcher, onMount, getContext } from 'svelte';
 	import { user, settings, config } from '$lib/stores';
 	import { getVoices as _getVoices } from '$lib/apis/audio';
 	import Switch from '$lib/components/common/Switch.svelte';
 	const dispatch = createEventDispatcher();
@ -20,26 +23,26 @@
 	let voices = [];
 	let voice = '';
-	const getOpenAIVoices = () => {
+	const getVoices = async () => {
-		voices = [
+		if ($config.audio.tts.engine === '') {
-			{ name: 'alloy' },
+			const getVoicesLoop = setInterval(async () => {
-			{ name: 'echo' },
+				voices = await speechSynthesis.getVoices();
 			{ name: 'fable' },
 			{ name: 'onyx' },
 			{ name: 'nova' },
 			{ name: 'shimmer' }
 		];
 	};
-	const getWebAPIVoices = () => {
+				// do your loop
-		const getVoicesLoop = setInterval(async () => {
+				if (voices.length > 0) {
-			voices = await speechSynthesis.getVoices();
+					clearInterval(getVoicesLoop);
 				}
 			}, 100);
 		} else {
 			const res = await _getVoices(localStorage.token).catch((e) => {
 				toast.error(e);
 			});
-			// do your loop
+			if (res) {
-			if (voices.length > 0) {
+				console.log(res);
-				clearInterval(getVoicesLoop);
+				voices = res.voices;
 			}
-		}, 100);
+		}
 	};
 	const toggleResponseAutoPlayback = async () => {
@ -61,11 +64,7 @@
 		voice = $settings?.audio?.tts?.voice ?? $config.audio.tts.voice ?? '';
 		nonLocalVoices = $settings.audio?.tts?.nonLocalVoices ?? false;
-		if ($config.audio.tts.engine === 'openai') {
+		await getVoices();
 			getOpenAIVoices();
 		} else {
 			getWebAPIVoices();
 		}
 	});
 </script>
@ -195,7 +194,7 @@
 						<datalist id="voice-list">
 							{#each voices as voice}
-								<option value={voice.name} />
+								<option value={voice.id}>{voice.name}</option>
 							{/each}
 						</datalist>
 					</div>