diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py
index c565bf481..4113e9884 100644
--- a/backend/apps/audio/main.py
+++ b/backend/apps/audio/main.py
@@ -134,6 +134,40 @@ def convert_mp4_to_wav(file_path, output_path):
     print(f"Converted {file_path} to {output_path}")
 
 
+async def get_available_voices():
+    """Return the ElevenLabs voices as a ``{name: voice_id}`` mapping.
+
+    Returns an empty dict when the configured TTS engine is not ElevenLabs.
+    Request and JSON decoding failures are logged and also yield an empty dict.
+    """
+    if app.state.config.TTS_ENGINE != "elevenlabs":
+        return {}
+
+    base_url = "https://api.elevenlabs.io/v1"
+    headers = {
+        "xi-api-key": app.state.config.TTS_API_KEY,
+        "Content-Type": "application/json",
+    }
+
+    voices_url = f"{base_url}/voices"
+    try:
+        # A timeout keeps an unresponsive upstream from hanging the request.
+        response = requests.get(voices_url, headers=headers, timeout=10)
+        response.raise_for_status()
+        voices_data = response.json()
+    except (requests.RequestException, ValueError) as e:
+        # ValueError covers a non-JSON body on older ``requests`` releases.
+        log.error(f"Error fetching voices: {str(e)}")
+        return {}
+
+    # Skip malformed entries instead of failing the whole listing.
+    return {
+        voice["name"]: voice["voice_id"]
+        for voice in voices_data.get("voices", [])
+        if "name" in voice and "voice_id" in voice
+    }
+
+
 @app.get("/config")
 async def get_audio_config(user=Depends(get_admin_user)):
     return {
@@ -258,9 +292,21 @@ async def speech(request: Request, user=Depends(get_verified_user)):
             payload = json.loads(body.decode("utf-8"))
         except Exception as e:
             log.exception(e)
-            pass
+            raise HTTPException(status_code=400, detail="Invalid JSON payload")
 
-        url = f"https://api.elevenlabs.io/v1/text-to-speech/{payload['voice']}"
+        # ``.get`` turns a missing "voice" field into a 400 instead of a KeyError.
+        requested_voice = payload.get("voice")
+        voice_options = await get_available_voices()
+        voice_id = voice_options.get(requested_voice)
+
+        # Callers written against the old contract send the voice ID directly.
+        if not voice_id and requested_voice in voice_options.values():
+            voice_id = requested_voice
+
+        if not voice_id:
+            raise HTTPException(status_code=400, detail="Invalid voice name")
+
+        url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
 
         headers = {
             "Accept": "audio/mpeg",
@@ -435,3 +481,10 @@ def transcribe(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=ERROR_MESSAGES.DEFAULT(e),
         )
+
+
+@app.get("/voices")
+async def get_voices(user=Depends(get_verified_user)):
+    """List the voice names returned by ``get_available_voices``."""
+    voices = await get_available_voices()
+    return {"voices": list(voices)}
diff --git 
a/src/lib/components/admin/Settings/Audio.svelte b/src/lib/components/admin/Settings/Audio.svelte
index 50ce7418e..91aa3a612 100644
--- a/src/lib/components/admin/Settings/Audio.svelte
+++ b/src/lib/components/admin/Settings/Audio.svelte
@@ -56,6 +56,26 @@
 		}, 100);
 	};
 
+	// Fetch ElevenLabs voice names; failures surface as a toast, never a throw.
+	// NOTE(review): '/voices' resolves against the page origin - confirm routing.
+	const getVoices = async () => {
+		try {
+			const response = await fetch('/voices', {
+				method: 'GET',
+				headers: {
+					Authorization: `Bearer ${localStorage.token}`
+				}
+			});
+			if (!response.ok) {
+				throw new Error(`Unexpected status ${response.status}`);
+			}
+			const data = await response.json();
+			voices = data.voices.map((name) => ({ name }));
+		} catch (err) {
+			toast.error('Failed to fetch voices');
+		}
+	};
+
 	const updateConfigHandler = async () => {
 		const res = await updateAudioConfig(localStorage.token, {
 			tts: {
@@ -104,6 +124,8 @@
 		if (TTS_ENGINE === 'openai') {
 			getOpenAIVoices();
 			getOpenAIModels();
+		} else if (TTS_ENGINE === 'elevenlabs') {
+			await getVoices(); // Get voices if TTS_ENGINE is ElevenLabs
 		} else {
 			getWebAPIVoices();
 		}
@@ -185,11 +207,13 @@
 							class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
 							bind:value={TTS_ENGINE}
 							placeholder="Select a mode"
-							on:change={(e) => {
+							on:change={async (e) => {
 								if (e.target.value === 'openai') {
 									getOpenAIVoices();
 									TTS_VOICE = 'alloy';
 									TTS_MODEL = 'tts-1';
+								} else if (e.target.value === 'elevenlabs') {
+									await getVoices();
 								} else {
 									getWebAPIVoices();
 									TTS_VOICE = '';
@@ -232,7 +256,7 @@
- {#if TTS_ENGINE === ''} + {#if TTS_ENGINE !== ''}
{$i18n.t('TTS Voice')}
@@ -244,9 +268,9 @@ {#each voices as voice} {voice.name} {/each}