Merge pull request #4018 from justinh-rahb/elevenlabs-voice-names

feat: Fetch ElevenLabs voice ID by name
2025-03-23 06:17:24 +00:00 · 2024-08-02 18:44:48 +02:00 · 2024-08-02 18:44:48 +02:00 · 3f53abb233
commit 3f53abb233
parent 67efd0dd39 c6c298b412
2 changed files with 71 additions and 6 deletions
--- a/backend/apps/audio/main.py
+++ b/backend/apps/audio/main.py
@ -134,6 +134,35 @@ def convert_mp4_to_wav(file_path, output_path):
    print(f"Converted {file_path} to {output_path}")


+async def get_available_voices():
+    if app.state.config.TTS_ENGINE != "elevenlabs":
+        return {}
+
+    base_url = "https://api.elevenlabs.io/v1"
+    headers = {
+        "xi-api-key": app.state.config.TTS_API_KEY,
+        "Content-Type": "application/json",
+    }
+
+    voices_url = f"{base_url}/voices"
+    try:
+        response = requests.get(voices_url, headers=headers)
+        response.raise_for_status()
+        voices_data = response.json()
+
+        voice_options = {}
+        for voice in voices_data.get("voices", []):
+            voice_name = voice["name"]
+            voice_id = voice["voice_id"]
+            voice_options[voice_name] = voice_id
+
+        return voice_options
+
+    except requests.RequestException as e:
+        log.error(f"Error fetching voices: {str(e)}")
+        return {}
+
+
@app.get("/config")
 async def get_audio_config(user=Depends(get_admin_user)):
    return {
@ -258,9 +287,15 @@ async def speech(request: Request, user=Depends(get_verified_user)):
            payload = json.loads(body.decode("utf-8"))
        except Exception as e:
            log.exception(e)
-            pass
+            raise HTTPException(status_code=400, detail="Invalid JSON payload")

-        url = f"https://api.elevenlabs.io/v1/text-to-speech/{payload['voice']}"
+        voice_options = await get_available_voices()
+        voice_id = voice_options.get(payload['voice'])
+
+        if not voice_id:
+            raise HTTPException(status_code=400, detail="Invalid voice name")
+
+        url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"

        headers = {
            "Accept": "audio/mpeg",
@ -435,3 +470,9 @@ def transcribe(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT(e),
        )
+
+
+@app.get("/voices")
+async def get_voices(user=Depends(get_verified_user)):
+    voices = await get_available_voices()
+    return {"voices": list(voices.keys())}
--- a/src/lib/components/admin/Settings/Audio.svelte
+++ b/src/lib/components/admin/Settings/Audio.svelte
@ -56,6 +56,23 @@
 		}, 100);
 	};

+    // Fetch available ElevenLabs voices
+    const getVoices = async () => {
+        const response = await fetch('/voices', {
+            method: 'GET',
+            headers: {
+                'Authorization': `Bearer ${localStorage.token}`
+            }
+        });
+
+        if (response.ok) {
+            const data = await response.json();
+            voices = data.voices.map(name => ({ name })); // Update voices array with fetched names
+        } else {
+            toast.error('Failed to fetch voices');
+        }
+    };
+
 	const updateConfigHandler = async () => {
 		const res = await updateAudioConfig(localStorage.token, {
 			tts: {
@ -82,6 +99,9 @@
 	};

 	onMount(async () => {
+        // Fetch available voices on component mount
+        await getVoices(); 
+        
 		const res = await getAudioConfig(localStorage.token);

 		if (res) {
@ -104,6 +124,8 @@
 		if (TTS_ENGINE === 'openai') {
 			getOpenAIVoices();
 			getOpenAIModels();
+        } else if(TTS_ENGINE === 'elevenlabs') {
+            await getVoices(); //Get voices if TTS_ENGINE is ElevenLabs
 		} else {
 			getWebAPIVoices();
 		}
@ -185,11 +207,13 @@
 							class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
 							bind:value={TTS_ENGINE}
 							placeholder="Select a mode"
-							on:change={(e) => {
+							on:change={async (e) => {
 								if (e.target.value === 'openai') {
 									getOpenAIVoices();
 									TTS_VOICE = 'alloy';
 									TTS_MODEL = 'tts-1';
+								} else if(e.target.value === 'elevenlabs') {
+									await getVoices();
 								} else {
 									getWebAPIVoices();
 									TTS_VOICE = '';
@ -232,7 +256,7 @@

 				<hr class=" dark:border-gray-850 my-2" />

-				{#if TTS_ENGINE === ''}
+				{#if TTS_ENGINE !== ''}
 					<div>
 						<div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
 						<div class="flex w-full">
@ -244,9 +268,9 @@
 									<option value="" selected={TTS_VOICE !== ''}>{$i18n.t('Default')}</option>
 									{#each voices as voice}
 										<option
-											value={voice.voiceURI}
+											value={voice.name}
 											class="bg-gray-100 dark:bg-gray-700"
-											selected={TTS_VOICE === voice.voiceURI}>{voice.name}</option
+											selected={TTS_VOICE === voice.name}>{voice.name}</option
 										>
 									{/each}
 								</select>