From 0216ee101c2793dbd227c29ddf5a345a9d8899f4 Mon Sep 17 00:00:00 2001
From: Justin Hayes <52832301+justinh-rahb@users.noreply.github.com>
Date: Sat, 20 Jul 2024 02:56:00 -0400
Subject: [PATCH] enh: ElevenLabs voice name -> ID

---
 backend/apps/audio/main.py                    | 45 ++++++++++++++++++-
 .../components/admin/Settings/Audio.svelte    | 36 ++++++++++++---
 2 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py
index c565bf481..6c3695616 100644
--- a/backend/apps/audio/main.py
+++ b/backend/apps/audio/main.py
@@ -134,6 +134,35 @@ def convert_mp4_to_wav(file_path, output_path):
     print(f"Converted {file_path} to {output_path}")
 
 
+async def fetch_available_voices():
+    if app.state.config.TTS_ENGINE != "elevenlabs":
+        return {}
+
+    base_url = "https://api.elevenlabs.io/v1"
+    headers = {
+        "xi-api-key": app.state.config.TTS_API_KEY,
+        "Content-Type": "application/json",
+    }
+
+    voices_url = f"{base_url}/voices"
+    try:
+        response = requests.get(voices_url, headers=headers)
+        response.raise_for_status()
+        voices_data = response.json()
+
+        voice_options = {}
+        for voice in voices_data.get("voices", []):
+            voice_name = voice["name"]
+            voice_id = voice["voice_id"]
+            voice_options[voice_name] = voice_id
+
+        return voice_options
+
+    except requests.RequestException as e:
+        log.error(f"Error fetching voices: {str(e)}")
+        return {}
+
+
 @app.get("/config")
 async def get_audio_config(user=Depends(get_admin_user)):
     return {
@@ -258,9 +287,15 @@ async def speech(request: Request, user=Depends(get_verified_user)):
             payload = json.loads(body.decode("utf-8"))
         except Exception as e:
             log.exception(e)
-            pass
+            raise HTTPException(status_code=400, detail="Invalid JSON payload")
 
-        url = f"https://api.elevenlabs.io/v1/text-to-speech/{payload['voice']}"
+        voice_options = await fetch_available_voices()
+        voice_id = voice_options.get(payload['voice'])
+
+        if not voice_id:
+            raise HTTPException(status_code=400, detail="Invalid voice name")
+
+        url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
 
         headers = {
             "Accept": "audio/mpeg",
@@ -435,3 +470,9 @@ def transcribe(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=ERROR_MESSAGES.DEFAULT(e),
         )
+
+
+@app.get("/voices")
+async def get_voices(user=Depends(get_verified_user)):
+    voices = await fetch_available_voices()
+    return {"voices": list(voices.keys())}
diff --git a/src/lib/components/admin/Settings/Audio.svelte b/src/lib/components/admin/Settings/Audio.svelte
index 50ce7418e..c3d36b093 100644
--- a/src/lib/components/admin/Settings/Audio.svelte
+++ b/src/lib/components/admin/Settings/Audio.svelte
@@ -56,6 +56,23 @@
 		}, 100);
 	};
 
+    // Fetch available ElevenLabs voices
+    const fetchAvailableVoices = async () => {
+        const response = await fetch('/voices', {
+            method: 'GET',
+            headers: {
+                'Authorization': `Bearer ${localStorage.token}`
+            }
+        });
+
+        if (response.ok) {
+            const data = await response.json();
+            voices = data.voices.map(name => ({ name })); // Update voices array with fetched names
+        } else {
+            toast.error('Failed to fetch voices');
+        }
+    };
+
 	const updateConfigHandler = async () => {
 		const res = await updateAudioConfig(localStorage.token, {
 			tts: {
@@ -82,6 +99,9 @@
 	};
 
 	onMount(async () => {
+        // Fetch available voices on component mount
+        await fetchAvailableVoices(); 
+        
 		const res = await getAudioConfig(localStorage.token);
 
 		if (res) {
@@ -104,6 +124,8 @@
 		if (TTS_ENGINE === 'openai') {
 			getOpenAIVoices();
 			getOpenAIModels();
+        } else if(TTS_ENGINE === 'elevenlabs') {
+            await fetchAvailableVoices(); // Fetch voices if TTS_ENGINE is ElevenLabs
 		} else {
 			getWebAPIVoices();
 		}
@@ -116,7 +138,7 @@
 		await updateConfigHandler();
 		dispatch('save');
 	}}
->
+	>
 	<div class=" space-y-3 overflow-y-scroll scrollbar-hidden h-full">
 		<div class="flex flex-col gap-3">
 			<div>
@@ -185,11 +207,13 @@
 							class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
 							bind:value={TTS_ENGINE}
 							placeholder="Select a mode"
-							on:change={(e) => {
+							on:change={async (e) => {
 								if (e.target.value === 'openai') {
 									getOpenAIVoices();
 									TTS_VOICE = 'alloy';
 									TTS_MODEL = 'tts-1';
+								} else if(e.target.value === 'elevenlabs') {
+									await fetchAvailableVoices();
 								} else {
 									getWebAPIVoices();
 									TTS_VOICE = '';
@@ -199,7 +223,7 @@
 						>
 							<option value="">{$i18n.t('Web API')}</option>
 							<option value="openai">{$i18n.t('OpenAI')}</option>
-							<option value="elevenlabs">{$i18n.t('ElevenLabs')}</option>
+							<option value="elevenlabs">{$i18n.t('Eleven Labs')}</option>
 						</select>
 					</div>
 				</div>
@@ -232,7 +256,7 @@
 
 				<hr class=" dark:border-gray-850 my-2" />
 
-				{#if TTS_ENGINE === ''}
+				{#if TTS_ENGINE !== ''}
 					<div>
 						<div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
 						<div class="flex w-full">
@@ -244,9 +268,9 @@
 									<option value="" selected={TTS_VOICE !== ''}>{$i18n.t('Default')}</option>
 									{#each voices as voice}
 										<option
-											value={voice.voiceURI}
+											value={voice.name}
 											class="bg-gray-100 dark:bg-gray-700"
-											selected={TTS_VOICE === voice.voiceURI}>{voice.name}</option
+											selected={TTS_VOICE === voice.name}>{voice.name}</option
 										>
 									{/each}
 								</select>