From 0216ee101c2793dbd227c29ddf5a345a9d8899f4 Mon Sep 17 00:00:00 2001 From: Justin Hayes <52832301+justinh-rahb@users.noreply.github.com> Date: Sat, 20 Jul 2024 02:56:00 -0400 Subject: [PATCH 1/6] enh: ElevenLabs voice name -> ID --- backend/apps/audio/main.py | 45 ++++++++++++++++++- .../components/admin/Settings/Audio.svelte | 36 ++++++++++++--- 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py index c565bf481..6c3695616 100644 --- a/backend/apps/audio/main.py +++ b/backend/apps/audio/main.py @@ -134,6 +134,35 @@ def convert_mp4_to_wav(file_path, output_path): print(f"Converted {file_path} to {output_path}") +async def fetch_available_voices(): + if app.state.config.TTS_ENGINE != "elevenlabs": + return {} + + base_url = "https://api.elevenlabs.io/v1" + headers = { + "xi-api-key": app.state.config.TTS_API_KEY, + "Content-Type": "application/json", + } + + voices_url = f"{base_url}/voices" + try: + response = requests.get(voices_url, headers=headers) + response.raise_for_status() + voices_data = response.json() + + voice_options = {} + for voice in voices_data.get("voices", []): + voice_name = voice["name"] + voice_id = voice["voice_id"] + voice_options[voice_name] = voice_id + + return voice_options + + except requests.RequestException as e: + log.error(f"Error fetching voices: {str(e)}") + return {} + + @app.get("/config") async def get_audio_config(user=Depends(get_admin_user)): return { @@ -258,9 +287,15 @@ async def speech(request: Request, user=Depends(get_verified_user)): payload = json.loads(body.decode("utf-8")) except Exception as e: log.exception(e) - pass + raise HTTPException(status_code=400, detail="Invalid JSON payload") - url = f"https://api.elevenlabs.io/v1/text-to-speech/{payload['voice']}" + voice_options = await fetch_available_voices() + voice_id = voice_options.get(payload['voice']) + + if not voice_id: + raise HTTPException(status_code=400, detail="Invalid voice name") + + url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}" headers = { "Accept": "audio/mpeg", @@ -435,3 +470,9 @@ def transcribe( status_code=status.HTTP_400_BAD_REQUEST, detail=ERROR_MESSAGES.DEFAULT(e), ) + + +@app.get("/voices") +async def get_voices(user=Depends(get_verified_user)): + voices = await fetch_available_voices() + return {"voices": list(voices.keys())} diff --git a/src/lib/components/admin/Settings/Audio.svelte b/src/lib/components/admin/Settings/Audio.svelte index 50ce7418e..c3d36b093 100644 --- a/src/lib/components/admin/Settings/Audio.svelte +++ b/src/lib/components/admin/Settings/Audio.svelte @@ -56,6 +56,23 @@ }, 100); }; + // Fetch available ElevenLabs voices + const fetchAvailableVoices = async () => { + const response = await fetch('/voices', { + method: 'GET', + headers: { + 'Authorization': `Bearer ${localStorage.token}` + } + }); + + if (response.ok) { + const data = await response.json(); + voices = data.voices.map(name => ({ name })); // Update voices array with fetched names + } else { + toast.error('Failed to fetch voices'); + } + }; + const updateConfigHandler = async () => { const res = await updateAudioConfig(localStorage.token, { tts: { @@ -82,6 +99,9 @@ }; onMount(async () => { + // Fetch available voices on component mount + await fetchAvailableVoices(); + const res = await getAudioConfig(localStorage.token); if (res) { @@ -104,6 +124,8 @@ if (TTS_ENGINE === 'openai') { getOpenAIVoices(); getOpenAIModels(); + } else if(TTS_ENGINE === 'elevenlabs') { + await fetchAvailableVoices(); // Fetch voices if TTS_ENGINE is ElevenLabs } else { getWebAPIVoices(); } @@ -116,7 +138,7 @@ await updateConfigHandler(); dispatch('save'); }} -> + >
@@ -185,11 +207,13 @@ class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right" bind:value={TTS_ENGINE} placeholder="Select a mode" - on:change={(e) => { + on:change={async (e) => { if (e.target.value === 'openai') { getOpenAIVoices(); TTS_VOICE = 'alloy'; TTS_MODEL = 'tts-1'; + } else if(e.target.value === 'elevenlabs') { + await fetchAvailableVoices(); } else { getWebAPIVoices(); TTS_VOICE = ''; @@ -199,7 +223,7 @@ > - +
@@ -232,7 +256,7 @@
- {#if TTS_ENGINE === ''} + {#if TTS_ENGINE !== ''}
{$i18n.t('TTS Voice')}
@@ -244,9 +268,9 @@ {#each voices as voice} {voice.name} {/each} From 6cecb964a9976cf30837933eeb67dbb560838d0d Mon Sep 17 00:00:00 2001 From: Justin Hayes <52832301+justinh-rahb@users.noreply.github.com> Date: Sat, 20 Jul 2024 03:07:50 -0400 Subject: [PATCH 2/6] name --- src/lib/components/admin/Settings/Audio.svelte | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/components/admin/Settings/Audio.svelte b/src/lib/components/admin/Settings/Audio.svelte index c3d36b093..4979d8cc4 100644 --- a/src/lib/components/admin/Settings/Audio.svelte +++ b/src/lib/components/admin/Settings/Audio.svelte @@ -138,7 +138,7 @@ await updateConfigHandler(); dispatch('save'); }} - > +>
@@ -223,7 +223,7 @@ > - +
From c6c298b412ab4089dc149e693a5756077f92a1ca Mon Sep 17 00:00:00 2001 From: Justin Hayes <52832301+justinh-rahb@users.noreply.github.com> Date: Sat, 20 Jul 2024 11:02:59 -0400 Subject: [PATCH 3/6] Fetch didn't happen --- backend/apps/audio/main.py | 6 +++--- src/lib/components/admin/Settings/Audio.svelte | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py index 6c3695616..4113e9884 100644 --- a/backend/apps/audio/main.py +++ b/backend/apps/audio/main.py @@ -134,7 +134,7 @@ def convert_mp4_to_wav(file_path, output_path): print(f"Converted {file_path} to {output_path}") -async def fetch_available_voices(): +async def get_available_voices(): if app.state.config.TTS_ENGINE != "elevenlabs": return {} @@ -289,7 +289,7 @@ async def speech(request: Request, user=Depends(get_verified_user)): log.exception(e) raise HTTPException(status_code=400, detail="Invalid JSON payload") - voice_options = await fetch_available_voices() + voice_options = await get_available_voices() voice_id = voice_options.get(payload['voice']) if not voice_id: @@ -474,5 +474,5 @@ def transcribe( @app.get("/voices") async def get_voices(user=Depends(get_verified_user)): - voices = await fetch_available_voices() + voices = await get_available_voices() return {"voices": list(voices.keys())} diff --git a/src/lib/components/admin/Settings/Audio.svelte b/src/lib/components/admin/Settings/Audio.svelte index 4979d8cc4..91aa3a612 100644 --- a/src/lib/components/admin/Settings/Audio.svelte +++ b/src/lib/components/admin/Settings/Audio.svelte @@ -57,7 +57,7 @@ }; // Fetch available ElevenLabs voices - const fetchAvailableVoices = async () => { + const getVoices = async () => { const response = await fetch('/voices', { method: 'GET', headers: { @@ -100,7 +100,7 @@ onMount(async () => { // Fetch available voices on component mount - await fetchAvailableVoices(); + await getVoices(); const res = await getAudioConfig(localStorage.token); @@ -125,7 +125,7 @@ getOpenAIVoices(); getOpenAIModels(); } else if(TTS_ENGINE === 'elevenlabs') { - await fetchAvailableVoices(); // Fetch voices if TTS_ENGINE is ElevenLabs + await getVoices(); //Get voices if TTS_ENGINE is ElevenLabs } else { getWebAPIVoices(); } @@ -213,7 +213,7 @@ TTS_VOICE = 'alloy'; TTS_MODEL = 'tts-1'; } else if(e.target.value === 'elevenlabs') { - await fetchAvailableVoices(); + await getVoices(); } else { getWebAPIVoices(); TTS_VOICE = ''; From b559bc84a752a12e0ffee838ded5d8da89a609ae Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Fri, 2 Aug 2024 19:24:47 +0200 Subject: [PATCH 4/6] refac --- backend/apps/audio/main.py | 104 +++++++++------ src/lib/apis/audio/index.ts | 56 ++++++++ .../components/admin/Settings/Audio.svelte | 121 ++++++++---------- 3 files changed, 177 insertions(+), 104 deletions(-) diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py index 4113e9884..167db77ba 100644 --- a/backend/apps/audio/main.py +++ b/backend/apps/audio/main.py @@ -10,12 +10,12 @@ from fastapi import ( File, Form, ) - from fastapi.responses import StreamingResponse, JSONResponse, FileResponse from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel +from typing import List import uuid import requests import hashlib @@ -31,6 +31,7 @@ from utils.utils import ( ) from utils.misc import calculate_sha256 + from config import ( SRC_LOG_LEVELS, CACHE_DIR, @@ -134,35 +135,6 @@ def convert_mp4_to_wav(file_path, output_path): print(f"Converted {file_path} to {output_path}") -async def get_available_voices(): - if app.state.config.TTS_ENGINE != "elevenlabs": - return {} - - base_url = "https://api.elevenlabs.io/v1" - headers = { - "xi-api-key": app.state.config.TTS_API_KEY, - "Content-Type": "application/json", - } - - voices_url = f"{base_url}/voices" - try: - response = requests.get(voices_url, headers=headers) - response.raise_for_status() - voices_data = response.json() - - voice_options = {} - for voice in voices_data.get("voices", []): - voice_name = voice["name"] - voice_id = voice["voice_id"] - voice_options[voice_name] = voice_id - - return voice_options - - except requests.RequestException as e: - log.error(f"Error fetching voices: {str(e)}") - return {} - - @app.get("/config") async def get_audio_config(user=Depends(get_admin_user)): return { @@ -281,7 +253,6 @@ async def speech(request: Request, user=Depends(get_verified_user)): ) elif app.state.config.TTS_ENGINE == "elevenlabs": - payload = None try: payload = json.loads(body.decode("utf-8")) @@ -289,12 +260,7 @@ async def speech(request: Request, user=Depends(get_verified_user)): log.exception(e) raise HTTPException(status_code=400, detail="Invalid JSON payload") - voice_options = await get_available_voices() - voice_id = voice_options.get(payload['voice']) - - if not voice_id: - raise HTTPException(status_code=400, detail="Invalid voice name") - + voice_id = payload.get("voice", "") url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}" headers = { @@ -472,7 +438,67 @@ def transcribe( ) +def get_available_models() -> List[dict]: + if app.state.config.TTS_ENGINE == "openai": + return [{"id": "tts-1"}, {"id": "tts-1-hd"}] + elif app.state.config.TTS_ENGINE == "elevenlabs": + headers = { + "xi-api-key": app.state.config.TTS_API_KEY, + "Content-Type": "application/json", + } + + try: + response = requests.get( + "https://api.elevenlabs.io/v1/models", headers=headers + ) + response.raise_for_status() + models = response.json() + return [ + {"name": model["name"], "id": model["model_id"]} for model in models + ] + except requests.RequestException as e: + log.error(f"Error fetching voices: {str(e)}") + return [] + + +@app.get("/models") +async def get_models(user=Depends(get_verified_user)): + return {"models": get_available_models()} + + +def get_available_voices() -> List[dict]: + if app.state.config.TTS_ENGINE == "openai": + return [ + {"name": "alloy", "id": "alloy"}, + {"name": "echo", "id": "echo"}, + {"name": "fable", "id": "fable"}, + {"name": "onyx", "id": "onyx"}, + {"name": "nova", "id": "nova"}, + {"name": "shimmer", "id": "shimmer"}, + ] + elif app.state.config.TTS_ENGINE == "elevenlabs": + headers = { + "xi-api-key": app.state.config.TTS_API_KEY, + "Content-Type": "application/json", + } + + try: + response = requests.get( + "https://api.elevenlabs.io/v1/voices", headers=headers + ) + response.raise_for_status() + voices_data = response.json() + + voices = [] + for voice in voices_data.get("voices", []): + voices.append({"name": voice["name"], "id": voice["voice_id"]}) + return voices + except requests.RequestException as e: + log.error(f"Error fetching voices: {str(e)}") + + return [] + + @app.get("/voices") async def get_voices(user=Depends(get_verified_user)): - voices = await get_available_voices() - return {"voices": list(voices.keys())} + return {"voices": get_available_voices()} diff --git a/src/lib/apis/audio/index.ts b/src/lib/apis/audio/index.ts index 9716c552a..af09af990 100644 --- a/src/lib/apis/audio/index.ts +++ b/src/lib/apis/audio/index.ts @@ -131,3 +131,59 @@ export const synthesizeOpenAISpeech = async ( return res; }; + +export const getModels = async (token: string = '') => { + let error = null; + + const res = await fetch(`${AUDIO_API_BASE_URL}/models`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + console.log(err); + + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const getVoices = async (token: string = '') => { + let error = null; + + const res = await fetch(`${AUDIO_API_BASE_URL}/voices`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + console.log(err); + + return null; + }); + + if (error) { + throw error; + } + + return res; +}; diff --git a/src/lib/components/admin/Settings/Audio.svelte b/src/lib/components/admin/Settings/Audio.svelte index 91aa3a612..d59995e26 100644 --- a/src/lib/components/admin/Settings/Audio.svelte +++ b/src/lib/components/admin/Settings/Audio.svelte @@ -1,13 +1,19 @@ @@ -208,14 +199,14 @@ bind:value={TTS_ENGINE} placeholder="Select a mode" on:change={async (e) => { + await updateConfigHandler(); + await getVoices(); + await getModels(); + if (e.target.value === 'openai') { - getOpenAIVoices(); TTS_VOICE = 'alloy'; TTS_MODEL = 'tts-1'; - } else if(e.target.value === 'elevenlabs') { - await getVoices(); } else { - getWebAPIVoices(); TTS_VOICE = ''; TTS_MODEL = ''; } @@ -256,7 +247,7 @@
- {#if TTS_ENGINE !== ''} + {#if TTS_ENGINE === ''}
{$i18n.t('TTS Voice')}
@@ -268,9 +259,9 @@ {#each voices as voice} {voice.name} {/each} @@ -292,7 +283,7 @@ {#each voices as voice} - {/each}
@@ -311,7 +302,7 @@ {#each models as model} -
@@ -333,7 +324,7 @@ {#each voices as voice} - {/each}
@@ -352,7 +343,7 @@ {#each models as model} -
From c416444e242c5d9b0097ee2bacaf3192b2d15bf7 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Fri, 2 Aug 2024 19:26:34 +0200 Subject: [PATCH 5/6] fix --- src/lib/components/admin/Settings/Audio.svelte | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib/components/admin/Settings/Audio.svelte b/src/lib/components/admin/Settings/Audio.svelte index d59995e26..7c3300568 100644 --- a/src/lib/components/admin/Settings/Audio.svelte +++ b/src/lib/components/admin/Settings/Audio.svelte @@ -294,13 +294,13 @@
- + {#each models as model}