Merge pull request #4018 from justinh-rahb/elevenlabs-voice-names

feat: Fetch ElevenLabs voice ID by name
This commit is contained in:
Timothy Jaeryang Baek 2024-08-02 18:44:48 +02:00 committed by GitHub
commit 3f53abb233
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 71 additions and 6 deletions

View File

@ -134,6 +134,35 @@ def convert_mp4_to_wav(file_path, output_path):
print(f"Converted {file_path} to {output_path}")
async def get_available_voices():
    """Fetch the ElevenLabs voices visible to the configured API key.

    Returns:
        A dict mapping each voice's name to its ElevenLabs voice ID.
        An empty dict is returned when the configured TTS engine is not
        "elevenlabs", or when the request to the ElevenLabs API fails or
        returns a body that cannot be decoded as JSON.
    """
    if app.state.config.TTS_ENGINE != "elevenlabs":
        return {}

    base_url = "https://api.elevenlabs.io/v1"
    headers = {
        "xi-api-key": app.state.config.TTS_API_KEY,
        "Content-Type": "application/json",
    }
    voices_url = f"{base_url}/voices"

    try:
        # requests applies no timeout unless one is given, so a stalled
        # connection would otherwise hang this handler indefinitely.
        # (connect timeout, read timeout) in seconds.
        # NOTE(review): requests.get is a blocking call inside an async
        # function; consider moving it off the event loop.
        response = requests.get(voices_url, headers=headers, timeout=(5, 30))
        response.raise_for_status()
        voices_data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decode failures on requests versions where
        # the decode error is not a RequestException subclass.
        log.error(f"Error fetching voices: {str(e)}")
        return {}

    voice_options = {}
    for voice in voices_data.get("voices", []):
        voice_name = voice.get("name")
        voice_id = voice.get("voice_id")
        # Skip malformed entries instead of failing the whole listing.
        if voice_name and voice_id:
            voice_options[voice_name] = voice_id
    return voice_options
@app.get("/config")
async def get_audio_config(user=Depends(get_admin_user)):
return {
@ -258,9 +287,15 @@ async def speech(request: Request, user=Depends(get_verified_user)):
payload = json.loads(body.decode("utf-8"))
except Exception as e:
log.exception(e)
pass
raise HTTPException(status_code=400, detail="Invalid JSON payload")
url = f"https://api.elevenlabs.io/v1/text-to-speech/{payload['voice']}"
voice_options = await get_available_voices()
voice_id = voice_options.get(payload['voice'])
if not voice_id:
raise HTTPException(status_code=400, detail="Invalid voice name")
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
headers = {
"Accept": "audio/mpeg",
@ -435,3 +470,9 @@ def transcribe(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.DEFAULT(e),
)
@app.get("/voices")
async def get_voices(user=Depends(get_verified_user)):
    """Return the names of the voices reported by get_available_voices().

    The route depends on get_verified_user. Only the voice names are
    returned; the voice IDs they map to are not included in the response.

    Returns:
        A dict with a single "voices" key holding the list of voice names.
    """
    name_to_id = await get_available_voices()
    # Iterating a dict yields its keys, i.e. the voice names.
    voice_names = list(name_to_id)
    return {"voices": voice_names}

View File

@ -56,6 +56,23 @@
}, 100);
};
// Fetch the available ElevenLabs voice names from the backend and store them
// in `voices` as objects of the form { name }.
// This function never rejects: HTTP errors, network failures and malformed
// responses are reported with a toast, so callers that await it can carry on.
const getVoices = async () => {
	try {
		const response = await fetch('/voices', {
			method: 'GET',
			headers: {
				'Authorization': `Bearer ${localStorage.token}`
			}
		});

		if (!response.ok) {
			toast.error('Failed to fetch voices');
			return;
		}

		const data = await response.json();
		// Guard against a payload without a `voices` array.
		const names = data && Array.isArray(data.voices) ? data.voices : [];
		voices = names.map((name) => ({ name })); // Update voices array with fetched names
	} catch (error) {
		// fetch() rejects on network failure and response.json() on an invalid body.
		console.error(error);
		toast.error('Failed to fetch voices');
	}
};
const updateConfigHandler = async () => {
const res = await updateAudioConfig(localStorage.token, {
tts: {
@ -82,6 +99,9 @@
};
onMount(async () => {
// Fetch available voices on component mount
await getVoices();
const res = await getAudioConfig(localStorage.token);
if (res) {
@ -104,6 +124,8 @@
if (TTS_ENGINE === 'openai') {
getOpenAIVoices();
getOpenAIModels();
} else if(TTS_ENGINE === 'elevenlabs') {
await getVoices(); //Get voices if TTS_ENGINE is ElevenLabs
} else {
getWebAPIVoices();
}
@ -185,11 +207,13 @@
class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={TTS_ENGINE}
placeholder="Select a mode"
on:change={(e) => {
on:change={async (e) => {
if (e.target.value === 'openai') {
getOpenAIVoices();
TTS_VOICE = 'alloy';
TTS_MODEL = 'tts-1';
} else if(e.target.value === 'elevenlabs') {
await getVoices();
} else {
getWebAPIVoices();
TTS_VOICE = '';
@ -232,7 +256,7 @@
<hr class=" dark:border-gray-850 my-2" />
{#if TTS_ENGINE === ''}
{#if TTS_ENGINE !== ''}
<div>
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
<div class="flex w-full">
@ -244,9 +268,9 @@
<option value="" selected={TTS_VOICE !== ''}>{$i18n.t('Default')}</option>
{#each voices as voice}
<option
value={voice.voiceURI}
value={voice.name}
class="bg-gray-100 dark:bg-gray-700"
selected={TTS_VOICE === voice.voiceURI}>{voice.name}</option
selected={TTS_VOICE === voice.name}>{voice.name}</option
>
{/each}
</select>