Merge pull request #4299 from open-webui/dev-elevenlabs

feat: Fetch ElevenLabs voice ID by name
This commit is contained in:
Timothy Jaeryang Baek 2024-08-02 19:29:24 +02:00 committed by GitHub
commit 9d5e3e2a91
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 211 additions and 74 deletions

View File

@ -10,12 +10,12 @@ from fastapi import (
File, File,
Form, Form,
) )
from fastapi.responses import StreamingResponse, JSONResponse, FileResponse from fastapi.responses import StreamingResponse, JSONResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel from pydantic import BaseModel
from typing import List
import uuid import uuid
import requests import requests
import hashlib import hashlib
@ -31,6 +31,7 @@ from utils.utils import (
) )
from utils.misc import calculate_sha256 from utils.misc import calculate_sha256
from config import ( from config import (
SRC_LOG_LEVELS, SRC_LOG_LEVELS,
CACHE_DIR, CACHE_DIR,
@ -252,15 +253,15 @@ async def speech(request: Request, user=Depends(get_verified_user)):
) )
elif app.state.config.TTS_ENGINE == "elevenlabs": elif app.state.config.TTS_ENGINE == "elevenlabs":
payload = None payload = None
try: try:
payload = json.loads(body.decode("utf-8")) payload = json.loads(body.decode("utf-8"))
except Exception as e: except Exception as e:
log.exception(e) log.exception(e)
pass raise HTTPException(status_code=400, detail="Invalid JSON payload")
url = f"https://api.elevenlabs.io/v1/text-to-speech/{payload['voice']}" voice_id = payload.get("voice", "")
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
headers = { headers = {
"Accept": "audio/mpeg", "Accept": "audio/mpeg",
@ -435,3 +436,69 @@ def transcribe(
status_code=status.HTTP_400_BAD_REQUEST, status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.DEFAULT(e), detail=ERROR_MESSAGES.DEFAULT(e),
) )
def get_available_models() -> List[dict]:
    """Return the TTS models offered by the currently configured engine.

    Reads ``app.state.config.TTS_ENGINE``:

    * ``"openai"``: a fixed list of two model ids (no ``name`` key).
    * ``"elevenlabs"``: the result of ``GET https://api.elevenlabs.io/v1/models``,
      authenticated with ``app.state.config.TTS_API_KEY`` and mapped to
      ``{"name": ..., "id": ...}`` dicts.
    * any other value: an empty list.

    Returns:
        A list of dicts, never ``None``. A failed ElevenLabs request is logged
        and also yields an empty list.
    """
    engine = app.state.config.TTS_ENGINE

    if engine == "openai":
        return [{"id": "tts-1"}, {"id": "tts-1-hd"}]

    if engine == "elevenlabs":
        headers = {
            "xi-api-key": app.state.config.TTS_API_KEY,
            "Content-Type": "application/json",
        }
        try:
            # This synchronous call is reached from an async route handler, so
            # an unbounded wait would stall the server; a timeout surfaces as
            # requests.Timeout, which is a RequestException and is handled below.
            response = requests.get(
                "https://api.elevenlabs.io/v1/models", headers=headers, timeout=10
            )
            response.raise_for_status()
            return [
                {"name": model["name"], "id": model["model_id"]}
                for model in response.json()
            ]
        except requests.RequestException as e:
            log.error(f"Error fetching models: {str(e)}")

    # Unknown engine or failed request: always hand callers a list.
    return []
@app.get("/models")
async def get_models(user=Depends(get_verified_user)):
    """Serve the configured engine's TTS models as ``{"models": [...]}``.

    The ``user`` dependency restricts the route to callers accepted by
    ``get_verified_user``; its value is not otherwise used here.
    """
    available_models = get_available_models()
    return {"models": available_models}
def get_available_voices() -> List[dict]:
    """Return the TTS voices offered by the currently configured engine.

    Reads ``app.state.config.TTS_ENGINE``:

    * ``"openai"``: a fixed list of six voices whose ``name`` and ``id`` match.
    * ``"elevenlabs"``: the ``voices`` array of
      ``GET https://api.elevenlabs.io/v1/voices``, authenticated with
      ``app.state.config.TTS_API_KEY`` and mapped to
      ``{"name": ..., "id": <voice_id>}`` dicts.
    * any other value: an empty list.

    Returns:
        A list of dicts, never ``None``. A failed ElevenLabs request is logged
        and also yields an empty list.
    """
    engine = app.state.config.TTS_ENGINE

    if engine == "openai":
        return [
            {"name": "alloy", "id": "alloy"},
            {"name": "echo", "id": "echo"},
            {"name": "fable", "id": "fable"},
            {"name": "onyx", "id": "onyx"},
            {"name": "nova", "id": "nova"},
            {"name": "shimmer", "id": "shimmer"},
        ]

    if engine == "elevenlabs":
        headers = {
            "xi-api-key": app.state.config.TTS_API_KEY,
            "Content-Type": "application/json",
        }
        try:
            # This synchronous call is reached from an async route handler, so
            # an unbounded wait would stall the server; a timeout surfaces as
            # requests.Timeout, which is a RequestException and is handled below.
            response = requests.get(
                "https://api.elevenlabs.io/v1/voices", headers=headers, timeout=10
            )
            response.raise_for_status()
            voices_data = response.json()
            return [
                {"name": voice["name"], "id": voice["voice_id"]}
                for voice in voices_data.get("voices", [])
            ]
        except requests.RequestException as e:
            log.error(f"Error fetching voices: {str(e)}")

    # Unknown engine or failed request: always hand callers a list.
    return []
@app.get("/voices")
async def get_voices(user=Depends(get_verified_user)):
    """Serve the configured engine's TTS voices as ``{"voices": [...]}``.

    The ``user`` dependency restricts the route to callers accepted by
    ``get_verified_user``; its value is not otherwise used here.
    """
    available_voices = get_available_voices()
    return {"voices": available_voices}

View File

@ -131,3 +131,59 @@ export const synthesizeOpenAISpeech = async (
return res; return res;
}; };
/**
 * Requests the TTS model list from `${AUDIO_API_BASE_URL}/models`.
 *
 * @param token - Bearer token placed in the `Authorization` header.
 * @returns The parsed JSON body of a successful response, or `null` when the
 *   request failed and the failure carried no truthy `detail` field.
 * @throws The failure's `detail` value when it is truthy.
 */
export const getModels = async (token: string = '') => {
	let failureDetail: unknown = null;

	try {
		const response = await fetch(`${AUDIO_API_BASE_URL}/models`, {
			method: 'GET',
			headers: {
				'Content-Type': 'application/json',
				Authorization: `Bearer ${token}`
			}
		});

		if (response.ok) {
			return await response.json();
		}

		// Non-2xx: the parsed error body itself becomes the thrown value.
		throw await response.json();
	} catch (err: any) {
		failureDetail = err.detail;
		console.log(err);
	}

	if (failureDetail) {
		throw failureDetail;
	}

	// Failure without a usable `detail` is reported as "no data".
	return null;
};
/**
 * Requests the TTS voice list from `${AUDIO_API_BASE_URL}/voices`.
 *
 * @param token - Bearer token placed in the `Authorization` header.
 * @returns The parsed JSON body of a successful response, or `null` when the
 *   request failed and the failure carried no truthy `detail` field.
 * @throws The failure's `detail` value when it is truthy.
 */
export const getVoices = async (token: string = '') => {
	let failureDetail: unknown = null;

	try {
		const response = await fetch(`${AUDIO_API_BASE_URL}/voices`, {
			method: 'GET',
			headers: {
				'Content-Type': 'application/json',
				Authorization: `Bearer ${token}`
			}
		});

		if (response.ok) {
			return await response.json();
		}

		// Non-2xx: the parsed error body itself becomes the thrown value.
		throw await response.json();
	} catch (err: any) {
		failureDetail = err.detail;
		console.log(err);
	}

	if (failureDetail) {
		throw failureDetail;
	}

	// Failure without a usable `detail` is reported as "no data".
	return null;
};

View File

@ -1,13 +1,19 @@
<script lang="ts"> <script lang="ts">
import { getAudioConfig, updateAudioConfig } from '$lib/apis/audio';
import { user, settings, config } from '$lib/stores';
import { createEventDispatcher, onMount, getContext } from 'svelte';
import { toast } from 'svelte-sonner'; import { toast } from 'svelte-sonner';
import Switch from '$lib/components/common/Switch.svelte'; import { createEventDispatcher, onMount, getContext } from 'svelte';
import { getBackendConfig } from '$lib/apis';
import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
const dispatch = createEventDispatcher(); const dispatch = createEventDispatcher();
import { getBackendConfig } from '$lib/apis';
import {
getAudioConfig,
updateAudioConfig,
getModels as _getModels,
getVoices as _getVoices
} from '$lib/apis/audio';
import { user, settings, config } from '$lib/stores';
import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
const i18n = getContext('i18n'); const i18n = getContext('i18n');
export let saveHandler: Function; export let saveHandler: Function;
@ -30,30 +36,41 @@
let models = []; let models = [];
let nonLocalVoices = false; let nonLocalVoices = false;
const getOpenAIVoices = () => { const getModels = async () => {
voices = [ if (TTS_ENGINE === '') {
{ name: 'alloy' }, models = [];
{ name: 'echo' }, } else {
{ name: 'fable' }, const res = await _getModels(localStorage.token).catch((e) => {
{ name: 'onyx' }, toast.error(e);
{ name: 'nova' }, });
{ name: 'shimmer' }
];
};
const getOpenAIModels = () => { if (res) {
models = [{ name: 'tts-1' }, { name: 'tts-1-hd' }]; console.log(res);
}; models = res.models;
const getWebAPIVoices = () => {
const getVoicesLoop = setInterval(async () => {
voices = await speechSynthesis.getVoices();
// do your loop
if (voices.length > 0) {
clearInterval(getVoicesLoop);
} }
}, 100); }
};
const getVoices = async () => {
if (TTS_ENGINE === '') {
const getVoicesLoop = setInterval(async () => {
voices = await speechSynthesis.getVoices();
// do your loop
if (voices.length > 0) {
clearInterval(getVoicesLoop);
}
}, 100);
} else {
const res = await _getVoices(localStorage.token).catch((e) => {
toast.error(e);
});
if (res) {
console.log(res);
voices = res.voices;
}
}
}; };
const updateConfigHandler = async () => { const updateConfigHandler = async () => {
@ -101,12 +118,8 @@
STT_MODEL = res.stt.MODEL; STT_MODEL = res.stt.MODEL;
} }
if (TTS_ENGINE === 'openai') { await getVoices();
getOpenAIVoices(); await getModels();
getOpenAIModels();
} else {
getWebAPIVoices();
}
}); });
</script> </script>
@ -185,13 +198,15 @@
class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right" class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={TTS_ENGINE} bind:value={TTS_ENGINE}
placeholder="Select a mode" placeholder="Select a mode"
on:change={(e) => { on:change={async (e) => {
await updateConfigHandler();
await getVoices();
await getModels();
if (e.target.value === 'openai') { if (e.target.value === 'openai') {
getOpenAIVoices();
TTS_VOICE = 'alloy'; TTS_VOICE = 'alloy';
TTS_MODEL = 'tts-1'; TTS_MODEL = 'tts-1';
} else { } else {
getWebAPIVoices();
TTS_VOICE = ''; TTS_VOICE = '';
TTS_MODEL = ''; TTS_MODEL = '';
} }
@ -268,7 +283,7 @@
<datalist id="voice-list"> <datalist id="voice-list">
{#each voices as voice} {#each voices as voice}
<option value={voice.name} /> <option value={voice.id}>{voice.name}</option>
{/each} {/each}
</datalist> </datalist>
</div> </div>
@ -279,15 +294,15 @@
<div class="flex w-full"> <div class="flex w-full">
<div class="flex-1"> <div class="flex-1">
<input <input
list="model-list" list="tts-model-list"
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none" class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
bind:value={TTS_MODEL} bind:value={TTS_MODEL}
placeholder="Select a model" placeholder="Select a model"
/> />
<datalist id="model-list"> <datalist id="tts-model-list">
{#each models as model} {#each models as model}
<option value={model.name} /> <option value={model.id} />
{/each} {/each}
</datalist> </datalist>
</div> </div>
@ -309,7 +324,7 @@
<datalist id="voice-list"> <datalist id="voice-list">
{#each voices as voice} {#each voices as voice}
<option value={voice.name} /> <option value={voice.id}>{voice.name}</option>
{/each} {/each}
</datalist> </datalist>
</div> </div>
@ -320,15 +335,15 @@
<div class="flex w-full"> <div class="flex w-full">
<div class="flex-1"> <div class="flex-1">
<input <input
list="model-list" list="tts-model-list"
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none" class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
bind:value={TTS_MODEL} bind:value={TTS_MODEL}
placeholder="Select a model" placeholder="Select a model"
/> />
<datalist id="model-list"> <datalist id="tts-model-list">
{#each models as model} {#each models as model}
<option value={model.name} /> <option value={model.id} />
{/each} {/each}
</datalist> </datalist>
</div> </div>

View File

@ -1,7 +1,10 @@
<script lang="ts"> <script lang="ts">
import { user, settings, config } from '$lib/stores';
import { createEventDispatcher, onMount, getContext } from 'svelte';
import { toast } from 'svelte-sonner'; import { toast } from 'svelte-sonner';
import { createEventDispatcher, onMount, getContext } from 'svelte';
import { user, settings, config } from '$lib/stores';
import { getVoices as _getVoices } from '$lib/apis/audio';
import Switch from '$lib/components/common/Switch.svelte'; import Switch from '$lib/components/common/Switch.svelte';
const dispatch = createEventDispatcher(); const dispatch = createEventDispatcher();
@ -20,26 +23,26 @@
let voices = []; let voices = [];
let voice = ''; let voice = '';
const getOpenAIVoices = () => { const getVoices = async () => {
voices = [ if ($config.audio.tts.engine === '') {
{ name: 'alloy' }, const getVoicesLoop = setInterval(async () => {
{ name: 'echo' }, voices = await speechSynthesis.getVoices();
{ name: 'fable' },
{ name: 'onyx' },
{ name: 'nova' },
{ name: 'shimmer' }
];
};
const getWebAPIVoices = () => { // do your loop
const getVoicesLoop = setInterval(async () => { if (voices.length > 0) {
voices = await speechSynthesis.getVoices(); clearInterval(getVoicesLoop);
}
}, 100);
} else {
const res = await _getVoices(localStorage.token).catch((e) => {
toast.error(e);
});
// do your loop if (res) {
if (voices.length > 0) { console.log(res);
clearInterval(getVoicesLoop); voices = res.voices;
} }
}, 100); }
}; };
const toggleResponseAutoPlayback = async () => { const toggleResponseAutoPlayback = async () => {
@ -61,11 +64,7 @@
voice = $settings?.audio?.tts?.voice ?? $config.audio.tts.voice ?? ''; voice = $settings?.audio?.tts?.voice ?? $config.audio.tts.voice ?? '';
nonLocalVoices = $settings.audio?.tts?.nonLocalVoices ?? false; nonLocalVoices = $settings.audio?.tts?.nonLocalVoices ?? false;
if ($config.audio.tts.engine === 'openai') { await getVoices();
getOpenAIVoices();
} else {
getWebAPIVoices();
}
}); });
</script> </script>
@ -195,7 +194,7 @@
<datalist id="voice-list"> <datalist id="voice-list">
{#each voices as voice} {#each voices as voice}
<option value={voice.name} /> <option value={voice.id}>{voice.name}</option>
{/each} {/each}
</datalist> </datalist>
</div> </div>