This commit is contained in:
Timothy J. Baek 2024-08-02 19:24:47 +02:00
parent 3f53abb233
commit b559bc84a7
3 changed files with 177 additions and 104 deletions

View File

@ -10,12 +10,12 @@ from fastapi import (
File, File,
Form, Form,
) )
from fastapi.responses import StreamingResponse, JSONResponse, FileResponse from fastapi.responses import StreamingResponse, JSONResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel from pydantic import BaseModel
from typing import List
import uuid import uuid
import requests import requests
import hashlib import hashlib
@ -31,6 +31,7 @@ from utils.utils import (
) )
from utils.misc import calculate_sha256 from utils.misc import calculate_sha256
from config import ( from config import (
SRC_LOG_LEVELS, SRC_LOG_LEVELS,
CACHE_DIR, CACHE_DIR,
@ -134,35 +135,6 @@ def convert_mp4_to_wav(file_path, output_path):
print(f"Converted {file_path} to {output_path}") print(f"Converted {file_path} to {output_path}")
async def get_available_voices():
    """Return the ElevenLabs voices as a mapping of voice name to voice ID.

    Returns:
        A dict of ``{voice name: voice_id}``. The dict is empty when the
        configured TTS engine is not ``"elevenlabs"`` or when the request
        to the ElevenLabs API fails.
    """
    if app.state.config.TTS_ENGINE != "elevenlabs":
        return {}

    headers = {
        "xi-api-key": app.state.config.TTS_API_KEY,
        "Content-Type": "application/json",
    }

    try:
        # Without a timeout requests waits indefinitely on a stalled
        # connection. requests.Timeout is a RequestException, so a timeout
        # is logged and handled by the except clause below.
        # NOTE(review): requests.get is a blocking call inside a coroutine.
        response = requests.get(
            "https://api.elevenlabs.io/v1/voices", headers=headers, timeout=10
        )
        response.raise_for_status()
        voices_data = response.json()
    except requests.RequestException as e:
        log.error(f"Error fetching voices: {str(e)}")
        return {}

    return {
        voice["name"]: voice["voice_id"] for voice in voices_data.get("voices", [])
    }
@app.get("/config") @app.get("/config")
async def get_audio_config(user=Depends(get_admin_user)): async def get_audio_config(user=Depends(get_admin_user)):
return { return {
@ -281,7 +253,6 @@ async def speech(request: Request, user=Depends(get_verified_user)):
) )
elif app.state.config.TTS_ENGINE == "elevenlabs": elif app.state.config.TTS_ENGINE == "elevenlabs":
payload = None payload = None
try: try:
payload = json.loads(body.decode("utf-8")) payload = json.loads(body.decode("utf-8"))
@ -289,12 +260,7 @@ async def speech(request: Request, user=Depends(get_verified_user)):
log.exception(e) log.exception(e)
raise HTTPException(status_code=400, detail="Invalid JSON payload") raise HTTPException(status_code=400, detail="Invalid JSON payload")
voice_options = await get_available_voices() voice_id = payload.get("voice", "")
voice_id = voice_options.get(payload['voice'])
if not voice_id:
raise HTTPException(status_code=400, detail="Invalid voice name")
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}" url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
headers = { headers = {
@ -472,7 +438,67 @@ def transcribe(
) )
def get_available_models() -> List[dict]:
    """Return the TTS models offered by the configured TTS engine.

    Returns:
        For the ``"openai"`` engine, a fixed list of ``{"id": ...}`` entries.
        For the ``"elevenlabs"`` engine, one ``{"name": ..., "id": ...}``
        entry per model reported by the ElevenLabs ``/v1/models`` endpoint.
        An empty list for any other engine or when the ElevenLabs request
        fails.
    """
    if app.state.config.TTS_ENGINE == "openai":
        return [{"id": "tts-1"}, {"id": "tts-1-hd"}]

    if app.state.config.TTS_ENGINE == "elevenlabs":
        headers = {
            "xi-api-key": app.state.config.TTS_API_KEY,
            "Content-Type": "application/json",
        }

        try:
            # Without a timeout requests waits indefinitely on a stalled
            # connection. requests.Timeout is a RequestException, so a
            # timeout falls through to the empty-list result below.
            response = requests.get(
                "https://api.elevenlabs.io/v1/models", headers=headers, timeout=10
            )
            response.raise_for_status()
            models = response.json()
            return [
                {"name": model["name"], "id": model["model_id"]} for model in models
            ]
        except requests.RequestException as e:
            log.error(f"Error fetching models: {str(e)}")

    # Unrecognised engine or failed request: always honour the list return type.
    return []
@app.get("/models")
async def get_models(user=Depends(get_verified_user)):
    """Respond with the available TTS models under the ``"models"`` key."""
    available_models = get_available_models()
    return {"models": available_models}
def get_available_voices() -> List[dict]:
    """Return the TTS voices offered by the configured TTS engine.

    Returns:
        A list of ``{"name": ..., "id": ...}`` entries. For the ``"openai"``
        engine this is a fixed list; for the ``"elevenlabs"`` engine it is
        built from the ElevenLabs ``/v1/voices`` endpoint. An empty list is
        returned for any other engine or when the ElevenLabs request fails.
    """
    if app.state.config.TTS_ENGINE == "openai":
        return [
            {"name": "alloy", "id": "alloy"},
            {"name": "echo", "id": "echo"},
            {"name": "fable", "id": "fable"},
            {"name": "onyx", "id": "onyx"},
            {"name": "nova", "id": "nova"},
            {"name": "shimmer", "id": "shimmer"},
        ]

    if app.state.config.TTS_ENGINE == "elevenlabs":
        headers = {
            "xi-api-key": app.state.config.TTS_API_KEY,
            "Content-Type": "application/json",
        }

        try:
            # Without a timeout requests waits indefinitely on a stalled
            # connection. requests.Timeout is a RequestException, so a
            # timeout falls through to the empty-list result below.
            response = requests.get(
                "https://api.elevenlabs.io/v1/voices", headers=headers, timeout=10
            )
            response.raise_for_status()
            voices_data = response.json()
            return [
                {"name": voice["name"], "id": voice["voice_id"]}
                for voice in voices_data.get("voices", [])
            ]
        except requests.RequestException as e:
            log.error(f"Error fetching voices: {str(e)}")

    # Unrecognised engine or failed request: always honour the list return type.
    return []
@app.get("/voices") @app.get("/voices")
async def get_voices(user=Depends(get_verified_user)): async def get_voices(user=Depends(get_verified_user)):
voices = await get_available_voices() return {"voices": get_available_voices()}
return {"voices": list(voices.keys())}

View File

@ -131,3 +131,59 @@ export const synthesizeOpenAISpeech = async (
return res; return res;
}; };
// Fetch the TTS models from the audio API's `/models` endpoint.
// Resolves with the parsed JSON response body. Rejects with the `detail`
// value of the backend's error body, or with the error's message when the
// failure carries no `detail` (for example when fetch itself rejects).
export const getModels = async (token: string = '') => {
	let error = null;

	const res = await fetch(`${AUDIO_API_BASE_URL}/models`, {
		method: 'GET',
		headers: {
			'Content-Type': 'application/json',
			Authorization: `Bearer ${token}`
		}
	})
		.then(async (res) => {
			if (!res.ok) throw await res.json();
			return res.json();
		})
		.catch((err) => {
			console.log(err);
			// Errors that never reached the backend have no `detail`; fall back
			// so the failure is propagated instead of silently resolving null.
			error = (err && (err.detail || err.message)) || 'Failed to fetch models';
			return null;
		});

	if (error) {
		throw error;
	}

	return res;
};
// Fetch the TTS voices from the audio API's `/voices` endpoint.
// Resolves with the parsed JSON response body. Rejects with the `detail`
// value of the backend's error body, or with the error's message when the
// failure carries no `detail` (for example when fetch itself rejects).
export const getVoices = async (token: string = '') => {
	let error = null;

	const res = await fetch(`${AUDIO_API_BASE_URL}/voices`, {
		method: 'GET',
		headers: {
			'Content-Type': 'application/json',
			Authorization: `Bearer ${token}`
		}
	})
		.then(async (res) => {
			if (!res.ok) throw await res.json();
			return res.json();
		})
		.catch((err) => {
			console.log(err);
			// Errors that never reached the backend have no `detail`; fall back
			// so the failure is propagated instead of silently resolving null.
			error = (err && (err.detail || err.message)) || 'Failed to fetch voices';
			return null;
		});

	if (error) {
		throw error;
	}

	return res;
};

View File

@ -1,13 +1,19 @@
<script lang="ts"> <script lang="ts">
import { getAudioConfig, updateAudioConfig } from '$lib/apis/audio';
import { user, settings, config } from '$lib/stores';
import { createEventDispatcher, onMount, getContext } from 'svelte';
import { toast } from 'svelte-sonner'; import { toast } from 'svelte-sonner';
import Switch from '$lib/components/common/Switch.svelte'; import { createEventDispatcher, onMount, getContext } from 'svelte';
import { getBackendConfig } from '$lib/apis';
import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
const dispatch = createEventDispatcher(); const dispatch = createEventDispatcher();
import { getBackendConfig } from '$lib/apis';
import {
getAudioConfig,
updateAudioConfig,
getModels as _getModels,
getVoices as _getVoices
} from '$lib/apis/audio';
import { user, settings, config } from '$lib/stores';
import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
const i18n = getContext('i18n'); const i18n = getContext('i18n');
export let saveHandler: Function; export let saveHandler: Function;
@ -30,48 +36,42 @@
let models = []; let models = [];
let nonLocalVoices = false; let nonLocalVoices = false;
const getOpenAIVoices = () => { const getModels = async () => {
voices = [ if (TTS_ENGINE === '') {
{ name: 'alloy' }, models = [];
{ name: 'echo' }, } else {
{ name: 'fable' }, const res = await _getModels(localStorage.token).catch((e) => {
{ name: 'onyx' }, toast.error(e);
{ name: 'nova' }, });
{ name: 'shimmer' }
];
};
const getOpenAIModels = () => { if (res) {
models = [{ name: 'tts-1' }, { name: 'tts-1-hd' }]; console.log(res);
}; models = res.models;
const getWebAPIVoices = () => {
const getVoicesLoop = setInterval(async () => {
voices = await speechSynthesis.getVoices();
// do your loop
if (voices.length > 0) {
clearInterval(getVoicesLoop);
} }
}, 100); }
}; };
// Fetch available ElevenLabs voices const getVoices = async () => {
const getVoices = async () => { if (TTS_ENGINE === '') {
const response = await fetch('/voices', { const getVoicesLoop = setInterval(async () => {
method: 'GET', voices = await speechSynthesis.getVoices();
headers: {
'Authorization': `Bearer ${localStorage.token}`
}
});
if (response.ok) { // do your loop
const data = await response.json(); if (voices.length > 0) {
voices = data.voices.map(name => ({ name })); // Update voices array with fetched names clearInterval(getVoicesLoop);
} else { }
toast.error('Failed to fetch voices'); }, 100);
} } else {
}; const res = await _getVoices(localStorage.token).catch((e) => {
toast.error(e);
});
if (res) {
console.log(res);
voices = res.voices;
}
}
};
const updateConfigHandler = async () => { const updateConfigHandler = async () => {
const res = await updateAudioConfig(localStorage.token, { const res = await updateAudioConfig(localStorage.token, {
@ -99,9 +99,6 @@
}; };
onMount(async () => { onMount(async () => {
// Fetch available voices on component mount
await getVoices();
const res = await getAudioConfig(localStorage.token); const res = await getAudioConfig(localStorage.token);
if (res) { if (res) {
@ -121,14 +118,8 @@
STT_MODEL = res.stt.MODEL; STT_MODEL = res.stt.MODEL;
} }
if (TTS_ENGINE === 'openai') { await getVoices();
getOpenAIVoices(); await getModels();
getOpenAIModels();
} else if(TTS_ENGINE === 'elevenlabs') {
await getVoices(); //Get voices if TTS_ENGINE is ElevenLabs
} else {
getWebAPIVoices();
}
}); });
</script> </script>
@ -208,14 +199,14 @@
bind:value={TTS_ENGINE} bind:value={TTS_ENGINE}
placeholder="Select a mode" placeholder="Select a mode"
on:change={async (e) => { on:change={async (e) => {
await updateConfigHandler();
await getVoices();
await getModels();
if (e.target.value === 'openai') { if (e.target.value === 'openai') {
getOpenAIVoices();
TTS_VOICE = 'alloy'; TTS_VOICE = 'alloy';
TTS_MODEL = 'tts-1'; TTS_MODEL = 'tts-1';
} else if(e.target.value === 'elevenlabs') {
await getVoices();
} else { } else {
getWebAPIVoices();
TTS_VOICE = ''; TTS_VOICE = '';
TTS_MODEL = ''; TTS_MODEL = '';
} }
@ -256,7 +247,7 @@
<hr class=" dark:border-gray-850 my-2" /> <hr class=" dark:border-gray-850 my-2" />
{#if TTS_ENGINE !== ''} {#if TTS_ENGINE === ''}
<div> <div>
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div> <div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
<div class="flex w-full"> <div class="flex w-full">
@ -268,9 +259,9 @@
<option value="" selected={TTS_VOICE !== ''}>{$i18n.t('Default')}</option> <option value="" selected={TTS_VOICE !== ''}>{$i18n.t('Default')}</option>
{#each voices as voice} {#each voices as voice}
<option <option
value={voice.name} value={voice.voiceURI}
class="bg-gray-100 dark:bg-gray-700" class="bg-gray-100 dark:bg-gray-700"
selected={TTS_VOICE === voice.name}>{voice.name}</option selected={TTS_VOICE === voice.voiceURI}>{voice.name}</option
> >
{/each} {/each}
</select> </select>
@ -292,7 +283,7 @@
<datalist id="voice-list"> <datalist id="voice-list">
{#each voices as voice} {#each voices as voice}
<option value={voice.name} /> <option value={voice.id}>{voice.name}</option>
{/each} {/each}
</datalist> </datalist>
</div> </div>
@ -311,7 +302,7 @@
<datalist id="model-list"> <datalist id="model-list">
{#each models as model} {#each models as model}
<option value={model.name} /> <option value={model.id} />
{/each} {/each}
</datalist> </datalist>
</div> </div>
@ -333,7 +324,7 @@
<datalist id="voice-list"> <datalist id="voice-list">
{#each voices as voice} {#each voices as voice}
<option value={voice.name} /> <option value={voice.id}>{voice.name}</option>
{/each} {/each}
</datalist> </datalist>
</div> </div>
@ -352,7 +343,7 @@
<datalist id="model-list"> <datalist id="model-list">
{#each models as model} {#each models as model}
<option value={model.name} /> <option value={model.id} />
{/each} {/each}
</datalist> </datalist>
</div> </div>