diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py index 4113e9884..167db77ba 100644 --- a/backend/apps/audio/main.py +++ b/backend/apps/audio/main.py @@ -10,12 +10,12 @@ from fastapi import ( File, Form, ) - from fastapi.responses import StreamingResponse, JSONResponse, FileResponse from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel +from typing import List import uuid import requests import hashlib @@ -31,6 +31,7 @@ from utils.utils import ( ) from utils.misc import calculate_sha256 + from config import ( SRC_LOG_LEVELS, CACHE_DIR, @@ -134,35 +135,6 @@ def convert_mp4_to_wav(file_path, output_path): print(f"Converted {file_path} to {output_path}") -async def get_available_voices(): - if app.state.config.TTS_ENGINE != "elevenlabs": - return {} - - base_url = "https://api.elevenlabs.io/v1" - headers = { - "xi-api-key": app.state.config.TTS_API_KEY, - "Content-Type": "application/json", - } - - voices_url = f"{base_url}/voices" - try: - response = requests.get(voices_url, headers=headers) - response.raise_for_status() - voices_data = response.json() - - voice_options = {} - for voice in voices_data.get("voices", []): - voice_name = voice["name"] - voice_id = voice["voice_id"] - voice_options[voice_name] = voice_id - - return voice_options - - except requests.RequestException as e: - log.error(f"Error fetching voices: {str(e)}") - return {} - - @app.get("/config") async def get_audio_config(user=Depends(get_admin_user)): return { @@ -281,7 +253,6 @@ async def speech(request: Request, user=Depends(get_verified_user)): ) elif app.state.config.TTS_ENGINE == "elevenlabs": - payload = None try: payload = json.loads(body.decode("utf-8")) @@ -289,12 +260,7 @@ async def speech(request: Request, user=Depends(get_verified_user)): log.exception(e) raise HTTPException(status_code=400, detail="Invalid JSON payload") - voice_options = await get_available_voices() - voice_id = voice_options.get(payload['voice']) - - if not voice_id: - raise HTTPException(status_code=400, detail="Invalid voice name") - + voice_id = payload.get("voice", "") url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}" headers = { @@ -472,7 +438,67 @@ def transcribe( ) +def get_available_models() -> List[dict]: + if app.state.config.TTS_ENGINE == "openai": + return [{"id": "tts-1"}, {"id": "tts-1-hd"}] + elif app.state.config.TTS_ENGINE == "elevenlabs": + headers = { + "xi-api-key": app.state.config.TTS_API_KEY, + "Content-Type": "application/json", + } + + try: + response = requests.get( + "https://api.elevenlabs.io/v1/models", headers=headers + ) + response.raise_for_status() + models = response.json() + return [ + {"name": model["name"], "id": model["model_id"]} for model in models + ] + except requests.RequestException as e: + log.error(f"Error fetching voices: {str(e)}") + return [] + + +@app.get("/models") +async def get_models(user=Depends(get_verified_user)): + return {"models": get_available_models()} + + +def get_available_voices() -> List[dict]: + if app.state.config.TTS_ENGINE == "openai": + return [ + {"name": "alloy", "id": "alloy"}, + {"name": "echo", "id": "echo"}, + {"name": "fable", "id": "fable"}, + {"name": "onyx", "id": "onyx"}, + {"name": "nova", "id": "nova"}, + {"name": "shimmer", "id": "shimmer"}, + ] + elif app.state.config.TTS_ENGINE == "elevenlabs": + headers = { + "xi-api-key": app.state.config.TTS_API_KEY, + "Content-Type": "application/json", + } + + try: + response = requests.get( + "https://api.elevenlabs.io/v1/voices", headers=headers + ) + response.raise_for_status() + voices_data = response.json() + + voices = [] + for voice in voices_data.get("voices", []): + voices.append({"name": voice["name"], "id": voice["voice_id"]}) + return voices + except requests.RequestException as e: + log.error(f"Error fetching voices: {str(e)}") + + return [] + + @app.get("/voices") async def get_voices(user=Depends(get_verified_user)): - voices = await get_available_voices() - return {"voices": list(voices.keys())} + return {"voices": get_available_voices()} diff --git a/src/lib/apis/audio/index.ts b/src/lib/apis/audio/index.ts index 9716c552a..af09af990 100644 --- a/src/lib/apis/audio/index.ts +++ b/src/lib/apis/audio/index.ts @@ -131,3 +131,59 @@ export const synthesizeOpenAISpeech = async ( return res; }; + +export const getModels = async (token: string = '') => { + let error = null; + + const res = await fetch(`${AUDIO_API_BASE_URL}/models`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + console.log(err); + + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const getVoices = async (token: string = '') => { + let error = null; + + const res = await fetch(`${AUDIO_API_BASE_URL}/voices`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + console.log(err); + + return null; + }); + + if (error) { + throw error; + } + + return res; +}; diff --git a/src/lib/components/admin/Settings/Audio.svelte b/src/lib/components/admin/Settings/Audio.svelte index 91aa3a612..d59995e26 100644 --- a/src/lib/components/admin/Settings/Audio.svelte +++ b/src/lib/components/admin/Settings/Audio.svelte @@ -1,13 +1,19 @@ @@ -208,14 +199,14 @@ bind:value={TTS_ENGINE} placeholder="Select a mode" on:change={async (e) => { + await updateConfigHandler(); + await getVoices(); + await getModels(); + if (e.target.value === 'openai') { - getOpenAIVoices(); TTS_VOICE = 'alloy'; TTS_MODEL = 'tts-1'; - } else if(e.target.value === 'elevenlabs') { - await getVoices(); } else { - getWebAPIVoices(); TTS_VOICE = ''; TTS_MODEL = ''; } @@ -256,7 +247,7 @@