diff --git a/backend/open_webui/routers/audio.py b/backend/open_webui/routers/audio.py
index d410369af..a26355945 100644
--- a/backend/open_webui/routers/audio.py
+++ b/backend/open_webui/routers/audio.py
@@ -113,6 +113,13 @@ def set_faster_whisper_model(model: str, auto_update: bool = False):
return whisper_model
+##########################################
+#
+# Audio API
+#
+##########################################
+
+
class TTSConfigForm(BaseModel):
OPENAI_API_BASE_URL: str
OPENAI_API_KEY: str
@@ -238,35 +245,38 @@ async def speech(request: Request, user=Depends(get_verified_user)):
if file_path.is_file():
return FileResponse(file_path)
+ payload = None
+ try:
+ payload = json.loads(body.decode("utf-8"))
+ except Exception as e:
+ log.exception(e)
+ raise HTTPException(status_code=400, detail="Invalid JSON payload")
+
if request.app.state.config.TTS_ENGINE == "openai":
- headers = {}
- headers["Authorization"] = (
- f"Bearer {request.app.state.config.TTS_OPENAI_API_KEY}"
- )
- headers["Content-Type"] = "application/json"
-
- if ENABLE_FORWARD_USER_INFO_HEADERS:
- headers["X-OpenWebUI-User-Name"] = user.name
- headers["X-OpenWebUI-User-Id"] = user.id
- headers["X-OpenWebUI-User-Email"] = user.email
- headers["X-OpenWebUI-User-Role"] = user.role
-
- try:
- body = body.decode("utf-8")
- body = json.loads(body)
- body["model"] = request.app.state.config.TTS_MODEL
- body = json.dumps(body).encode("utf-8")
- except Exception:
- pass
+ payload["model"] = request.app.state.config.TTS_MODEL
try:
async with aiohttp.ClientSession() as session:
async with session.post(
url=f"{request.app.state.config.TTS_OPENAI_API_BASE_URL}/audio/speech",
- data=body,
- headers=headers,
+ json=payload,  # payload is a parsed dict: json= serializes it as JSON, whereas data= would form-encode it
+ headers={
+ "Content-Type": "application/json",
+ "Authorization": f"Bearer {request.app.state.config.TTS_OPENAI_API_KEY}",
+ **(
+ {
+ "X-OpenWebUI-User-Name": user.name,
+ "X-OpenWebUI-User-Id": user.id,
+ "X-OpenWebUI-User-Email": user.email,
+ "X-OpenWebUI-User-Role": user.role,
+ }
+ if ENABLE_FORWARD_USER_INFO_HEADERS
+ else {}
+ ),
+ },
) as r:
r.raise_for_status()
+
async with aiofiles.open(file_path, "wb") as f:
await f.write(await r.read())
@@ -277,50 +287,47 @@ async def speech(request: Request, user=Depends(get_verified_user)):
except Exception as e:
log.exception(e)
- error_detail = "Open WebUI: Server Connection Error"
+ detail = None
+
try:
if r.status != 200:
res = await r.json()
if "error" in res:
- error_detail = f"External: {res['error']['message']}"
+ detail = f"External: {res['error'].get('message', '')}"
except Exception:
- error_detail = f"External: {e}"
+ detail = f"External: {e}"
raise HTTPException(
status_code=getattr(r, "status", 500),
- detail=error_detail,
+ detail=detail if detail else "Open WebUI: Server Connection Error",
)
elif request.app.state.config.TTS_ENGINE == "elevenlabs":
- try:
- payload = json.loads(body.decode("utf-8"))
- except Exception as e:
- log.exception(e)
- raise HTTPException(status_code=400, detail="Invalid JSON payload")
-
voice_id = payload.get("voice", "")
+
if voice_id not in get_available_voices():
raise HTTPException(
status_code=400,
detail="Invalid voice id",
)
- url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
- headers = {
- "Accept": "audio/mpeg",
- "Content-Type": "application/json",
- "xi-api-key": request.app.state.config.TTS_API_KEY,
- }
- data = {
- "text": payload["input"],
- "model_id": request.app.state.config.TTS_MODEL,
- "voice_settings": {"stability": 0.5, "similarity_boost": 0.5},
- }
-
try:
async with aiohttp.ClientSession() as session:
- async with session.post(url, json=data, headers=headers) as r:
+ async with session.post(
+ f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
+ json={
+ "text": payload["input"],
+ "model_id": request.app.state.config.TTS_MODEL,
+ "voice_settings": {"stability": 0.5, "similarity_boost": 0.5},
+ },
+ headers={
+ "Accept": "audio/mpeg",
+ "Content-Type": "application/json",
+ "xi-api-key": request.app.state.config.TTS_API_KEY,
+ },
+ ) as r:
r.raise_for_status()
+
async with aiofiles.open(file_path, "wb") as f:
await f.write(await r.read())
@@ -331,18 +338,19 @@ async def speech(request: Request, user=Depends(get_verified_user)):
except Exception as e:
log.exception(e)
- error_detail = "Open WebUI: Server Connection Error"
+ detail = None
+
try:
if r.status != 200:
res = await r.json()
if "error" in res:
- error_detail = f"External: {res['error']['message']}"
+ detail = f"External: {res['error'].get('message', '')}"
except Exception:
- error_detail = f"External: {e}"
+ detail = f"External: {e}"
raise HTTPException(
status_code=getattr(r, "status", 500),
- detail=error_detail,
+ detail=detail if detail else "Open WebUI: Server Connection Error",
)
elif request.app.state.config.TTS_ENGINE == "azure":
@@ -356,32 +364,45 @@ async def speech(request: Request, user=Depends(get_verified_user)):
language = request.app.state.config.TTS_VOICE
locale = "-".join(request.app.state.config.TTS_VOICE.split("-")[:1])
output_format = request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT
- url = f"https://{region}.tts.speech.microsoft.com/cognitiveservices/v1"
-
- headers = {
- "Ocp-Apim-Subscription-Key": request.app.state.config.TTS_API_KEY,
- "Content-Type": "application/ssml+xml",
- "X-Microsoft-OutputFormat": output_format,
- }
-
- data = f"""
- {payload["input"]}
- """
try:
+ data = f"""
+ {payload["input"]}
+ """
async with aiohttp.ClientSession() as session:
- async with session.post(url, headers=headers, data=data) as response:
- if response.status == 200:
- async with aiofiles.open(file_path, "wb") as f:
- await f.write(await response.read())
- return FileResponse(file_path)
- else:
- error_msg = f"Error synthesizing speech - {response.reason}"
- log.error(error_msg)
- raise HTTPException(status_code=500, detail=error_msg)
+ async with session.post(
+ f"https://{region}.tts.speech.microsoft.com/cognitiveservices/v1",
+ headers={
+ "Ocp-Apim-Subscription-Key": request.app.state.config.TTS_API_KEY,
+ "Content-Type": "application/ssml+xml",
+ "X-Microsoft-OutputFormat": output_format,
+ },
+ data=data,
+ ) as r:
+ r.raise_for_status()
+
+ async with aiofiles.open(file_path, "wb") as f:
+ await f.write(await r.read())
+
+ return FileResponse(file_path)
+
except Exception as e:
log.exception(e)
- raise HTTPException(status_code=500, detail=str(e))
+ detail = None
+
+ try:
+ if r.status != 200:
+ res = await r.json()
+ if "error" in res:
+ detail = f"External: {res['error'].get('message', '')}"
+ except Exception:
+ detail = f"External: {e}"
+
+ raise HTTPException(
+ status_code=getattr(r, "status", 500),
+ detail=detail if detail else "Open WebUI: Server Connection Error",
+ )
+
elif request.app.state.config.TTS_ENGINE == "transformers":
payload = None
try: