From 7e94acd36c8e84ca485d4a7c7ebba81f139b9fdf Mon Sep 17 00:00:00 2001 From: georgechen Date: Wed, 5 Mar 2025 23:20:08 +0800 Subject: [PATCH] feat: add FishSpeech TTS engine support --- backend/open_webui/routers/audio.py | 65 +++++++++++++++++++ .../components/admin/Settings/Audio.svelte | 54 +++++++++++++++ src/lib/utils/index.ts | 5 +- 3 files changed, 122 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/routers/audio.py b/backend/open_webui/routers/audio.py index c949e65a4..18d32167f 100644 --- a/backend/open_webui/routers/audio.py +++ b/backend/open_webui/routers/audio.py @@ -464,7 +464,72 @@ async def speech(request: Request, user=Depends(get_verified_user)): await f.write(json.dumps(payload)) return FileResponse(file_path) + + elif request.app.state.config.TTS_ENGINE == "fishspeech": + try: + timeout = aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT) + async with aiohttp.ClientSession( + timeout=timeout, trust_env=True + ) as session: + async with session.post( + url=f"{request.app.state.config.TTS_OPENAI_API_BASE_URL}/tts", + json={ + "text": payload["input"], + "chunk_length": 200, + "format": "mp3", + "references": [], + "reference_id": request.app.state.config.TTS_VOICE, + "seed": None, + "use_memory_cache": "on", + "normalize": True, + "streaming": False, + "max_new_tokens": 1024, + "top_p": 0.7, + "repetition_penalty": 1.2, + "temperature": 0.7 + }, + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {request.app.state.config.TTS_API_KEY}", + **( + { + "X-OpenWebUI-User-Name": user.name, + "X-OpenWebUI-User-Id": user.id, + "X-OpenWebUI-User-Email": user.email, + "X-OpenWebUI-User-Role": user.role, + } + if ENABLE_FORWARD_USER_INFO_HEADERS + else {} + ), + }, + ) as r: + r.raise_for_status() + async with aiofiles.open(file_path, "wb") as f: + await f.write(await r.read()) + + async with aiofiles.open(file_body_path, "w") as f: + await f.write(json.dumps(payload)) + + return FileResponse(file_path) + + except 
Exception as e: + log.exception(e) + detail = None + + try: + if r.status != 200: + res = await r.json() + + if "error" in res: + detail = f"External: {res['error'].get('message', '')}" + except Exception: + detail = f"External: {e}" + + raise HTTPException( + status_code=getattr(r, "status", 500), + detail=detail if detail else "Open WebUI: Server Connection Error", + ) def transcribe(request: Request, file_path): log.info(f"transcribe: {file_path}") diff --git a/src/lib/components/admin/Settings/Audio.svelte b/src/lib/components/admin/Settings/Audio.svelte index d36f4af6c..1928c3af9 100644 --- a/src/lib/components/admin/Settings/Audio.svelte +++ b/src/lib/components/admin/Settings/Audio.svelte @@ -364,6 +364,7 @@ + @@ -409,6 +410,20 @@ /> + {:else if TTS_ENGINE === 'fishspeech'} +
+
+ + + +
+
{/if}
@@ -601,6 +616,45 @@ + {:else if TTS_ENGINE === 'fishspeech'} +
+
+
{$i18n.t('TTS Voice')}
+
+
+ +
+
+
+ The usage of reference voice id can be found in the fishspeech documentation, + + {$i18n.t(`click here`)}. + + + To learn more about fishspeech, + + + {$i18n.t(`click here`, { + name: 'fishspeech' + })}. + + +
+
+
{/if}
diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts index 9ce6fb301..6d6e465dd 100644 --- a/src/lib/utils/index.ts +++ b/src/lib/utils/index.ts @@ -738,11 +738,12 @@ export const extractSentencesForAudio = (text: string) => { if (lastIndex >= 0) { const previousText = mergedTexts[lastIndex]; let wordCount = previousText.split(/\s+/).length; - const charCount = previousText.length; + let charCount = previousText.length; const isCJK = /[\u4e00-\u9fa5\u3040-\u30ff\u31f0-\u31ff\u3400-\u4dbf\u4e00-\u9fff\uF900-\uFAFF]/.test(previousText); if (isCJK) { - wordCount = charCount * 3; + wordCount = charCount; + charCount = charCount * 10; } if (wordCount < 4 || charCount < 50) { mergedTexts[lastIndex] = previousText + ' ' + currentText;