mirror of
https://github.com/open-webui/open-webui
synced 2025-04-08 14:49:46 +00:00
feat: add FishSpeech TTS engine support
This commit is contained in:
parent
46a5fa1514
commit
7e94acd36c
@ -465,6 +465,71 @@ async def speech(request: Request, user=Depends(get_verified_user)):
|
|||||||
|
|
||||||
return FileResponse(file_path)
|
return FileResponse(file_path)
|
||||||
|
|
||||||
|
elif request.app.state.config.TTS_ENGINE == "fishspeech":
|
||||||
|
try:
|
||||||
|
timeout = aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT)
|
||||||
|
async with aiohttp.ClientSession(
|
||||||
|
timeout=timeout, trust_env=True
|
||||||
|
) as session:
|
||||||
|
async with session.post(
|
||||||
|
url=f"{request.app.state.config.TTS_OPENAI_API_BASE_URL}/tts",
|
||||||
|
json={
|
||||||
|
"text": payload["input"],
|
||||||
|
"chunk_length": 200,
|
||||||
|
"format": "mp3",
|
||||||
|
"references": [],
|
||||||
|
"reference_id": request.app.state.config.TTS_VOICE,
|
||||||
|
"seed": None,
|
||||||
|
"use_memory_cache": "on",
|
||||||
|
"normalize": True,
|
||||||
|
"streaming": False,
|
||||||
|
"max_new_tokens": 1024,
|
||||||
|
"top_p": 0.7,
|
||||||
|
"repetition_penalty": 1.2,
|
||||||
|
"temperature": 0.7
|
||||||
|
},
|
||||||
|
headers={
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"Authorization": f"Bearer {request.app.state.config.TTS_API_KEY}",
|
||||||
|
**(
|
||||||
|
{
|
||||||
|
"X-OpenWebUI-User-Name": user.name,
|
||||||
|
"X-OpenWebUI-User-Id": user.id,
|
||||||
|
"X-OpenWebUI-User-Email": user.email,
|
||||||
|
"X-OpenWebUI-User-Role": user.role,
|
||||||
|
}
|
||||||
|
if ENABLE_FORWARD_USER_INFO_HEADERS
|
||||||
|
else {}
|
||||||
|
),
|
||||||
|
},
|
||||||
|
) as r:
|
||||||
|
r.raise_for_status()
|
||||||
|
|
||||||
|
async with aiofiles.open(file_path, "wb") as f:
|
||||||
|
await f.write(await r.read())
|
||||||
|
|
||||||
|
async with aiofiles.open(file_body_path, "w") as f:
|
||||||
|
await f.write(json.dumps(payload))
|
||||||
|
|
||||||
|
return FileResponse(file_path)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
log.exception(e)
|
||||||
|
detail = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
if r.status != 200:
|
||||||
|
res = await r.json()
|
||||||
|
|
||||||
|
if "error" in res:
|
||||||
|
detail = f"External: {res['error'].get('message', '')}"
|
||||||
|
except Exception:
|
||||||
|
detail = f"External: {e}"
|
||||||
|
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=getattr(r, "status", 500),
|
||||||
|
detail=detail if detail else "Open WebUI: Server Connection Error",
|
||||||
|
)
|
||||||
|
|
||||||
def transcribe(request: Request, file_path):
|
def transcribe(request: Request, file_path):
|
||||||
log.info(f"transcribe: {file_path}")
|
log.info(f"transcribe: {file_path}")
|
||||||
|
@ -364,6 +364,7 @@
|
|||||||
<option value="openai">{$i18n.t('OpenAI')}</option>
|
<option value="openai">{$i18n.t('OpenAI')}</option>
|
||||||
<option value="elevenlabs">{$i18n.t('ElevenLabs')}</option>
|
<option value="elevenlabs">{$i18n.t('ElevenLabs')}</option>
|
||||||
<option value="azure">{$i18n.t('Azure AI Speech')}</option>
|
<option value="azure">{$i18n.t('Azure AI Speech')}</option>
|
||||||
|
<option value="fishspeech">{$i18n.t('FishSpeech')}</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -409,6 +410,20 @@
|
|||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
{:else if TTS_ENGINE === 'fishspeech'}
|
||||||
|
<div>
|
||||||
|
<div class="mt-1 flex gap-2 mb-1">
|
||||||
|
<input
|
||||||
|
class="flex-1 w-full bg-transparent outline-hidden"
|
||||||
|
placeholder={$i18n.t('API Base URL')}
|
||||||
|
bind:value={TTS_OPENAI_API_BASE_URL}
|
||||||
|
required
|
||||||
|
title="fishspeech api base url like https://api.fish.audio/v1"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<SensitiveInput placeholder={$i18n.t('API Key')} bind:value={TTS_API_KEY} />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
<hr class="border-gray-100 dark:border-gray-850 my-2" />
|
<hr class="border-gray-100 dark:border-gray-850 my-2" />
|
||||||
@ -601,6 +616,45 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
{:else if TTS_ENGINE === 'fishspeech'}
|
||||||
|
<div class=" flex gap-2">
|
||||||
|
<div class="w-full">
|
||||||
|
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
|
||||||
|
<div class="flex w-full">
|
||||||
|
<div class="flex-1">
|
||||||
|
<input
|
||||||
|
list="voice-list"
|
||||||
|
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
||||||
|
bind:value={TTS_VOICE}
|
||||||
|
placeholder="Input fishspeech reference voice id"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
|
||||||
|
The usage of reference voice id can be found in the fishspeech documentation.,
|
||||||
|
<a
|
||||||
|
class=" hover:underline dark:text-gray-200 text-gray-800"
|
||||||
|
href="https://docs.fish.audio/api-reference/endpoint/openapi-v1/text-to-speech#body-reference-id"
|
||||||
|
target="_blank"
|
||||||
|
>
|
||||||
|
{$i18n.t(`click here`)}.
|
||||||
|
</a>
|
||||||
|
|
||||||
|
To learn more about fishspeech,
|
||||||
|
|
||||||
|
<a
|
||||||
|
class=" hover:underline dark:text-gray-200 text-gray-800"
|
||||||
|
href="https://fish.audio/"
|
||||||
|
target="_blank"
|
||||||
|
>
|
||||||
|
{$i18n.t(`click here`, {
|
||||||
|
name: 'fishspeech'
|
||||||
|
})}.
|
||||||
|
</a>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
<hr class="border-gray-100 dark:border-gray-850 my-2" />
|
<hr class="border-gray-100 dark:border-gray-850 my-2" />
|
||||||
|
@ -738,11 +738,12 @@ export const extractSentencesForAudio = (text: string) => {
|
|||||||
if (lastIndex >= 0) {
|
if (lastIndex >= 0) {
|
||||||
const previousText = mergedTexts[lastIndex];
|
const previousText = mergedTexts[lastIndex];
|
||||||
let wordCount = previousText.split(/\s+/).length;
|
let wordCount = previousText.split(/\s+/).length;
|
||||||
const charCount = previousText.length;
|
let charCount = previousText.length;
|
||||||
|
|
||||||
const isCJK = /[\u4e00-\u9fa5\u3040-\u30ff\u31f0-\u31ff\u3400-\u4dbf\u4e00-\u9fff\uF900-\uFAFF]/.test(previousText);
|
const isCJK = /[\u4e00-\u9fa5\u3040-\u30ff\u31f0-\u31ff\u3400-\u4dbf\u4e00-\u9fff\uF900-\uFAFF]/.test(previousText);
|
||||||
if (isCJK) {
|
if (isCJK) {
|
||||||
wordCount = charCount * 3;
|
wordCount = charCount;
|
||||||
|
charCount = charCount * 10;
|
||||||
}
|
}
|
||||||
if (wordCount < 4 || charCount < 50) {
|
if (wordCount < 4 || charCount < 50) {
|
||||||
mergedTexts[lastIndex] = previousText + ' ' + currentText;
|
mergedTexts[lastIndex] = previousText + ' ' + currentText;
|
||||||
|
Loading…
Reference in New Issue
Block a user