mirror of
https://github.com/open-webui/open-webui
synced 2025-06-12 17:33:11 +00:00
Add Custom Azure TTS URL
This commit is contained in:
parent
23b9354cf6
commit
5aabe21cbe
@ -2689,7 +2689,7 @@ AUDIO_STT_AZURE_BASE_URL = PersistentConfig(
|
|||||||
AUDIO_STT_AZURE_MAX_SPEAKERS = PersistentConfig(
|
AUDIO_STT_AZURE_MAX_SPEAKERS = PersistentConfig(
|
||||||
"AUDIO_STT_AZURE_MAX_SPEAKERS",
|
"AUDIO_STT_AZURE_MAX_SPEAKERS",
|
||||||
"audio.stt.azure.max_speakers",
|
"audio.stt.azure.max_speakers",
|
||||||
os.getenv("AUDIO_STT_AZURE_MAX_SPEAKERS", "3"),
|
os.getenv("AUDIO_STT_AZURE_MAX_SPEAKERS", ""),
|
||||||
)
|
)
|
||||||
|
|
||||||
AUDIO_TTS_OPENAI_API_BASE_URL = PersistentConfig(
|
AUDIO_TTS_OPENAI_API_BASE_URL = PersistentConfig(
|
||||||
@ -2737,7 +2737,13 @@ AUDIO_TTS_SPLIT_ON = PersistentConfig(
|
|||||||
AUDIO_TTS_AZURE_SPEECH_REGION = PersistentConfig(
|
AUDIO_TTS_AZURE_SPEECH_REGION = PersistentConfig(
|
||||||
"AUDIO_TTS_AZURE_SPEECH_REGION",
|
"AUDIO_TTS_AZURE_SPEECH_REGION",
|
||||||
"audio.tts.azure.speech_region",
|
"audio.tts.azure.speech_region",
|
||||||
os.getenv("AUDIO_TTS_AZURE_SPEECH_REGION", "eastus"),
|
os.getenv("AUDIO_TTS_AZURE_SPEECH_REGION", ""),
|
||||||
|
)
|
||||||
|
|
||||||
|
AUDIO_TTS_AZURE_SPEECH_BASE_URL = PersistentConfig(
|
||||||
|
"AUDIO_TTS_AZURE_SPEECH_BASE_URL",
|
||||||
|
"audio.tts.azure.speech_base_url",
|
||||||
|
os.getenv("AUDIO_TTS_AZURE_SPEECH_BASE_URL", ""),
|
||||||
)
|
)
|
||||||
|
|
||||||
AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT = PersistentConfig(
|
AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT = PersistentConfig(
|
||||||
|
@ -166,6 +166,7 @@ from open_webui.config import (
|
|||||||
AUDIO_TTS_SPLIT_ON,
|
AUDIO_TTS_SPLIT_ON,
|
||||||
AUDIO_TTS_VOICE,
|
AUDIO_TTS_VOICE,
|
||||||
AUDIO_TTS_AZURE_SPEECH_REGION,
|
AUDIO_TTS_AZURE_SPEECH_REGION,
|
||||||
|
AUDIO_TTS_AZURE_SPEECH_BASE_URL,
|
||||||
AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
||||||
PLAYWRIGHT_WS_URL,
|
PLAYWRIGHT_WS_URL,
|
||||||
PLAYWRIGHT_TIMEOUT,
|
PLAYWRIGHT_TIMEOUT,
|
||||||
@ -852,6 +853,7 @@ app.state.config.TTS_SPLIT_ON = AUDIO_TTS_SPLIT_ON
|
|||||||
|
|
||||||
|
|
||||||
app.state.config.TTS_AZURE_SPEECH_REGION = AUDIO_TTS_AZURE_SPEECH_REGION
|
app.state.config.TTS_AZURE_SPEECH_REGION = AUDIO_TTS_AZURE_SPEECH_REGION
|
||||||
|
app.state.config.TTS_AZURE_SPEECH_BASE_URL = AUDIO_TTS_AZURE_SPEECH_BASE_URL
|
||||||
app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT = AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT
|
app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT = AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT
|
||||||
|
|
||||||
|
|
||||||
|
@ -138,6 +138,7 @@ class TTSConfigForm(BaseModel):
|
|||||||
VOICE: str
|
VOICE: str
|
||||||
SPLIT_ON: str
|
SPLIT_ON: str
|
||||||
AZURE_SPEECH_REGION: str
|
AZURE_SPEECH_REGION: str
|
||||||
|
AZURE_SPEECH_BASE_URL: str
|
||||||
AZURE_SPEECH_OUTPUT_FORMAT: str
|
AZURE_SPEECH_OUTPUT_FORMAT: str
|
||||||
|
|
||||||
|
|
||||||
@ -172,6 +173,7 @@ async def get_audio_config(request: Request, user=Depends(get_admin_user)):
|
|||||||
"VOICE": request.app.state.config.TTS_VOICE,
|
"VOICE": request.app.state.config.TTS_VOICE,
|
||||||
"SPLIT_ON": request.app.state.config.TTS_SPLIT_ON,
|
"SPLIT_ON": request.app.state.config.TTS_SPLIT_ON,
|
||||||
"AZURE_SPEECH_REGION": request.app.state.config.TTS_AZURE_SPEECH_REGION,
|
"AZURE_SPEECH_REGION": request.app.state.config.TTS_AZURE_SPEECH_REGION,
|
||||||
|
"AZURE_SPEECH_BASE_URL": request.app.state.config.TTS_AZURE_SPEECH_BASE_URL,
|
||||||
"AZURE_SPEECH_OUTPUT_FORMAT": request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
"AZURE_SPEECH_OUTPUT_FORMAT": request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
||||||
},
|
},
|
||||||
"stt": {
|
"stt": {
|
||||||
@ -202,6 +204,9 @@ async def update_audio_config(
|
|||||||
request.app.state.config.TTS_VOICE = form_data.tts.VOICE
|
request.app.state.config.TTS_VOICE = form_data.tts.VOICE
|
||||||
request.app.state.config.TTS_SPLIT_ON = form_data.tts.SPLIT_ON
|
request.app.state.config.TTS_SPLIT_ON = form_data.tts.SPLIT_ON
|
||||||
request.app.state.config.TTS_AZURE_SPEECH_REGION = form_data.tts.AZURE_SPEECH_REGION
|
request.app.state.config.TTS_AZURE_SPEECH_REGION = form_data.tts.AZURE_SPEECH_REGION
|
||||||
|
request.app.state.config.TTS_AZURE_SPEECH_BASE_URL = (
|
||||||
|
form_data.tts.AZURE_SPEECH_BASE_URL
|
||||||
|
)
|
||||||
request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT = (
|
request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT = (
|
||||||
form_data.tts.AZURE_SPEECH_OUTPUT_FORMAT
|
form_data.tts.AZURE_SPEECH_OUTPUT_FORMAT
|
||||||
)
|
)
|
||||||
@ -235,6 +240,7 @@ async def update_audio_config(
|
|||||||
"VOICE": request.app.state.config.TTS_VOICE,
|
"VOICE": request.app.state.config.TTS_VOICE,
|
||||||
"SPLIT_ON": request.app.state.config.TTS_SPLIT_ON,
|
"SPLIT_ON": request.app.state.config.TTS_SPLIT_ON,
|
||||||
"AZURE_SPEECH_REGION": request.app.state.config.TTS_AZURE_SPEECH_REGION,
|
"AZURE_SPEECH_REGION": request.app.state.config.TTS_AZURE_SPEECH_REGION,
|
||||||
|
"AZURE_SPEECH_BASE_URL": request.app.state.config.TTS_AZURE_SPEECH_BASE_URL,
|
||||||
"AZURE_SPEECH_OUTPUT_FORMAT": request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
"AZURE_SPEECH_OUTPUT_FORMAT": request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
||||||
},
|
},
|
||||||
"stt": {
|
"stt": {
|
||||||
@ -406,7 +412,8 @@ async def speech(request: Request, user=Depends(get_verified_user)):
|
|||||||
log.exception(e)
|
log.exception(e)
|
||||||
raise HTTPException(status_code=400, detail="Invalid JSON payload")
|
raise HTTPException(status_code=400, detail="Invalid JSON payload")
|
||||||
|
|
||||||
region = request.app.state.config.TTS_AZURE_SPEECH_REGION
|
region = request.app.state.config.TTS_AZURE_SPEECH_REGION or "eastus"
|
||||||
|
base_url = request.app.state.config.TTS_AZURE_SPEECH_BASE_URL
|
||||||
language = request.app.state.config.TTS_VOICE
|
language = request.app.state.config.TTS_VOICE
|
||||||
locale = "-".join(request.app.state.config.TTS_VOICE.split("-")[:1])
|
locale = "-".join(request.app.state.config.TTS_VOICE.split("-")[:1])
|
||||||
output_format = request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT
|
output_format = request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT
|
||||||
@ -420,7 +427,8 @@ async def speech(request: Request, user=Depends(get_verified_user)):
|
|||||||
timeout=timeout, trust_env=True
|
timeout=timeout, trust_env=True
|
||||||
) as session:
|
) as session:
|
||||||
async with session.post(
|
async with session.post(
|
||||||
f"https://{region}.tts.speech.microsoft.com/cognitiveservices/v1",
|
(base_url or f"https://{region}.tts.speech.microsoft.com")
|
||||||
|
+ "/cognitiveservices/v1",
|
||||||
headers={
|
headers={
|
||||||
"Ocp-Apim-Subscription-Key": request.app.state.config.TTS_API_KEY,
|
"Ocp-Apim-Subscription-Key": request.app.state.config.TTS_API_KEY,
|
||||||
"Content-Type": "application/ssml+xml",
|
"Content-Type": "application/ssml+xml",
|
||||||
@ -651,10 +659,10 @@ def transcribe(request: Request, file_path):
|
|||||||
)
|
)
|
||||||
|
|
||||||
api_key = request.app.state.config.AUDIO_STT_AZURE_API_KEY
|
api_key = request.app.state.config.AUDIO_STT_AZURE_API_KEY
|
||||||
region = request.app.state.config.AUDIO_STT_AZURE_REGION
|
region = request.app.state.config.AUDIO_STT_AZURE_REGION or "eastus"
|
||||||
locales = request.app.state.config.AUDIO_STT_AZURE_LOCALES
|
locales = request.app.state.config.AUDIO_STT_AZURE_LOCALES
|
||||||
base_url = request.app.state.config.AUDIO_STT_AZURE_BASE_URL
|
base_url = request.app.state.config.AUDIO_STT_AZURE_BASE_URL
|
||||||
max_speakers = request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS
|
max_speakers = request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS or 3
|
||||||
|
|
||||||
# IF NO LOCALES, USE DEFAULTS
|
# IF NO LOCALES, USE DEFAULTS
|
||||||
if len(locales) < 2:
|
if len(locales) < 2:
|
||||||
@ -681,12 +689,6 @@ def transcribe(request: Request, file_path):
|
|||||||
detail="Azure API key is required for Azure STT",
|
detail="Azure API key is required for Azure STT",
|
||||||
)
|
)
|
||||||
|
|
||||||
if not base_url and not region:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=400,
|
|
||||||
detail="Azure region or base url is required for Azure STT",
|
|
||||||
)
|
|
||||||
|
|
||||||
r = None
|
r = None
|
||||||
try:
|
try:
|
||||||
# Prepare the request
|
# Prepare the request
|
||||||
@ -702,9 +704,8 @@ def transcribe(request: Request, file_path):
|
|||||||
}
|
}
|
||||||
|
|
||||||
url = (
|
url = (
|
||||||
base_url
|
base_url or f"https://{region}.api.cognitive.microsoft.com"
|
||||||
or f"https://{region}.api.cognitive.microsoft.com/speechtotext/transcriptions:transcribe?api-version=2024-11-15"
|
) + "/speechtotext/transcriptions:transcribe?api-version=2024-11-15"
|
||||||
)
|
|
||||||
|
|
||||||
# Use context manager to ensure file is properly closed
|
# Use context manager to ensure file is properly closed
|
||||||
with open(file_path, "rb") as audio_file:
|
with open(file_path, "rb") as audio_file:
|
||||||
@ -933,7 +934,10 @@ def get_available_voices(request) -> dict:
|
|||||||
elif request.app.state.config.TTS_ENGINE == "azure":
|
elif request.app.state.config.TTS_ENGINE == "azure":
|
||||||
try:
|
try:
|
||||||
region = request.app.state.config.TTS_AZURE_SPEECH_REGION
|
region = request.app.state.config.TTS_AZURE_SPEECH_REGION
|
||||||
url = f"https://{region}.tts.speech.microsoft.com/cognitiveservices/voices/list"
|
base_url = request.app.state.config.TTS_AZURE_SPEECH_BASE_URL
|
||||||
|
url = (
|
||||||
|
base_url or f"https://{region}.tts.speech.microsoft.com"
|
||||||
|
) + "/cognitiveservices/voices/list"
|
||||||
headers = {
|
headers = {
|
||||||
"Ocp-Apim-Subscription-Key": request.app.state.config.TTS_API_KEY
|
"Ocp-Apim-Subscription-Key": request.app.state.config.TTS_API_KEY
|
||||||
}
|
}
|
||||||
|
@ -32,6 +32,7 @@
|
|||||||
let TTS_VOICE = '';
|
let TTS_VOICE = '';
|
||||||
let TTS_SPLIT_ON: TTS_RESPONSE_SPLIT = TTS_RESPONSE_SPLIT.PUNCTUATION;
|
let TTS_SPLIT_ON: TTS_RESPONSE_SPLIT = TTS_RESPONSE_SPLIT.PUNCTUATION;
|
||||||
let TTS_AZURE_SPEECH_REGION = '';
|
let TTS_AZURE_SPEECH_REGION = '';
|
||||||
|
let TTS_AZURE_SPEECH_BASE_URL = '';
|
||||||
let TTS_AZURE_SPEECH_OUTPUT_FORMAT = '';
|
let TTS_AZURE_SPEECH_OUTPUT_FORMAT = '';
|
||||||
|
|
||||||
let STT_OPENAI_API_BASE_URL = '';
|
let STT_OPENAI_API_BASE_URL = '';
|
||||||
@ -105,6 +106,7 @@
|
|||||||
VOICE: TTS_VOICE,
|
VOICE: TTS_VOICE,
|
||||||
SPLIT_ON: TTS_SPLIT_ON,
|
SPLIT_ON: TTS_SPLIT_ON,
|
||||||
AZURE_SPEECH_REGION: TTS_AZURE_SPEECH_REGION,
|
AZURE_SPEECH_REGION: TTS_AZURE_SPEECH_REGION,
|
||||||
|
AZURE_SPEECH_BASE_URL: TTS_AZURE_SPEECH_BASE_URL,
|
||||||
AZURE_SPEECH_OUTPUT_FORMAT: TTS_AZURE_SPEECH_OUTPUT_FORMAT
|
AZURE_SPEECH_OUTPUT_FORMAT: TTS_AZURE_SPEECH_OUTPUT_FORMAT
|
||||||
},
|
},
|
||||||
stt: {
|
stt: {
|
||||||
@ -149,8 +151,9 @@
|
|||||||
|
|
||||||
TTS_SPLIT_ON = res.tts.SPLIT_ON || TTS_RESPONSE_SPLIT.PUNCTUATION;
|
TTS_SPLIT_ON = res.tts.SPLIT_ON || TTS_RESPONSE_SPLIT.PUNCTUATION;
|
||||||
|
|
||||||
TTS_AZURE_SPEECH_OUTPUT_FORMAT = res.tts.AZURE_SPEECH_OUTPUT_FORMAT;
|
|
||||||
TTS_AZURE_SPEECH_REGION = res.tts.AZURE_SPEECH_REGION;
|
TTS_AZURE_SPEECH_REGION = res.tts.AZURE_SPEECH_REGION;
|
||||||
|
TTS_AZURE_SPEECH_BASE_URL = res.tts.AZURE_SPEECH_BASE_URL;
|
||||||
|
TTS_AZURE_SPEECH_OUTPUT_FORMAT = res.tts.AZURE_SPEECH_OUTPUT_FORMAT;
|
||||||
|
|
||||||
STT_OPENAI_API_BASE_URL = res.stt.OPENAI_API_BASE_URL;
|
STT_OPENAI_API_BASE_URL = res.stt.OPENAI_API_BASE_URL;
|
||||||
STT_OPENAI_API_KEY = res.stt.OPENAI_API_KEY;
|
STT_OPENAI_API_KEY = res.stt.OPENAI_API_KEY;
|
||||||
@ -272,16 +275,23 @@
|
|||||||
bind:value={STT_AZURE_API_KEY}
|
bind:value={STT_AZURE_API_KEY}
|
||||||
required
|
required
|
||||||
/>
|
/>
|
||||||
<input
|
|
||||||
class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
|
||||||
placeholder={$i18n.t('Azure Region')}
|
|
||||||
bind:value={STT_AZURE_REGION}
|
|
||||||
required
|
|
||||||
/>
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<hr class="border-gray-100 dark:border-gray-850 my-2" />
|
<hr class="border-gray-100 dark:border-gray-850 my-2" />
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('Azure Region')}</div>
|
||||||
|
<div class="flex w-full">
|
||||||
|
<div class="flex-1">
|
||||||
|
<input
|
||||||
|
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
||||||
|
bind:value={STT_AZURE_REGION}
|
||||||
|
placeholder={$i18n.t('e.g., westus (leave blank for eastus)')}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div>
|
<div>
|
||||||
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('Language Locales')}</div>
|
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('Language Locales')}</div>
|
||||||
<div class="flex w-full">
|
<div class="flex w-full">
|
||||||
@ -293,16 +303,16 @@
|
|||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div>
|
<div>
|
||||||
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('Base URL')}</div>
|
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('Endpoint URL')}</div>
|
||||||
<div class="flex w-full">
|
<div class="flex w-full">
|
||||||
<div class="flex-1">
|
<div class="flex-1">
|
||||||
<input
|
<input
|
||||||
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
||||||
bind:value={STT_AZURE_BASE_URL}
|
bind:value={STT_AZURE_BASE_URL}
|
||||||
placeholder={$i18n.t('(leave blank for Azure Commercial URL auto-generation)')}
|
placeholder={$i18n.t('(leave blank for to use commercial endpoint)')}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -468,18 +478,39 @@
|
|||||||
{:else if TTS_ENGINE === 'azure'}
|
{:else if TTS_ENGINE === 'azure'}
|
||||||
<div>
|
<div>
|
||||||
<div class="mt-1 flex gap-2 mb-1">
|
<div class="mt-1 flex gap-2 mb-1">
|
||||||
<input
|
<SensitiveInput
|
||||||
class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
|
||||||
placeholder={$i18n.t('API Key')}
|
placeholder={$i18n.t('API Key')}
|
||||||
bind:value={TTS_API_KEY}
|
bind:value={TTS_API_KEY}
|
||||||
required
|
required
|
||||||
/>
|
/>
|
||||||
<input
|
</div>
|
||||||
class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
|
||||||
placeholder={$i18n.t('Azure Region')}
|
<hr class="border-gray-100 dark:border-gray-850 my-2" />
|
||||||
bind:value={TTS_AZURE_SPEECH_REGION}
|
|
||||||
required
|
<div>
|
||||||
/>
|
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('Azure Region')}</div>
|
||||||
|
<div class="flex w-full">
|
||||||
|
<div class="flex-1">
|
||||||
|
<input
|
||||||
|
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
||||||
|
bind:value={TTS_AZURE_SPEECH_REGION}
|
||||||
|
placeholder={$i18n.t('e.g., westus (leave blank for eastus)')}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('Endpoint URL')}</div>
|
||||||
|
<div class="flex w-full">
|
||||||
|
<div class="flex-1">
|
||||||
|
<input
|
||||||
|
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
||||||
|
bind:value={TTS_AZURE_SPEECH_BASE_URL}
|
||||||
|
placeholder={$i18n.t('(leave blank for to use commercial endpoint)')}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
|
Loading…
Reference in New Issue
Block a user