Mirror of https://github.com/open-webui/open-webui (synced 2025-06-26 18:26:48 +00:00)
Merge pull request #13540 from NoMoreFood/dev
feat: Azure TTS Allow Base URL
This commit is contained in:
@@ -138,6 +138,7 @@ class TTSConfigForm(BaseModel):
|
||||
VOICE: str
|
||||
SPLIT_ON: str
|
||||
AZURE_SPEECH_REGION: str
|
||||
+ AZURE_SPEECH_BASE_URL: str
|
||||
AZURE_SPEECH_OUTPUT_FORMAT: str
|
||||
|
||||
|
||||
@@ -172,6 +173,7 @@ async def get_audio_config(request: Request, user=Depends(get_admin_user)):
|
||||
"VOICE": request.app.state.config.TTS_VOICE,
|
||||
"SPLIT_ON": request.app.state.config.TTS_SPLIT_ON,
|
||||
"AZURE_SPEECH_REGION": request.app.state.config.TTS_AZURE_SPEECH_REGION,
|
||||
+ "AZURE_SPEECH_BASE_URL": request.app.state.config.TTS_AZURE_SPEECH_BASE_URL,
|
||||
"AZURE_SPEECH_OUTPUT_FORMAT": request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
||||
},
|
||||
"stt": {
|
||||
@@ -202,6 +204,9 @@ async def update_audio_config(
|
||||
request.app.state.config.TTS_VOICE = form_data.tts.VOICE
|
||||
request.app.state.config.TTS_SPLIT_ON = form_data.tts.SPLIT_ON
|
||||
request.app.state.config.TTS_AZURE_SPEECH_REGION = form_data.tts.AZURE_SPEECH_REGION
|
||||
+ request.app.state.config.TTS_AZURE_SPEECH_BASE_URL = (
|
||||
+ form_data.tts.AZURE_SPEECH_BASE_URL
|
||||
+ )
|
||||
request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT = (
|
||||
form_data.tts.AZURE_SPEECH_OUTPUT_FORMAT
|
||||
)
|
||||
@@ -235,6 +240,7 @@ async def update_audio_config(
|
||||
"VOICE": request.app.state.config.TTS_VOICE,
|
||||
"SPLIT_ON": request.app.state.config.TTS_SPLIT_ON,
|
||||
"AZURE_SPEECH_REGION": request.app.state.config.TTS_AZURE_SPEECH_REGION,
|
||||
+ "AZURE_SPEECH_BASE_URL": request.app.state.config.TTS_AZURE_SPEECH_BASE_URL,
|
||||
"AZURE_SPEECH_OUTPUT_FORMAT": request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
||||
},
|
||||
"stt": {
|
||||
@@ -406,7 +412,8 @@ async def speech(request: Request, user=Depends(get_verified_user)):
|
||||
log.exception(e)
|
||||
raise HTTPException(status_code=400, detail="Invalid JSON payload")
|
||||
|
||||
- region = request.app.state.config.TTS_AZURE_SPEECH_REGION
|
||||
+ region = request.app.state.config.TTS_AZURE_SPEECH_REGION or "eastus"
|
||||
+ base_url = request.app.state.config.TTS_AZURE_SPEECH_BASE_URL
|
||||
language = request.app.state.config.TTS_VOICE
|
||||
locale = "-".join(request.app.state.config.TTS_VOICE.split("-")[:1])
|
||||
output_format = request.app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT
|
||||
@@ -420,7 +427,8 @@ async def speech(request: Request, user=Depends(get_verified_user)):
|
||||
timeout=timeout, trust_env=True
|
||||
) as session:
|
||||
async with session.post(
|
||||
- f"https://{region}.tts.speech.microsoft.com/cognitiveservices/v1",
|
||||
+ (base_url or f"https://{region}.tts.speech.microsoft.com")
|
||||
+ + "/cognitiveservices/v1",
|
||||
headers={
|
||||
"Ocp-Apim-Subscription-Key": request.app.state.config.TTS_API_KEY,
|
||||
"Content-Type": "application/ssml+xml",
|
||||
@@ -651,10 +659,10 @@ def transcribe(request: Request, file_path):
|
||||
)
|
||||
|
||||
api_key = request.app.state.config.AUDIO_STT_AZURE_API_KEY
|
||||
- region = request.app.state.config.AUDIO_STT_AZURE_REGION
|
||||
+ region = request.app.state.config.AUDIO_STT_AZURE_REGION or "eastus"
|
||||
locales = request.app.state.config.AUDIO_STT_AZURE_LOCALES
|
||||
base_url = request.app.state.config.AUDIO_STT_AZURE_BASE_URL
|
||||
- max_speakers = request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS
|
||||
+ max_speakers = request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS or 3
|
||||
|
||||
# IF NO LOCALES, USE DEFAULTS
|
||||
if len(locales) < 2:
|
||||
@@ -681,12 +689,6 @@ def transcribe(request: Request, file_path):
|
||||
detail="Azure API key is required for Azure STT",
|
||||
)
|
||||
|
||||
- if not base_url and not region:
|
||||
- raise HTTPException(
|
||||
- status_code=400,
|
||||
- detail="Azure region or base url is required for Azure STT",
|
||||
- )
|
||||
|
||||
r = None
|
||||
try:
|
||||
# Prepare the request
|
||||
@@ -702,9 +704,8 @@ def transcribe(request: Request, file_path):
|
||||
}
|
||||
|
||||
url = (
|
||||
- base_url
|
||||
- or f"https://{region}.api.cognitive.microsoft.com/speechtotext/transcriptions:transcribe?api-version=2024-11-15"
|
||||
- )
|
||||
+ base_url or f"https://{region}.api.cognitive.microsoft.com"
|
||||
+ ) + "/speechtotext/transcriptions:transcribe?api-version=2024-11-15"
|
||||
|
||||
# Use context manager to ensure file is properly closed
|
||||
with open(file_path, "rb") as audio_file:
|
||||
@@ -939,7 +940,10 @@ def get_available_voices(request) -> dict:
|
||||
elif request.app.state.config.TTS_ENGINE == "azure":
|
||||
try:
|
||||
region = request.app.state.config.TTS_AZURE_SPEECH_REGION
|
||||
- url = f"https://{region}.tts.speech.microsoft.com/cognitiveservices/voices/list"
|
||||
+ base_url = request.app.state.config.TTS_AZURE_SPEECH_BASE_URL
|
||||
+ url = (
|
||||
+ base_url or f"https://{region}.tts.speech.microsoft.com"
|
||||
+ ) + "/cognitiveservices/voices/list"
|
||||
headers = {
|
||||
"Ocp-Apim-Subscription-Key": request.app.state.config.TTS_API_KEY
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user