mirror of
https://github.com/open-webui/open-webui
synced 2025-06-26 18:26:48 +00:00
Merge pull request #13379 from NoMoreFood/dev
feat: Azure STT Allow Base URL & Max Speaker Setting
This commit is contained in:
commit
e57f2c928a
@ -2650,6 +2650,18 @@ AUDIO_STT_AZURE_LOCALES = PersistentConfig(
|
||||
os.getenv("AUDIO_STT_AZURE_LOCALES", ""),
|
||||
)
|
||||
|
||||
AUDIO_STT_AZURE_BASE_URL = PersistentConfig(
|
||||
"AUDIO_STT_AZURE_BASE_URL",
|
||||
"audio.stt.azure.base_url",
|
||||
os.getenv("AUDIO_STT_AZURE_BASE_URL", ""),
|
||||
)
|
||||
|
||||
AUDIO_STT_AZURE_MAX_SPEAKERS = PersistentConfig(
|
||||
"AUDIO_STT_AZURE_MAX_SPEAKERS",
|
||||
"audio.stt.azure.max_speakers",
|
||||
os.getenv("AUDIO_STT_AZURE_MAX_SPEAKERS", "3"),
|
||||
)
|
||||
|
||||
AUDIO_TTS_OPENAI_API_BASE_URL = PersistentConfig(
|
||||
"AUDIO_TTS_OPENAI_API_BASE_URL",
|
||||
"audio.tts.openai.api_base_url",
|
||||
|
@ -155,6 +155,8 @@ from open_webui.config import (
|
||||
AUDIO_STT_AZURE_API_KEY,
|
||||
AUDIO_STT_AZURE_REGION,
|
||||
AUDIO_STT_AZURE_LOCALES,
|
||||
AUDIO_STT_AZURE_BASE_URL,
|
||||
AUDIO_STT_AZURE_MAX_SPEAKERS,
|
||||
AUDIO_TTS_API_KEY,
|
||||
AUDIO_TTS_ENGINE,
|
||||
AUDIO_TTS_MODEL,
|
||||
@ -829,6 +831,8 @@ app.state.config.DEEPGRAM_API_KEY = DEEPGRAM_API_KEY
|
||||
app.state.config.AUDIO_STT_AZURE_API_KEY = AUDIO_STT_AZURE_API_KEY
|
||||
app.state.config.AUDIO_STT_AZURE_REGION = AUDIO_STT_AZURE_REGION
|
||||
app.state.config.AUDIO_STT_AZURE_LOCALES = AUDIO_STT_AZURE_LOCALES
|
||||
app.state.config.AUDIO_STT_AZURE_BASE_URL = AUDIO_STT_AZURE_BASE_URL
|
||||
app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS = AUDIO_STT_AZURE_MAX_SPEAKERS
|
||||
|
||||
app.state.config.TTS_OPENAI_API_BASE_URL = AUDIO_TTS_OPENAI_API_BASE_URL
|
||||
app.state.config.TTS_OPENAI_API_KEY = AUDIO_TTS_OPENAI_API_KEY
|
||||
|
@ -150,7 +150,8 @@ class STTConfigForm(BaseModel):
|
||||
AZURE_API_KEY: str
|
||||
AZURE_REGION: str
|
||||
AZURE_LOCALES: str
|
||||
|
||||
AZURE_BASE_URL: str
|
||||
AZURE_MAX_SPEAKERS: str
|
||||
|
||||
class AudioConfigUpdateForm(BaseModel):
|
||||
tts: TTSConfigForm
|
||||
@ -181,6 +182,8 @@ async def get_audio_config(request: Request, user=Depends(get_admin_user)):
|
||||
"AZURE_API_KEY": request.app.state.config.AUDIO_STT_AZURE_API_KEY,
|
||||
"AZURE_REGION": request.app.state.config.AUDIO_STT_AZURE_REGION,
|
||||
"AZURE_LOCALES": request.app.state.config.AUDIO_STT_AZURE_LOCALES,
|
||||
"AZURE_BASE_URL": request.app.state.config.AUDIO_STT_AZURE_BASE_URL,
|
||||
"AZURE_MAX_SPEAKERS": request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS,
|
||||
},
|
||||
}
|
||||
|
||||
@ -210,6 +213,8 @@ async def update_audio_config(
|
||||
request.app.state.config.AUDIO_STT_AZURE_API_KEY = form_data.stt.AZURE_API_KEY
|
||||
request.app.state.config.AUDIO_STT_AZURE_REGION = form_data.stt.AZURE_REGION
|
||||
request.app.state.config.AUDIO_STT_AZURE_LOCALES = form_data.stt.AZURE_LOCALES
|
||||
request.app.state.config.AUDIO_STT_AZURE_BASE_URL = form_data.stt.AZURE_BASE_URL
|
||||
request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS = form_data.stt.AZURE_MAX_SPEAKERS
|
||||
|
||||
if request.app.state.config.STT_ENGINE == "":
|
||||
request.app.state.faster_whisper_model = set_faster_whisper_model(
|
||||
@ -238,6 +243,8 @@ async def update_audio_config(
|
||||
"AZURE_API_KEY": request.app.state.config.AUDIO_STT_AZURE_API_KEY,
|
||||
"AZURE_REGION": request.app.state.config.AUDIO_STT_AZURE_REGION,
|
||||
"AZURE_LOCALES": request.app.state.config.AUDIO_STT_AZURE_LOCALES,
|
||||
"AZURE_BASE_URL": request.app.state.config.AUDIO_STT_AZURE_BASE_URL,
|
||||
"AZURE_MAX_SPEAKERS": request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS,
|
||||
},
|
||||
}
|
||||
|
||||
@ -641,6 +648,8 @@ def transcribe(request: Request, file_path):
|
||||
api_key = request.app.state.config.AUDIO_STT_AZURE_API_KEY
|
||||
region = request.app.state.config.AUDIO_STT_AZURE_REGION
|
||||
locales = request.app.state.config.AUDIO_STT_AZURE_LOCALES
|
||||
base_url = request.app.state.config.AUDIO_STT_AZURE_BASE_URL
|
||||
max_speakers = request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS
|
||||
|
||||
# IF NO LOCALES, USE DEFAULTS
|
||||
if len(locales) < 2:
|
||||
@ -664,7 +673,13 @@ def transcribe(request: Request, file_path):
|
||||
if not api_key or not region:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Azure API key and region are required for Azure STT",
|
||||
detail="Azure API key is required for Azure STT",
|
||||
)
|
||||
|
||||
if not base_url and not region:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Azure region or base url is required for Azure STT",
|
||||
)
|
||||
|
||||
r = None
|
||||
@ -674,13 +689,14 @@ def transcribe(request: Request, file_path):
|
||||
"definition": json.dumps(
|
||||
{
|
||||
"locales": locales.split(","),
|
||||
"diarization": {"maxSpeakers": 3, "enabled": True},
|
||||
"diarization": {"maxSpeakers": max_speakers, "enabled": True},
|
||||
}
|
||||
if locales
|
||||
else {}
|
||||
)
|
||||
}
|
||||
url = f"https://{region}.api.cognitive.microsoft.com/speechtotext/transcriptions:transcribe?api-version=2024-11-15"
|
||||
|
||||
url = base_url or f"https://{region}.api.cognitive.microsoft.com/speechtotext/transcriptions:transcribe?api-version=2024-11-15"
|
||||
|
||||
# Use context manager to ensure file is properly closed
|
||||
with open(file_path, "rb") as audio_file:
|
||||
|
@ -42,6 +42,8 @@
|
||||
let STT_AZURE_API_KEY = '';
|
||||
let STT_AZURE_REGION = '';
|
||||
let STT_AZURE_LOCALES = '';
|
||||
let STT_AZURE_BASE_URL = '';
|
||||
let STT_AZURE_MAX_SPEAKERS = '';
|
||||
let STT_DEEPGRAM_API_KEY = '';
|
||||
|
||||
let STT_WHISPER_MODEL_LOADING = false;
|
||||
@ -114,7 +116,9 @@
|
||||
DEEPGRAM_API_KEY: STT_DEEPGRAM_API_KEY,
|
||||
AZURE_API_KEY: STT_AZURE_API_KEY,
|
||||
AZURE_REGION: STT_AZURE_REGION,
|
||||
AZURE_LOCALES: STT_AZURE_LOCALES
|
||||
AZURE_LOCALES: STT_AZURE_LOCALES,
|
||||
AZURE_BASE_URL: STT_AZURE_BASE_URL,
|
||||
AZURE_MAX_SPEAKERS: STT_AZURE_MAX_SPEAKERS
|
||||
}
|
||||
});
|
||||
|
||||
@ -157,6 +161,8 @@
|
||||
STT_AZURE_API_KEY = res.stt.AZURE_API_KEY;
|
||||
STT_AZURE_REGION = res.stt.AZURE_REGION;
|
||||
STT_AZURE_LOCALES = res.stt.AZURE_LOCALES;
|
||||
STT_AZURE_BASE_URL = res.stt.AZURE_BASE_URL;
|
||||
STT_AZURE_MAX_SPEAKERS = res.stt.AZURE_MAX_SPEAKERS;
|
||||
STT_DEEPGRAM_API_KEY = res.stt.DEEPGRAM_API_KEY;
|
||||
}
|
||||
|
||||
@ -287,6 +293,32 @@
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('Base URL')}</div>
|
||||
<div class="flex w-full">
|
||||
<div class="flex-1">
|
||||
<input
|
||||
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
||||
bind:value={STT_AZURE_BASE_URL}
|
||||
placeholder={$i18n.t('(leave blank for Azure Commercial URL auto-generation)')}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('Max Speakers')}</div>
|
||||
<div class="flex w-full">
|
||||
<div class="flex-1">
|
||||
<input
|
||||
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
||||
bind:value={STT_AZURE_MAX_SPEAKERS}
|
||||
placeholder={$i18n.t('e.g., 3, 4, 5 (leave blank for default)')}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{:else if STT_ENGINE === ''}
|
||||
|
Loading…
Reference in New Issue
Block a user