mirror of
https://github.com/open-webui/open-webui
synced 2025-06-26 18:26:48 +00:00
Merge pull request #13379 from NoMoreFood/dev
feat: Azure STT Allow Base URL & Max Speaker Setting
This commit is contained in:
commit
e57f2c928a
@ -2650,6 +2650,18 @@ AUDIO_STT_AZURE_LOCALES = PersistentConfig(
|
|||||||
os.getenv("AUDIO_STT_AZURE_LOCALES", ""),
|
os.getenv("AUDIO_STT_AZURE_LOCALES", ""),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
AUDIO_STT_AZURE_BASE_URL = PersistentConfig(
|
||||||
|
"AUDIO_STT_AZURE_BASE_URL",
|
||||||
|
"audio.stt.azure.base_url",
|
||||||
|
os.getenv("AUDIO_STT_AZURE_BASE_URL", ""),
|
||||||
|
)
|
||||||
|
|
||||||
|
AUDIO_STT_AZURE_MAX_SPEAKERS = PersistentConfig(
|
||||||
|
"AUDIO_STT_AZURE_MAX_SPEAKERS",
|
||||||
|
"audio.stt.azure.max_speakers",
|
||||||
|
os.getenv("AUDIO_STT_AZURE_MAX_SPEAKERS", "3"),
|
||||||
|
)
|
||||||
|
|
||||||
AUDIO_TTS_OPENAI_API_BASE_URL = PersistentConfig(
|
AUDIO_TTS_OPENAI_API_BASE_URL = PersistentConfig(
|
||||||
"AUDIO_TTS_OPENAI_API_BASE_URL",
|
"AUDIO_TTS_OPENAI_API_BASE_URL",
|
||||||
"audio.tts.openai.api_base_url",
|
"audio.tts.openai.api_base_url",
|
||||||
|
@ -155,6 +155,8 @@ from open_webui.config import (
|
|||||||
AUDIO_STT_AZURE_API_KEY,
|
AUDIO_STT_AZURE_API_KEY,
|
||||||
AUDIO_STT_AZURE_REGION,
|
AUDIO_STT_AZURE_REGION,
|
||||||
AUDIO_STT_AZURE_LOCALES,
|
AUDIO_STT_AZURE_LOCALES,
|
||||||
|
AUDIO_STT_AZURE_BASE_URL,
|
||||||
|
AUDIO_STT_AZURE_MAX_SPEAKERS,
|
||||||
AUDIO_TTS_API_KEY,
|
AUDIO_TTS_API_KEY,
|
||||||
AUDIO_TTS_ENGINE,
|
AUDIO_TTS_ENGINE,
|
||||||
AUDIO_TTS_MODEL,
|
AUDIO_TTS_MODEL,
|
||||||
@ -829,6 +831,8 @@ app.state.config.DEEPGRAM_API_KEY = DEEPGRAM_API_KEY
|
|||||||
app.state.config.AUDIO_STT_AZURE_API_KEY = AUDIO_STT_AZURE_API_KEY
|
app.state.config.AUDIO_STT_AZURE_API_KEY = AUDIO_STT_AZURE_API_KEY
|
||||||
app.state.config.AUDIO_STT_AZURE_REGION = AUDIO_STT_AZURE_REGION
|
app.state.config.AUDIO_STT_AZURE_REGION = AUDIO_STT_AZURE_REGION
|
||||||
app.state.config.AUDIO_STT_AZURE_LOCALES = AUDIO_STT_AZURE_LOCALES
|
app.state.config.AUDIO_STT_AZURE_LOCALES = AUDIO_STT_AZURE_LOCALES
|
||||||
|
app.state.config.AUDIO_STT_AZURE_BASE_URL = AUDIO_STT_AZURE_BASE_URL
|
||||||
|
app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS = AUDIO_STT_AZURE_MAX_SPEAKERS
|
||||||
|
|
||||||
app.state.config.TTS_OPENAI_API_BASE_URL = AUDIO_TTS_OPENAI_API_BASE_URL
|
app.state.config.TTS_OPENAI_API_BASE_URL = AUDIO_TTS_OPENAI_API_BASE_URL
|
||||||
app.state.config.TTS_OPENAI_API_KEY = AUDIO_TTS_OPENAI_API_KEY
|
app.state.config.TTS_OPENAI_API_KEY = AUDIO_TTS_OPENAI_API_KEY
|
||||||
|
@ -150,7 +150,8 @@ class STTConfigForm(BaseModel):
|
|||||||
AZURE_API_KEY: str
|
AZURE_API_KEY: str
|
||||||
AZURE_REGION: str
|
AZURE_REGION: str
|
||||||
AZURE_LOCALES: str
|
AZURE_LOCALES: str
|
||||||
|
AZURE_BASE_URL: str
|
||||||
|
AZURE_MAX_SPEAKERS: str
|
||||||
|
|
||||||
class AudioConfigUpdateForm(BaseModel):
|
class AudioConfigUpdateForm(BaseModel):
|
||||||
tts: TTSConfigForm
|
tts: TTSConfigForm
|
||||||
@ -181,6 +182,8 @@ async def get_audio_config(request: Request, user=Depends(get_admin_user)):
|
|||||||
"AZURE_API_KEY": request.app.state.config.AUDIO_STT_AZURE_API_KEY,
|
"AZURE_API_KEY": request.app.state.config.AUDIO_STT_AZURE_API_KEY,
|
||||||
"AZURE_REGION": request.app.state.config.AUDIO_STT_AZURE_REGION,
|
"AZURE_REGION": request.app.state.config.AUDIO_STT_AZURE_REGION,
|
||||||
"AZURE_LOCALES": request.app.state.config.AUDIO_STT_AZURE_LOCALES,
|
"AZURE_LOCALES": request.app.state.config.AUDIO_STT_AZURE_LOCALES,
|
||||||
|
"AZURE_BASE_URL": request.app.state.config.AUDIO_STT_AZURE_BASE_URL,
|
||||||
|
"AZURE_MAX_SPEAKERS": request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -210,6 +213,8 @@ async def update_audio_config(
|
|||||||
request.app.state.config.AUDIO_STT_AZURE_API_KEY = form_data.stt.AZURE_API_KEY
|
request.app.state.config.AUDIO_STT_AZURE_API_KEY = form_data.stt.AZURE_API_KEY
|
||||||
request.app.state.config.AUDIO_STT_AZURE_REGION = form_data.stt.AZURE_REGION
|
request.app.state.config.AUDIO_STT_AZURE_REGION = form_data.stt.AZURE_REGION
|
||||||
request.app.state.config.AUDIO_STT_AZURE_LOCALES = form_data.stt.AZURE_LOCALES
|
request.app.state.config.AUDIO_STT_AZURE_LOCALES = form_data.stt.AZURE_LOCALES
|
||||||
|
request.app.state.config.AUDIO_STT_AZURE_BASE_URL = form_data.stt.AZURE_BASE_URL
|
||||||
|
request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS = form_data.stt.AZURE_MAX_SPEAKERS
|
||||||
|
|
||||||
if request.app.state.config.STT_ENGINE == "":
|
if request.app.state.config.STT_ENGINE == "":
|
||||||
request.app.state.faster_whisper_model = set_faster_whisper_model(
|
request.app.state.faster_whisper_model = set_faster_whisper_model(
|
||||||
@ -238,6 +243,8 @@ async def update_audio_config(
|
|||||||
"AZURE_API_KEY": request.app.state.config.AUDIO_STT_AZURE_API_KEY,
|
"AZURE_API_KEY": request.app.state.config.AUDIO_STT_AZURE_API_KEY,
|
||||||
"AZURE_REGION": request.app.state.config.AUDIO_STT_AZURE_REGION,
|
"AZURE_REGION": request.app.state.config.AUDIO_STT_AZURE_REGION,
|
||||||
"AZURE_LOCALES": request.app.state.config.AUDIO_STT_AZURE_LOCALES,
|
"AZURE_LOCALES": request.app.state.config.AUDIO_STT_AZURE_LOCALES,
|
||||||
|
"AZURE_BASE_URL": request.app.state.config.AUDIO_STT_AZURE_BASE_URL,
|
||||||
|
"AZURE_MAX_SPEAKERS": request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -641,6 +648,8 @@ def transcribe(request: Request, file_path):
|
|||||||
api_key = request.app.state.config.AUDIO_STT_AZURE_API_KEY
|
api_key = request.app.state.config.AUDIO_STT_AZURE_API_KEY
|
||||||
region = request.app.state.config.AUDIO_STT_AZURE_REGION
|
region = request.app.state.config.AUDIO_STT_AZURE_REGION
|
||||||
locales = request.app.state.config.AUDIO_STT_AZURE_LOCALES
|
locales = request.app.state.config.AUDIO_STT_AZURE_LOCALES
|
||||||
|
base_url = request.app.state.config.AUDIO_STT_AZURE_BASE_URL
|
||||||
|
max_speakers = request.app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS
|
||||||
|
|
||||||
# IF NO LOCALES, USE DEFAULTS
|
# IF NO LOCALES, USE DEFAULTS
|
||||||
if len(locales) < 2:
|
if len(locales) < 2:
|
||||||
@ -664,7 +673,13 @@ def transcribe(request: Request, file_path):
|
|||||||
if not api_key or not region:
|
if not api_key or not region:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=400,
|
status_code=400,
|
||||||
detail="Azure API key and region are required for Azure STT",
|
detail="Azure API key is required for Azure STT",
|
||||||
|
)
|
||||||
|
|
||||||
|
if not base_url and not region:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail="Azure region or base url is required for Azure STT",
|
||||||
)
|
)
|
||||||
|
|
||||||
r = None
|
r = None
|
||||||
@ -674,13 +689,14 @@ def transcribe(request: Request, file_path):
|
|||||||
"definition": json.dumps(
|
"definition": json.dumps(
|
||||||
{
|
{
|
||||||
"locales": locales.split(","),
|
"locales": locales.split(","),
|
||||||
"diarization": {"maxSpeakers": 3, "enabled": True},
|
"diarization": {"maxSpeakers": max_speakers, "enabled": True},
|
||||||
}
|
}
|
||||||
if locales
|
if locales
|
||||||
else {}
|
else {}
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
url = f"https://{region}.api.cognitive.microsoft.com/speechtotext/transcriptions:transcribe?api-version=2024-11-15"
|
|
||||||
|
url = base_url or f"https://{region}.api.cognitive.microsoft.com/speechtotext/transcriptions:transcribe?api-version=2024-11-15"
|
||||||
|
|
||||||
# Use context manager to ensure file is properly closed
|
# Use context manager to ensure file is properly closed
|
||||||
with open(file_path, "rb") as audio_file:
|
with open(file_path, "rb") as audio_file:
|
||||||
|
@ -42,6 +42,8 @@
|
|||||||
let STT_AZURE_API_KEY = '';
|
let STT_AZURE_API_KEY = '';
|
||||||
let STT_AZURE_REGION = '';
|
let STT_AZURE_REGION = '';
|
||||||
let STT_AZURE_LOCALES = '';
|
let STT_AZURE_LOCALES = '';
|
||||||
|
let STT_AZURE_BASE_URL = '';
|
||||||
|
let STT_AZURE_MAX_SPEAKERS = '';
|
||||||
let STT_DEEPGRAM_API_KEY = '';
|
let STT_DEEPGRAM_API_KEY = '';
|
||||||
|
|
||||||
let STT_WHISPER_MODEL_LOADING = false;
|
let STT_WHISPER_MODEL_LOADING = false;
|
||||||
@ -114,7 +116,9 @@
|
|||||||
DEEPGRAM_API_KEY: STT_DEEPGRAM_API_KEY,
|
DEEPGRAM_API_KEY: STT_DEEPGRAM_API_KEY,
|
||||||
AZURE_API_KEY: STT_AZURE_API_KEY,
|
AZURE_API_KEY: STT_AZURE_API_KEY,
|
||||||
AZURE_REGION: STT_AZURE_REGION,
|
AZURE_REGION: STT_AZURE_REGION,
|
||||||
AZURE_LOCALES: STT_AZURE_LOCALES
|
AZURE_LOCALES: STT_AZURE_LOCALES,
|
||||||
|
AZURE_BASE_URL: STT_AZURE_BASE_URL,
|
||||||
|
AZURE_MAX_SPEAKERS: STT_AZURE_MAX_SPEAKERS
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -157,6 +161,8 @@
|
|||||||
STT_AZURE_API_KEY = res.stt.AZURE_API_KEY;
|
STT_AZURE_API_KEY = res.stt.AZURE_API_KEY;
|
||||||
STT_AZURE_REGION = res.stt.AZURE_REGION;
|
STT_AZURE_REGION = res.stt.AZURE_REGION;
|
||||||
STT_AZURE_LOCALES = res.stt.AZURE_LOCALES;
|
STT_AZURE_LOCALES = res.stt.AZURE_LOCALES;
|
||||||
|
STT_AZURE_BASE_URL = res.stt.AZURE_BASE_URL;
|
||||||
|
STT_AZURE_MAX_SPEAKERS = res.stt.AZURE_MAX_SPEAKERS;
|
||||||
STT_DEEPGRAM_API_KEY = res.stt.DEEPGRAM_API_KEY;
|
STT_DEEPGRAM_API_KEY = res.stt.DEEPGRAM_API_KEY;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -287,6 +293,32 @@
|
|||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('Base URL')}</div>
|
||||||
|
<div class="flex w-full">
|
||||||
|
<div class="flex-1">
|
||||||
|
<input
|
||||||
|
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
||||||
|
bind:value={STT_AZURE_BASE_URL}
|
||||||
|
placeholder={$i18n.t('(leave blank for Azure Commercial URL auto-generation)')}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('Max Speakers')}</div>
|
||||||
|
<div class="flex w-full">
|
||||||
|
<div class="flex-1">
|
||||||
|
<input
|
||||||
|
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
||||||
|
bind:value={STT_AZURE_MAX_SPEAKERS}
|
||||||
|
placeholder={$i18n.t('e.g., 3, 4, 5 (leave blank for default)')}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{:else if STT_ENGINE === ''}
|
{:else if STT_ENGINE === ''}
|
||||||
|
Loading…
Reference in New Issue
Block a user