Update configuration page, expose all Azure settings through ENV variables

This commit is contained in:
Pawel Ochman 2024-09-18 14:13:42 +01:00
parent eacb69074e
commit 4d9677e808
3 changed files with 138 additions and 44 deletions

View File

@ -19,6 +19,8 @@ from open_webui.config import (
AUDIO_TTS_OPENAI_API_KEY,
AUDIO_TTS_SPLIT_ON,
AUDIO_TTS_VOICE,
AUDIO_TTS_AZURE_SPEECH_REGION,
AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT,
CACHE_DIR,
CORS_ALLOW_ORIGIN,
WHISPER_MODEL,
@ -62,6 +64,9 @@ app.state.config.TTS_VOICE = AUDIO_TTS_VOICE
app.state.config.TTS_API_KEY = AUDIO_TTS_API_KEY
app.state.config.TTS_SPLIT_ON = AUDIO_TTS_SPLIT_ON
app.state.config.TTS_AZURE_SPEECH_REGION = AUDIO_TTS_AZURE_SPEECH_REGION
app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT = AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT
# setting device type for whisper model
whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu"
log.info(f"whisper_device_type: {whisper_device_type}")
@ -78,6 +83,8 @@ class TTSConfigForm(BaseModel):
MODEL: str
VOICE: str
SPLIT_ON: str
AZURE_SPEECH_REGION: str
AZURE_SPEECH_OUTPUT_FORMAT: str
class STTConfigForm(BaseModel):
@ -130,6 +137,8 @@ async def get_audio_config(user=Depends(get_admin_user)):
"MODEL": app.state.config.TTS_MODEL,
"VOICE": app.state.config.TTS_VOICE,
"SPLIT_ON": app.state.config.TTS_SPLIT_ON,
"AZURE_SPEECH_REGION": app.state.config.TTS_AZURE_SPEECH_REGION,
"AZURE_SPEECH_OUTPUT_FORMAT": app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT,
},
"stt": {
"OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL,
@ -151,6 +160,8 @@ async def update_audio_config(
app.state.config.TTS_MODEL = form_data.tts.MODEL
app.state.config.TTS_VOICE = form_data.tts.VOICE
app.state.config.TTS_SPLIT_ON = form_data.tts.SPLIT_ON
app.state.config.TTS_AZURE_SPEECH_REGION = form_data.tts.AZURE_SPEECH_REGION
app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT = form_data.tts.AZURE_SPEECH_OUTPUT_FORMAT
app.state.config.STT_OPENAI_API_BASE_URL = form_data.stt.OPENAI_API_BASE_URL
app.state.config.STT_OPENAI_API_KEY = form_data.stt.OPENAI_API_KEY
@ -166,6 +177,8 @@ async def update_audio_config(
"MODEL": app.state.config.TTS_MODEL,
"VOICE": app.state.config.TTS_VOICE,
"SPLIT_ON": app.state.config.TTS_SPLIT_ON,
"AZURE_SPEECH_REGION": app.state.config.TTS_AZURE_SPEECH_REGION,
"AZURE_SPEECH_OUTPUT_FORMAT": app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT,
},
"stt": {
"OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL,
@ -309,10 +322,10 @@ async def speech(request: Request, user=Depends(get_verified_user)):
log.exception(e)
raise HTTPException(status_code=400, detail="Invalid JSON payload")
region = "uksouth"
language = "en-GB-SoniaNeural"
locale = "en-GB"
output_format = "audio-24khz-160kbitrate-mono-mp3"
# Azure Speech request parameters, all read from the admin-configurable
# audio settings rather than hard-coded values.
region = app.state.config.TTS_AZURE_SPEECH_REGION
language = app.state.config.TTS_VOICE
# Voice short names look like "en-GB-SoniaNeural"; the locale is the first
# two dash-separated parts ("en-GB"), not only the language code ("en").
locale = "-".join(language.split("-")[:2])
output_format = app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT
url = f"https://{region}.tts.speech.microsoft.com/cognitiveservices/v1"
headers = {
@ -515,6 +528,22 @@ def get_available_voices() -> dict:
except Exception:
# Avoided @lru_cache with exception
pass
elif app.state.config.TTS_ENGINE == "azurespeechservice":
try:
    # The voice catalogue is served per region; the API key is sent in the
    # subscription header.
    region = app.state.config.TTS_AZURE_SPEECH_REGION
    url = f"https://{region}.tts.speech.microsoft.com/cognitiveservices/voices/list"
    headers = {"Ocp-Apim-Subscription-Key": app.state.config.TTS_API_KEY}

    # requests has no default timeout, so bound the call: a wrong or
    # unreachable region must not hang the caller indefinitely. A timeout
    # raises requests.Timeout, which the handler below already covers.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    voices = response.json()
    for voice in voices:
        ret[voice["ShortName"]] = f"{voice['DisplayName']} ({voice['ShortName']})"
except requests.RequestException as e:
    # Best effort: log the failure and continue with whatever was collected.
    log.error(f"Error fetching voices: {str(e)}")
return ret

View File

@ -1472,3 +1472,15 @@ AUDIO_TTS_SPLIT_ON = PersistentConfig(
"audio.tts.split_on",
os.getenv("AUDIO_TTS_SPLIT_ON", "punctuation"),
)
# Azure Speech region used to build the TTS service host name. The initial
# value is read from the AUDIO_TTS_AZURE_SPEECH_REGION environment variable
# and falls back to "uksouth" when it is unset.
# NOTE(review): the second argument looks like the persisted config path;
# confirm against the PersistentConfig definition.
AUDIO_TTS_AZURE_SPEECH_REGION = PersistentConfig(
"AUDIO_TTS_AZURE_SPEECH_REGION",
"audio.tts.azure_speech_region",
os.getenv("AUDIO_TTS_AZURE_SPEECH_REGION", "uksouth"),
)
# Audio output format requested from Azure Speech for TTS. Seeded from the
# AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT environment variable, with a 24 kHz
# mono MP3 format name as the fallback.
AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT = PersistentConfig(
    "AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT",
    "audio.tts.azure_speech_output_format",
    os.getenv(
        "AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT", "audio-24khz-160kbitrate-mono-mp3"
    ),
)

View File

@ -31,6 +31,8 @@
let TTS_MODEL = '';
let TTS_VOICE = '';
let TTS_SPLIT_ON: TTS_RESPONSE_SPLIT = TTS_RESPONSE_SPLIT.PUNCTUATION;
let TTS_AZURE_SPEECH_REGION = '';
let TTS_AZURE_SPEECH_OUTPUT_FORMAT = '';
let STT_OPENAI_API_BASE_URL = '';
let STT_OPENAI_API_KEY = '';
@ -87,7 +89,9 @@
ENGINE: TTS_ENGINE,
MODEL: TTS_MODEL,
VOICE: TTS_VOICE,
SPLIT_ON: TTS_SPLIT_ON
SPLIT_ON: TTS_SPLIT_ON,
AZURE_SPEECH_REGION: TTS_AZURE_SPEECH_REGION,
AZURE_SPEECH_OUTPUT_FORMAT: TTS_AZURE_SPEECH_OUTPUT_FORMAT,
},
stt: {
OPENAI_API_BASE_URL: STT_OPENAI_API_BASE_URL,
@ -120,6 +124,9 @@
TTS_SPLIT_ON = res.tts.SPLIT_ON || TTS_RESPONSE_SPLIT.PUNCTUATION;
TTS_AZURE_SPEECH_OUTPUT_FORMAT = res.tts.AZURE_SPEECH_OUTPUT_FORMAT;
TTS_AZURE_SPEECH_REGION = res.tts.AZURE_SPEECH_REGION;
STT_OPENAI_API_BASE_URL = res.stt.OPENAI_API_BASE_URL;
STT_OPENAI_API_KEY = res.stt.OPENAI_API_KEY;
@ -262,6 +269,12 @@
bind:value={TTS_API_KEY}
required
/>
<input
class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
placeholder={$i18n.t('Azure Region')}
bind:value={TTS_AZURE_SPEECH_REGION}
required
/>
</div>
</div>
{/if}
@ -330,48 +343,88 @@
</div>
</div>
</div>
{:else if TTS_ENGINE === 'elevenlabs'}
<div class=" flex gap-2">
<div class="w-full">
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
<div class="flex w-full">
<div class="flex-1">
<input
list="voice-list"
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
bind:value={TTS_VOICE}
placeholder="Select a voice"
/>
<datalist id="voice-list">
{#each voices as voice}
<option value={voice.id}>{voice.name}</option>
{/each}
</datalist>
{:else if TTS_ENGINE === 'elevenlabs'}
<div class=" flex gap-2">
<div class="w-full">
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
<div class="flex w-full">
<div class="flex-1">
<input
list="voice-list"
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
bind:value={TTS_VOICE}
placeholder="Select a voice"
/>
<datalist id="voice-list">
{#each voices as voice}
<option value={voice.id}>{voice.name}</option>
{/each}
</datalist>
</div>
</div>
</div>
<div class="w-full">
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Model')}</div>
<div class="flex w-full">
<div class="flex-1">
<input
list="tts-model-list"
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
bind:value={TTS_MODEL}
placeholder="Select a model"
/>
<datalist id="tts-model-list">
{#each models as model}
<option value={model.id} />
{/each}
</datalist>
</div>
</div>
</div>
</div>
{:else if TTS_ENGINE === 'azurespeechservice'}
<div class=" flex gap-2">
<div class="w-full">
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
<div class="flex w-full">
<div class="flex-1">
<input
list="voice-list"
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
bind:value={TTS_VOICE}
placeholder="Select a voice"
/>
<datalist id="voice-list">
{#each voices as voice}
<option value={voice.id}>{voice.name}</option>
{/each}
</datalist>
</div>
</div>
</div>
<div class="w-full">
<div class=" mb-1.5 text-sm font-medium">
{$i18n.t('Output format')}
<a href="https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-text-to-speech?tabs=streaming#audio-outputs" target="_blank" >
<small>{$i18n.t('Available list')}</small>
</a>
</div>
<div class="flex w-full">
<div class="flex-1">
<!-- Free-text Azure output format; no datalist is attached because the TTS model list does not apply to this field. -->
<input
	class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
	bind:value={TTS_AZURE_SPEECH_OUTPUT_FORMAT}
	placeholder="Select an output format"
/>
</div>
</div>
</div>
</div>
<div class="w-full">
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Model')}</div>
<div class="flex w-full">
<div class="flex-1">
<input
list="tts-model-list"
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
bind:value={TTS_MODEL}
placeholder="Select a model"
/>
<datalist id="tts-model-list">
{#each models as model}
<option value={model.id} />
{/each}
</datalist>
</div>
</div>
</div>
</div>
{/if}
{/if}
<hr class="dark:border-gray-850 my-2" />