mirror of
https://github.com/open-webui/open-webui
synced 2025-06-26 18:26:48 +00:00
refac: audio
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
<script lang="ts">
|
||||
import { settings, showCallOverlay } from '$lib/stores';
|
||||
import { config, settings, showCallOverlay } from '$lib/stores';
|
||||
import { onMount, tick, getContext } from 'svelte';
|
||||
|
||||
import { blobToFile, calculateSHA256, extractSentences, findWordIndices } from '$lib/utils';
|
||||
@@ -159,9 +159,9 @@
|
||||
const getOpenAISpeech = async (text) => {
|
||||
const res = await synthesizeOpenAISpeech(
|
||||
localStorage.token,
|
||||
$settings?.audio?.speaker ?? 'alloy',
|
||||
$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
|
||||
text,
|
||||
$settings?.audio?.model ?? 'tts-1'
|
||||
$settings?.audio?.tts?.model ?? $config?.audio?.tts?.model
|
||||
).catch((error) => {
|
||||
toast.error(error);
|
||||
assistantSpeaking = false;
|
||||
@@ -207,10 +207,29 @@
|
||||
const assistantSpeakingHandler = async (content) => {
|
||||
assistantSpeaking = true;
|
||||
|
||||
if (($settings?.audio?.TTSEngine ?? '') == '') {
|
||||
currentUtterance = new SpeechSynthesisUtterance(content);
|
||||
speechSynthesis.speak(currentUtterance);
|
||||
} else if ($settings?.audio?.TTSEngine === 'openai') {
|
||||
if (($config.audio.tts.engine ?? '') == '') {
|
||||
let voices = [];
|
||||
const getVoicesLoop = setInterval(async () => {
|
||||
voices = await speechSynthesis.getVoices();
|
||||
if (voices.length > 0) {
|
||||
clearInterval(getVoicesLoop);
|
||||
|
||||
const voice =
|
||||
voices
|
||||
?.filter(
|
||||
(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
|
||||
)
|
||||
?.at(0) ?? undefined;
|
||||
|
||||
console.log($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice);
|
||||
console.log(voices);
|
||||
|
||||
currentUtterance = new SpeechSynthesisUtterance(content);
|
||||
currentUtterance.voice = voice;
|
||||
speechSynthesis.speak(currentUtterance);
|
||||
}
|
||||
}, 100);
|
||||
} else if ($config.audio.tts.engine === 'openai') {
|
||||
console.log('openai');
|
||||
|
||||
const sentences = extractSentences(content).reduce((mergedTexts, currentText) => {
|
||||
@@ -236,9 +255,9 @@
|
||||
for (const [idx, sentence] of sentences.entries()) {
|
||||
const res = await synthesizeOpenAISpeech(
|
||||
localStorage.token,
|
||||
$settings?.audio?.speaker,
|
||||
$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
|
||||
sentence,
|
||||
$settings?.audio?.model
|
||||
$settings?.audio?.tts?.model ?? $config?.audio?.tts?.model
|
||||
).catch((error) => {
|
||||
toast.error(error);
|
||||
|
||||
|
||||
@@ -169,7 +169,7 @@
|
||||
mediaRecorder.ondataavailable = (event) => audioChunks.push(event.data);
|
||||
mediaRecorder.onstop = async () => {
|
||||
console.log('Recording stopped');
|
||||
if (($settings?.audio?.STTEngine ?? '') === 'web') {
|
||||
if (($settings?.audio?.stt?.engine ?? '') === 'web') {
|
||||
audioChunks = [];
|
||||
} else {
|
||||
if (confirmed) {
|
||||
@@ -186,7 +186,7 @@
|
||||
};
|
||||
mediaRecorder.start();
|
||||
|
||||
if (($settings?.audio?.STTEngine ?? '') === 'web') {
|
||||
if (($settings?.audio?.stt?.engine ?? '') === 'web') {
|
||||
if ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window) {
|
||||
// Create a SpeechRecognition object
|
||||
speechRecognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)();
|
||||
|
||||
@@ -213,7 +213,7 @@
|
||||
} else {
|
||||
speaking = true;
|
||||
|
||||
if ($settings?.audio?.TTSEngine === 'openai') {
|
||||
if ($config.audio.tts.engine === 'openai') {
|
||||
loadingSpeech = true;
|
||||
|
||||
const sentences = extractSentences(message.content).reduce((mergedTexts, currentText) => {
|
||||
@@ -244,9 +244,9 @@
|
||||
for (const [idx, sentence] of sentences.entries()) {
|
||||
const res = await synthesizeOpenAISpeech(
|
||||
localStorage.token,
|
||||
$settings?.audio?.speaker,
|
||||
$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
|
||||
sentence,
|
||||
$settings?.audio?.model
|
||||
$settings?.audio?.tts?.model ?? $config?.audio?.tts?.model
|
||||
).catch((error) => {
|
||||
toast.error(error);
|
||||
|
||||
@@ -273,7 +273,11 @@
|
||||
clearInterval(getVoicesLoop);
|
||||
|
||||
const voice =
|
||||
voices?.filter((v) => v.name === $settings?.audio?.speaker)?.at(0) ?? undefined;
|
||||
voices
|
||||
?.filter(
|
||||
(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
|
||||
)
|
||||
?.at(0) ?? undefined;
|
||||
|
||||
const speak = new SpeechSynthesisUtterance(message.content);
|
||||
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
<script lang="ts">
|
||||
import { getAudioConfig, updateAudioConfig } from '$lib/apis/audio';
|
||||
import { user, settings } from '$lib/stores';
|
||||
import { user, settings, config } from '$lib/stores';
|
||||
import { createEventDispatcher, onMount, getContext } from 'svelte';
|
||||
import { toast } from 'svelte-sonner';
|
||||
import Switch from '$lib/components/common/Switch.svelte';
|
||||
@@ -11,26 +10,15 @@
|
||||
export let saveSettings: Function;
|
||||
|
||||
// Audio
|
||||
|
||||
let OpenAIUrl = '';
|
||||
let OpenAIKey = '';
|
||||
let OpenAISpeaker = '';
|
||||
|
||||
let STTEngines = ['', 'openai'];
|
||||
let STTEngine = '';
|
||||
|
||||
let conversationMode = false;
|
||||
let speechAutoSend = false;
|
||||
let responseAutoPlayback = false;
|
||||
let nonLocalVoices = false;
|
||||
|
||||
let TTSEngines = ['', 'openai'];
|
||||
let TTSEngine = '';
|
||||
let STTEngine = '';
|
||||
|
||||
let voices = [];
|
||||
let speaker = '';
|
||||
let models = [];
|
||||
let model = '';
|
||||
let voice = '';
|
||||
|
||||
const getOpenAIVoices = () => {
|
||||
voices = [
|
||||
@@ -43,10 +31,6 @@
|
||||
];
|
||||
};
|
||||
|
||||
const getOpenAIVoicesModel = () => {
|
||||
models = [{ name: 'tts-1' }, { name: 'tts-1-hd' }];
|
||||
};
|
||||
|
||||
const getWebAPIVoices = () => {
|
||||
const getVoicesLoop = setInterval(async () => {
|
||||
voices = await speechSynthesis.getVoices();
|
||||
@@ -58,21 +42,6 @@
|
||||
}, 100);
|
||||
};
|
||||
|
||||
const toggleConversationMode = async () => {
|
||||
conversationMode = !conversationMode;
|
||||
|
||||
if (conversationMode) {
|
||||
responseAutoPlayback = true;
|
||||
speechAutoSend = true;
|
||||
}
|
||||
|
||||
saveSettings({
|
||||
conversationMode: conversationMode,
|
||||
responseAutoPlayback: responseAutoPlayback,
|
||||
speechAutoSend: speechAutoSend
|
||||
});
|
||||
};
|
||||
|
||||
const toggleResponseAutoPlayback = async () => {
|
||||
responseAutoPlayback = !responseAutoPlayback;
|
||||
saveSettings({ responseAutoPlayback: responseAutoPlayback });
|
||||
@@ -83,76 +52,35 @@
|
||||
saveSettings({ speechAutoSend: speechAutoSend });
|
||||
};
|
||||
|
||||
const updateConfigHandler = async () => {
|
||||
if (TTSEngine === 'openai') {
|
||||
const res = await updateAudioConfig(localStorage.token, {
|
||||
url: OpenAIUrl,
|
||||
key: OpenAIKey,
|
||||
model: model,
|
||||
speaker: OpenAISpeaker
|
||||
});
|
||||
|
||||
if (res) {
|
||||
OpenAIUrl = res.OPENAI_API_BASE_URL;
|
||||
OpenAIKey = res.OPENAI_API_KEY;
|
||||
model = res.OPENAI_API_MODEL;
|
||||
OpenAISpeaker = res.OPENAI_API_VOICE;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
onMount(async () => {
|
||||
conversationMode = $settings.conversationMode ?? false;
|
||||
speechAutoSend = $settings.speechAutoSend ?? false;
|
||||
responseAutoPlayback = $settings.responseAutoPlayback ?? false;
|
||||
|
||||
STTEngine = $settings?.audio?.STTEngine ?? '';
|
||||
TTSEngine = $settings?.audio?.TTSEngine ?? '';
|
||||
nonLocalVoices = $settings.audio?.nonLocalVoices ?? false;
|
||||
speaker = $settings?.audio?.speaker ?? '';
|
||||
model = $settings?.audio?.model ?? '';
|
||||
STTEngine = $settings?.audio?.stt?.engine ?? '';
|
||||
voice = $settings?.audio?.tts?.voice ?? $config.audio.tts.voice ?? '';
|
||||
nonLocalVoices = $settings.audio?.tts?.nonLocalVoices ?? false;
|
||||
|
||||
if (TTSEngine === 'openai') {
|
||||
if ($config.audio.tts.engine === 'openai') {
|
||||
getOpenAIVoices();
|
||||
getOpenAIVoicesModel();
|
||||
} else {
|
||||
getWebAPIVoices();
|
||||
}
|
||||
|
||||
if ($user.role === 'admin') {
|
||||
const res = await getAudioConfig(localStorage.token);
|
||||
|
||||
if (res) {
|
||||
OpenAIUrl = res.OPENAI_API_BASE_URL;
|
||||
OpenAIKey = res.OPENAI_API_KEY;
|
||||
model = res.OPENAI_API_MODEL;
|
||||
OpenAISpeaker = res.OPENAI_API_VOICE;
|
||||
if (TTSEngine === 'openai') {
|
||||
speaker = OpenAISpeaker;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
</script>
|
||||
|
||||
<form
|
||||
class="flex flex-col h-full justify-between space-y-3 text-sm"
|
||||
on:submit|preventDefault={async () => {
|
||||
if ($user.role === 'admin') {
|
||||
await updateConfigHandler();
|
||||
}
|
||||
saveSettings({
|
||||
audio: {
|
||||
STTEngine: STTEngine !== '' ? STTEngine : undefined,
|
||||
TTSEngine: TTSEngine !== '' ? TTSEngine : undefined,
|
||||
speaker:
|
||||
(TTSEngine === 'openai' ? OpenAISpeaker : speaker) !== ''
|
||||
? TTSEngine === 'openai'
|
||||
? OpenAISpeaker
|
||||
: speaker
|
||||
: undefined,
|
||||
model: model !== '' ? model : undefined,
|
||||
nonLocalVoices: nonLocalVoices
|
||||
stt: {
|
||||
engine: STTEngine !== '' ? STTEngine : undefined
|
||||
},
|
||||
tts: {
|
||||
voice: $config.audio.tts.engine === 'openai' ? voice : voice !== '' ? voice : undefined,
|
||||
nonLocalVoices: $config.audio.tts.engine === '' ? nonLocalVoices : undefined
|
||||
}
|
||||
}
|
||||
});
|
||||
dispatch('save');
|
||||
@@ -162,31 +90,21 @@
|
||||
<div>
|
||||
<div class=" mb-1 text-sm font-medium">{$i18n.t('STT Settings')}</div>
|
||||
|
||||
<div class=" py-0.5 flex w-full justify-between">
|
||||
<div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div>
|
||||
<div class="flex items-center relative">
|
||||
<select
|
||||
class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
|
||||
bind:value={STTEngine}
|
||||
placeholder="Select an engine"
|
||||
on:change={(e) => {
|
||||
if (e.target.value !== '') {
|
||||
navigator.mediaDevices.getUserMedia({ audio: true }).catch(function (err) {
|
||||
toast.error(
|
||||
$i18n.t(`Permission denied when accessing microphone: {{error}}`, {
|
||||
error: err
|
||||
})
|
||||
);
|
||||
STTEngine = '';
|
||||
});
|
||||
}
|
||||
}}
|
||||
>
|
||||
<option value="">{$i18n.t('Default (Whisper)')}</option>
|
||||
<option value="web">{$i18n.t('Web API')}</option>
|
||||
</select>
|
||||
{#if $config.audio.stt.engine !== 'web'}
|
||||
<div class=" py-0.5 flex w-full justify-between">
|
||||
<div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div>
|
||||
<div class="flex items-center relative">
|
||||
<select
|
||||
class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
|
||||
bind:value={STTEngine}
|
||||
placeholder="Select an engine"
|
||||
>
|
||||
<option value="">{$i18n.t('Default')}</option>
|
||||
<option value="web">{$i18n.t('Web API')}</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<div class=" py-0.5 flex w-full justify-between">
|
||||
<div class=" self-center text-xs font-medium">
|
||||
@@ -212,50 +130,6 @@
|
||||
<div>
|
||||
<div class=" mb-1 text-sm font-medium">{$i18n.t('TTS Settings')}</div>
|
||||
|
||||
<div class=" py-0.5 flex w-full justify-between">
|
||||
<div class=" self-center text-xs font-medium">{$i18n.t('Text-to-Speech Engine')}</div>
|
||||
<div class="flex items-center relative">
|
||||
<select
|
||||
class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
|
||||
bind:value={TTSEngine}
|
||||
placeholder="Select a mode"
|
||||
on:change={(e) => {
|
||||
if (e.target.value === 'openai') {
|
||||
getOpenAIVoices();
|
||||
OpenAISpeaker = 'alloy';
|
||||
model = 'tts-1';
|
||||
} else {
|
||||
getWebAPIVoices();
|
||||
speaker = '';
|
||||
}
|
||||
}}
|
||||
>
|
||||
<option value="">{$i18n.t('Default (Web API)')}</option>
|
||||
<option value="openai">{$i18n.t('Open AI')}</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{#if $user.role === 'admin'}
|
||||
{#if TTSEngine === 'openai'}
|
||||
<div class="mt-1 flex gap-2 mb-1">
|
||||
<input
|
||||
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
|
||||
placeholder={$i18n.t('API Base URL')}
|
||||
bind:value={OpenAIUrl}
|
||||
required
|
||||
/>
|
||||
|
||||
<input
|
||||
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
|
||||
placeholder={$i18n.t('API Key')}
|
||||
bind:value={OpenAIKey}
|
||||
required
|
||||
/>
|
||||
</div>
|
||||
{/if}
|
||||
{/if}
|
||||
|
||||
<div class=" py-0.5 flex w-full justify-between">
|
||||
<div class=" self-center text-xs font-medium">{$i18n.t('Auto-playback response')}</div>
|
||||
|
||||
@@ -277,21 +151,21 @@
|
||||
|
||||
<hr class=" dark:border-gray-700" />
|
||||
|
||||
{#if TTSEngine === ''}
|
||||
{#if $config.audio.tts.engine === ''}
|
||||
<div>
|
||||
<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
|
||||
<div class="flex w-full">
|
||||
<div class="flex-1">
|
||||
<select
|
||||
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
|
||||
bind:value={speaker}
|
||||
bind:value={voice}
|
||||
>
|
||||
<option value="" selected={speaker !== ''}>{$i18n.t('Default')}</option>
|
||||
{#each voices.filter((v) => nonLocalVoices || v.localService === true) as voice}
|
||||
<option value="" selected={voice !== ''}>{$i18n.t('Default')}</option>
|
||||
{#each voices.filter((v) => nonLocalVoices || v.localService === true) as _voice}
|
||||
<option
|
||||
value={voice.name}
|
||||
value={_voice.name}
|
||||
class="bg-gray-100 dark:bg-gray-700"
|
||||
selected={speaker === voice.name}>{voice.name}</option
|
||||
selected={voice === _voice.name}>{_voice.name}</option
|
||||
>
|
||||
{/each}
|
||||
</select>
|
||||
@@ -307,7 +181,7 @@
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{:else if TTSEngine === 'openai'}
|
||||
{:else if $config.audio.tts.engine === 'openai'}
|
||||
<div>
|
||||
<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
|
||||
<div class="flex w-full">
|
||||
@@ -315,7 +189,7 @@
|
||||
<input
|
||||
list="voice-list"
|
||||
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
|
||||
bind:value={OpenAISpeaker}
|
||||
bind:value={voice}
|
||||
placeholder="Select a voice"
|
||||
/>
|
||||
|
||||
@@ -327,25 +201,6 @@
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Model')}</div>
|
||||
<div class="flex w-full">
|
||||
<div class="flex-1">
|
||||
<input
|
||||
list="model-list"
|
||||
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
|
||||
bind:value={model}
|
||||
placeholder="Select a model"
|
||||
/>
|
||||
|
||||
<datalist id="model-list">
|
||||
{#each models as model}
|
||||
<option value={model.name} />
|
||||
{/each}
|
||||
</datalist>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
|
||||
Reference in New Issue
Block a user