refac: audio

This commit is contained in:
Timothy J. Baek
2024-06-07 20:18:48 -07:00
parent da47c2dfa3
commit 55dc6c1b3b
20 changed files with 769 additions and 464 deletions

View File

@@ -1,5 +1,5 @@
<script lang="ts">
import { settings, showCallOverlay } from '$lib/stores';
import { config, settings, showCallOverlay } from '$lib/stores';
import { onMount, tick, getContext } from 'svelte';
import { blobToFile, calculateSHA256, extractSentences, findWordIndices } from '$lib/utils';
@@ -159,9 +159,9 @@
const getOpenAISpeech = async (text) => {
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.audio?.speaker ?? 'alloy',
$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
text,
$settings?.audio?.model ?? 'tts-1'
$settings?.audio?.tts?.model ?? $config?.audio?.tts?.model
).catch((error) => {
toast.error(error);
assistantSpeaking = false;
@@ -207,10 +207,29 @@
const assistantSpeakingHandler = async (content) => {
assistantSpeaking = true;
if (($settings?.audio?.TTSEngine ?? '') == '') {
currentUtterance = new SpeechSynthesisUtterance(content);
speechSynthesis.speak(currentUtterance);
} else if ($settings?.audio?.TTSEngine === 'openai') {
if (($config.audio.tts.engine ?? '') == '') {
let voices = [];
const getVoicesLoop = setInterval(async () => {
voices = await speechSynthesis.getVoices();
if (voices.length > 0) {
clearInterval(getVoicesLoop);
const voice =
voices
?.filter(
(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
)
?.at(0) ?? undefined;
console.log($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice);
console.log(voices);
currentUtterance = new SpeechSynthesisUtterance(content);
currentUtterance.voice = voice;
speechSynthesis.speak(currentUtterance);
}
}, 100);
} else if ($config.audio.tts.engine === 'openai') {
console.log('openai');
const sentences = extractSentences(content).reduce((mergedTexts, currentText) => {
@@ -236,9 +255,9 @@
for (const [idx, sentence] of sentences.entries()) {
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.audio?.speaker,
$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
sentence,
$settings?.audio?.model
$settings?.audio?.tts?.model ?? $config?.audio?.tts?.model
).catch((error) => {
toast.error(error);

View File

@@ -169,7 +169,7 @@
mediaRecorder.ondataavailable = (event) => audioChunks.push(event.data);
mediaRecorder.onstop = async () => {
console.log('Recording stopped');
if (($settings?.audio?.STTEngine ?? '') === 'web') {
if (($settings?.audio?.stt?.engine ?? '') === 'web') {
audioChunks = [];
} else {
if (confirmed) {
@@ -186,7 +186,7 @@
};
mediaRecorder.start();
if (($settings?.audio?.STTEngine ?? '') === 'web') {
if (($settings?.audio?.stt?.engine ?? '') === 'web') {
if ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window) {
// Create a SpeechRecognition object
speechRecognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)();

View File

@@ -213,7 +213,7 @@
} else {
speaking = true;
if ($settings?.audio?.TTSEngine === 'openai') {
if ($config.audio.tts.engine === 'openai') {
loadingSpeech = true;
const sentences = extractSentences(message.content).reduce((mergedTexts, currentText) => {
@@ -244,9 +244,9 @@
for (const [idx, sentence] of sentences.entries()) {
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.audio?.speaker,
$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
sentence,
$settings?.audio?.model
$settings?.audio?.tts?.model ?? $config?.audio?.tts?.model
).catch((error) => {
toast.error(error);
@@ -273,7 +273,11 @@
clearInterval(getVoicesLoop);
const voice =
voices?.filter((v) => v.name === $settings?.audio?.speaker)?.at(0) ?? undefined;
voices
?.filter(
(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
)
?.at(0) ?? undefined;
const speak = new SpeechSynthesisUtterance(message.content);

View File

@@ -1,6 +1,5 @@
<script lang="ts">
import { getAudioConfig, updateAudioConfig } from '$lib/apis/audio';
import { user, settings } from '$lib/stores';
import { user, settings, config } from '$lib/stores';
import { createEventDispatcher, onMount, getContext } from 'svelte';
import { toast } from 'svelte-sonner';
import Switch from '$lib/components/common/Switch.svelte';
@@ -11,26 +10,15 @@
export let saveSettings: Function;
// Audio
let OpenAIUrl = '';
let OpenAIKey = '';
let OpenAISpeaker = '';
let STTEngines = ['', 'openai'];
let STTEngine = '';
let conversationMode = false;
let speechAutoSend = false;
let responseAutoPlayback = false;
let nonLocalVoices = false;
let TTSEngines = ['', 'openai'];
let TTSEngine = '';
let STTEngine = '';
let voices = [];
let speaker = '';
let models = [];
let model = '';
let voice = '';
const getOpenAIVoices = () => {
voices = [
@@ -43,10 +31,6 @@
];
};
const getOpenAIVoicesModel = () => {
models = [{ name: 'tts-1' }, { name: 'tts-1-hd' }];
};
const getWebAPIVoices = () => {
const getVoicesLoop = setInterval(async () => {
voices = await speechSynthesis.getVoices();
@@ -58,21 +42,6 @@
}, 100);
};
const toggleConversationMode = async () => {
conversationMode = !conversationMode;
if (conversationMode) {
responseAutoPlayback = true;
speechAutoSend = true;
}
saveSettings({
conversationMode: conversationMode,
responseAutoPlayback: responseAutoPlayback,
speechAutoSend: speechAutoSend
});
};
const toggleResponseAutoPlayback = async () => {
responseAutoPlayback = !responseAutoPlayback;
saveSettings({ responseAutoPlayback: responseAutoPlayback });
@@ -83,76 +52,35 @@
saveSettings({ speechAutoSend: speechAutoSend });
};
const updateConfigHandler = async () => {
if (TTSEngine === 'openai') {
const res = await updateAudioConfig(localStorage.token, {
url: OpenAIUrl,
key: OpenAIKey,
model: model,
speaker: OpenAISpeaker
});
if (res) {
OpenAIUrl = res.OPENAI_API_BASE_URL;
OpenAIKey = res.OPENAI_API_KEY;
model = res.OPENAI_API_MODEL;
OpenAISpeaker = res.OPENAI_API_VOICE;
}
}
};
onMount(async () => {
conversationMode = $settings.conversationMode ?? false;
speechAutoSend = $settings.speechAutoSend ?? false;
responseAutoPlayback = $settings.responseAutoPlayback ?? false;
STTEngine = $settings?.audio?.STTEngine ?? '';
TTSEngine = $settings?.audio?.TTSEngine ?? '';
nonLocalVoices = $settings.audio?.nonLocalVoices ?? false;
speaker = $settings?.audio?.speaker ?? '';
model = $settings?.audio?.model ?? '';
STTEngine = $settings?.audio?.stt?.engine ?? '';
voice = $settings?.audio?.tts?.voice ?? $config.audio.tts.voice ?? '';
nonLocalVoices = $settings.audio?.tts?.nonLocalVoices ?? false;
if (TTSEngine === 'openai') {
if ($config.audio.tts.engine === 'openai') {
getOpenAIVoices();
getOpenAIVoicesModel();
} else {
getWebAPIVoices();
}
if ($user.role === 'admin') {
const res = await getAudioConfig(localStorage.token);
if (res) {
OpenAIUrl = res.OPENAI_API_BASE_URL;
OpenAIKey = res.OPENAI_API_KEY;
model = res.OPENAI_API_MODEL;
OpenAISpeaker = res.OPENAI_API_VOICE;
if (TTSEngine === 'openai') {
speaker = OpenAISpeaker;
}
}
}
});
</script>
<form
class="flex flex-col h-full justify-between space-y-3 text-sm"
on:submit|preventDefault={async () => {
if ($user.role === 'admin') {
await updateConfigHandler();
}
saveSettings({
audio: {
STTEngine: STTEngine !== '' ? STTEngine : undefined,
TTSEngine: TTSEngine !== '' ? TTSEngine : undefined,
speaker:
(TTSEngine === 'openai' ? OpenAISpeaker : speaker) !== ''
? TTSEngine === 'openai'
? OpenAISpeaker
: speaker
: undefined,
model: model !== '' ? model : undefined,
nonLocalVoices: nonLocalVoices
stt: {
engine: STTEngine !== '' ? STTEngine : undefined
},
tts: {
voice: $config.audio.tts.engine === 'openai' ? voice : voice !== '' ? voice : undefined,
nonLocalVoices: $config.audio.tts.engine === '' ? nonLocalVoices : undefined
}
}
});
dispatch('save');
@@ -162,31 +90,21 @@
<div>
<div class=" mb-1 text-sm font-medium">{$i18n.t('STT Settings')}</div>
<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div>
<div class="flex items-center relative">
<select
class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={STTEngine}
placeholder="Select an engine"
on:change={(e) => {
if (e.target.value !== '') {
navigator.mediaDevices.getUserMedia({ audio: true }).catch(function (err) {
toast.error(
$i18n.t(`Permission denied when accessing microphone: {{error}}`, {
error: err
})
);
STTEngine = '';
});
}
}}
>
<option value="">{$i18n.t('Default (Whisper)')}</option>
<option value="web">{$i18n.t('Web API')}</option>
</select>
{#if $config.audio.stt.engine !== 'web'}
<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div>
<div class="flex items-center relative">
<select
class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={STTEngine}
placeholder="Select an engine"
>
<option value="">{$i18n.t('Default')}</option>
<option value="web">{$i18n.t('Web API')}</option>
</select>
</div>
</div>
</div>
{/if}
<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">
@@ -212,50 +130,6 @@
<div>
<div class=" mb-1 text-sm font-medium">{$i18n.t('TTS Settings')}</div>
<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">{$i18n.t('Text-to-Speech Engine')}</div>
<div class="flex items-center relative">
<select
class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={TTSEngine}
placeholder="Select a mode"
on:change={(e) => {
if (e.target.value === 'openai') {
getOpenAIVoices();
OpenAISpeaker = 'alloy';
model = 'tts-1';
} else {
getWebAPIVoices();
speaker = '';
}
}}
>
<option value="">{$i18n.t('Default (Web API)')}</option>
<option value="openai">{$i18n.t('Open AI')}</option>
</select>
</div>
</div>
{#if $user.role === 'admin'}
{#if TTSEngine === 'openai'}
<div class="mt-1 flex gap-2 mb-1">
<input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
placeholder={$i18n.t('API Base URL')}
bind:value={OpenAIUrl}
required
/>
<input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
placeholder={$i18n.t('API Key')}
bind:value={OpenAIKey}
required
/>
</div>
{/if}
{/if}
<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">{$i18n.t('Auto-playback response')}</div>
@@ -277,21 +151,21 @@
<hr class=" dark:border-gray-700" />
{#if TTSEngine === ''}
{#if $config.audio.tts.engine === ''}
<div>
<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
<div class="flex w-full">
<div class="flex-1">
<select
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
bind:value={speaker}
bind:value={voice}
>
<option value="" selected={speaker !== ''}>{$i18n.t('Default')}</option>
{#each voices.filter((v) => nonLocalVoices || v.localService === true) as voice}
<option value="" selected={voice !== ''}>{$i18n.t('Default')}</option>
{#each voices.filter((v) => nonLocalVoices || v.localService === true) as _voice}
<option
value={voice.name}
value={_voice.name}
class="bg-gray-100 dark:bg-gray-700"
selected={speaker === voice.name}>{voice.name}</option
selected={voice === _voice.name}>{_voice.name}</option
>
{/each}
</select>
@@ -307,7 +181,7 @@
</div>
</div>
</div>
{:else if TTSEngine === 'openai'}
{:else if $config.audio.tts.engine === 'openai'}
<div>
<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
<div class="flex w-full">
@@ -315,7 +189,7 @@
<input
list="voice-list"
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
bind:value={OpenAISpeaker}
bind:value={voice}
placeholder="Select a voice"
/>
@@ -327,25 +201,6 @@
</div>
</div>
</div>
<div>
<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Model')}</div>
<div class="flex w-full">
<div class="flex-1">
<input
list="model-list"
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
bind:value={model}
placeholder="Select a model"
/>
<datalist id="model-list">
{#each models as model}
<option value={model.name} />
{/each}
</datalist>
</div>
</div>
</div>
{/if}
</div>