enh/refac: read aloud audio queue
This commit is contained in:
@@ -27,6 +27,7 @@
|
||||
banners,
|
||||
user,
|
||||
socket,
|
||||
audioQueue,
|
||||
showControls,
|
||||
showCallOverlay,
|
||||
currentChatPage,
|
||||
@@ -43,6 +44,7 @@
|
||||
pinnedChats,
|
||||
showEmbeds
|
||||
} from '$lib/stores';
|
||||
|
||||
import {
|
||||
convertMessagesToHistory,
|
||||
copyToClipboard,
|
||||
@@ -53,6 +55,8 @@
|
||||
removeAllDetails,
|
||||
getCodeBlockContents
|
||||
} from '$lib/utils';
|
||||
import { AudioQueue } from '$lib/utils/audio';
|
||||
|
||||
import {
|
||||
createNewChat,
|
||||
getAllTags,
|
||||
@@ -529,17 +533,28 @@
|
||||
let showControlsSubscribe = null;
|
||||
let selectedFolderSubscribe = null;
|
||||
|
||||
const stopAudio = () => {
|
||||
try {
|
||||
speechSynthesis.cancel();
|
||||
$audioQueue.stop();
|
||||
} catch {}
|
||||
};
|
||||
|
||||
onMount(async () => {
|
||||
loading = true;
|
||||
console.log('mounted');
|
||||
window.addEventListener('message', onMessageHandler);
|
||||
$socket?.on('events', chatEventHandler);
|
||||
|
||||
audioQueue.set(new AudioQueue(document.getElementById('audioElement')));
|
||||
|
||||
pageSubscribe = page.subscribe(async (p) => {
|
||||
if (p.url.pathname === '/') {
|
||||
await tick();
|
||||
initNewChat();
|
||||
}
|
||||
|
||||
stopAudio();
|
||||
});
|
||||
|
||||
const storageChatInput = sessionStorage.getItem(
|
||||
@@ -621,6 +636,7 @@
|
||||
chatIdUnsubscriber?.();
|
||||
window.removeEventListener('message', onMessageHandler);
|
||||
$socket?.off('events', chatEventHandler);
|
||||
$audioQueue?.destroy();
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
}
|
||||
@@ -2347,7 +2363,7 @@
|
||||
</title>
|
||||
</svelte:head>
|
||||
|
||||
<audio id="audioElement" src="" style="display: none;" />
|
||||
<audio id="audioElement" src="" style="display: none;"></audio>
|
||||
|
||||
<EventConfirmDialog
|
||||
bind:show={showEventConfirmation}
|
||||
|
||||
@@ -15,7 +15,15 @@
|
||||
import { getChatById } from '$lib/apis/chats';
|
||||
import { generateTags } from '$lib/apis';
|
||||
|
||||
import { config, models, settings, temporaryChatEnabled, TTSWorker, user } from '$lib/stores';
|
||||
import {
|
||||
audioQueue,
|
||||
config,
|
||||
models,
|
||||
settings,
|
||||
temporaryChatEnabled,
|
||||
TTSWorker,
|
||||
user
|
||||
} from '$lib/stores';
|
||||
import { synthesizeOpenAISpeech } from '$lib/apis/audio';
|
||||
import { imageGenerations } from '$lib/apis/images';
|
||||
import {
|
||||
@@ -156,7 +164,6 @@
|
||||
|
||||
let messageIndexEdit = false;
|
||||
|
||||
let audioParts: Record<number, HTMLAudioElement | null> = {};
|
||||
let speaking = false;
|
||||
let speakingIdx: number | undefined;
|
||||
|
||||
@@ -178,51 +185,25 @@
|
||||
}
|
||||
};
|
||||
|
||||
const playAudio = (idx: number) => {
|
||||
return new Promise<void>((res) => {
|
||||
speakingIdx = idx;
|
||||
const audio = audioParts[idx];
|
||||
const stopAudio = () => {
|
||||
try {
|
||||
speechSynthesis.cancel();
|
||||
$audioQueue.stop();
|
||||
} catch {}
|
||||
|
||||
if (!audio) {
|
||||
return res();
|
||||
}
|
||||
|
||||
audio.play();
|
||||
audio.onended = async () => {
|
||||
await new Promise((r) => setTimeout(r, 300));
|
||||
|
||||
if (Object.keys(audioParts).length - 1 === idx) {
|
||||
speaking = false;
|
||||
}
|
||||
|
||||
res();
|
||||
};
|
||||
});
|
||||
};
|
||||
|
||||
const toggleSpeakMessage = async () => {
|
||||
if (speaking) {
|
||||
try {
|
||||
speechSynthesis.cancel();
|
||||
|
||||
if (speakingIdx !== undefined && audioParts[speakingIdx]) {
|
||||
audioParts[speakingIdx]!.pause();
|
||||
audioParts[speakingIdx]!.currentTime = 0;
|
||||
}
|
||||
} catch {}
|
||||
|
||||
speaking = false;
|
||||
speakingIdx = undefined;
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
const speak = async () => {
|
||||
if (!(message?.content ?? '').trim().length) {
|
||||
toast.info($i18n.t('No content to speak'));
|
||||
return;
|
||||
}
|
||||
|
||||
speaking = true;
|
||||
|
||||
const content = removeAllDetails(message.content);
|
||||
|
||||
if ($config.audio.tts.engine === '') {
|
||||
@@ -241,12 +222,12 @@
|
||||
|
||||
console.log(voice);
|
||||
|
||||
const speak = new SpeechSynthesisUtterance(content);
|
||||
speak.rate = $settings.audio?.tts?.playbackRate ?? 1;
|
||||
const speech = new SpeechSynthesisUtterance(content);
|
||||
speech.rate = $settings.audio?.tts?.playbackRate ?? 1;
|
||||
|
||||
console.log(speak);
|
||||
console.log(speech);
|
||||
|
||||
speak.onend = () => {
|
||||
speech.onend = () => {
|
||||
speaking = false;
|
||||
if ($settings.conversationMode) {
|
||||
document.getElementById('voice-input-button')?.click();
|
||||
@@ -254,15 +235,21 @@
|
||||
};
|
||||
|
||||
if (voice) {
|
||||
speak.voice = voice;
|
||||
speech.voice = voice;
|
||||
}
|
||||
|
||||
speechSynthesis.speak(speak);
|
||||
speechSynthesis.speak(speech);
|
||||
}
|
||||
}, 100);
|
||||
} else {
|
||||
loadingSpeech = true;
|
||||
$audioQueue.setId(`${message.id}`);
|
||||
$audioQueue.setPlaybackRate($settings.audio?.tts?.playbackRate ?? 1);
|
||||
$audioQueue.onStopped = () => {
|
||||
speaking = false;
|
||||
speakingIdx = undefined;
|
||||
};
|
||||
|
||||
loadingSpeech = true;
|
||||
const messageContentParts: string[] = getMessageContentParts(
|
||||
content,
|
||||
$config?.audio?.tts?.split_on ?? 'punctuation'
|
||||
@@ -278,17 +265,6 @@
|
||||
}
|
||||
|
||||
console.debug('Prepared message content for TTS', messageContentParts);
|
||||
|
||||
audioParts = messageContentParts.reduce(
|
||||
(acc, _sentence, idx) => {
|
||||
acc[idx] = null;
|
||||
return acc;
|
||||
},
|
||||
{} as typeof audioParts
|
||||
);
|
||||
|
||||
let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
|
||||
|
||||
if ($settings.audio?.tts?.engine === 'browser-kokoro') {
|
||||
if (!$TTSWorker) {
|
||||
await TTSWorker.set(
|
||||
@@ -315,12 +291,9 @@
|
||||
});
|
||||
|
||||
if (blob) {
|
||||
const audio = new Audio(blob);
|
||||
audio.playbackRate = $settings.audio?.tts?.playbackRate ?? 1;
|
||||
|
||||
audioParts[idx] = audio;
|
||||
const url = URL.createObjectURL(blob);
|
||||
$audioQueue.enqueue(url);
|
||||
loadingSpeech = false;
|
||||
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -341,13 +314,10 @@
|
||||
|
||||
if (res) {
|
||||
const blob = await res.blob();
|
||||
const blobUrl = URL.createObjectURL(blob);
|
||||
const audio = new Audio(blobUrl);
|
||||
audio.playbackRate = $settings.audio?.tts?.playbackRate ?? 1;
|
||||
const url = URL.createObjectURL(blob);
|
||||
|
||||
audioParts[idx] = audio;
|
||||
$audioQueue.enqueue(url);
|
||||
loadingSpeech = false;
|
||||
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -992,7 +962,11 @@
|
||||
: 'invisible group-hover:visible'} p-1.5 hover:bg-black/5 dark:hover:bg-white/5 rounded-lg dark:hover:text-white hover:text-black transition"
|
||||
on:click={() => {
|
||||
if (!loadingSpeech) {
|
||||
toggleSpeakMessage();
|
||||
if (speaking) {
|
||||
stopAudio();
|
||||
} else {
|
||||
speak();
|
||||
}
|
||||
}
|
||||
}}
|
||||
>
|
||||
|
||||
Reference in New Issue
Block a user