// Play the synthesized audio clip for sentence `idx` and resolve once it has
// finished (plus a short inter-sentence pause). Wrapping the `onended`
// callback in a Promise lets callers chain sentence playback sequentially
// via `lastPlayedAudioPromise.then(...)`.
const playAudio = (idx) => {
	return new Promise((res) => {
		speakingIdx = idx;
		const audio = sentencesAudio[idx];

		// Synthesis for this sentence may have failed or not arrived
		// (entry left `null` by toggleSpeakMessage); skip it instead of
		// crashing on `audio.play()` and stalling the playback chain.
		if (!audio) {
			if (Object.keys(sentencesAudio).length - 1 === idx) {
				speaking = null;
				speakingIdx = null;
			}
			res(null);
			return;
		}

		audio.play();
		audio.onended = async (e) => {
			// Brief pause so consecutive sentences don't run together.
			await new Promise((r) => setTimeout(r, 500));

			// Last sentence finished: clear the speaking state so the UI
			// toggle returns to its idle appearance.
			if (Object.keys(sentencesAudio).length - 1 === idx) {
				speaking = null;
				speakingIdx = null;
			}

			res(e);
		};
	});
};
/**
 * Strip emoji characters from a string (used to clean TTS input).
 *
 * Uses Unicode property escapes so BMP emojis (e.g. ☺, ❤) are removed as
 * well as surrogate-pair emojis, while non-emoji astral characters (e.g.
 * rare CJK ideographs) are preserved. The previous surrogate-range regex
 * stripped *every* surrogate pair and missed BMP emojis entirely.
 *
 * @param {string} str - Input text.
 * @returns {string} Text with emoji base characters, skin-tone modifiers,
 *   zero-width joiners and variation selectors removed.
 */
export const removeEmojis = (str) => {
	// Extended_Pictographic covers emoji base characters; the extra
	// alternatives clean up the combining marks used in emoji sequences.
	const emojiRegex = /\p{Extended_Pictographic}|\p{Emoji_Modifier}|\u200D|\uFE0F/gu;

	return str.replace(emojiRegex, '');
};

/**
 * Split a paragraph into trimmed, emoji-free sentences for per-sentence
 * TTS synthesis.
 *
 * @param {string} text - Paragraph to split.
 * @returns {string[]} Non-empty sentences; punctuation ('.', '!', '?')
 *   stays attached to its sentence.
 */
export const extractSentences = (text) => {
	// Zero-width lookbehind split keeps the terminating punctuation with
	// the sentence it ends.
	const sentences = text.split(/(?<=[.!?])/);

	return sentences
		.map((sentence) => removeEmojis(sentence.trim()))
		.filter((sentence) => sentence !== '');
};