diff --git a/src/lib/components/chat/Chat.svelte b/src/lib/components/chat/Chat.svelte
index 44a221ba6..c9ee74aa2 100644
--- a/src/lib/components/chat/Chat.svelte
+++ b/src/lib/components/chat/Chat.svelte
@@ -30,6 +30,7 @@
import {
convertMessagesToHistory,
copyToClipboard,
+ extractSentencesForAudio,
promptTemplate,
splitStream
} from '$lib/utils';
@@ -593,7 +594,15 @@
array.findIndex((i) => JSON.stringify(i) === JSON.stringify(item)) === index
);
- eventTarget.dispatchEvent(new CustomEvent('chat:start'));
+ eventTarget.dispatchEvent(
+ new CustomEvent('chat:start', {
+ detail: {
+ id: responseMessageId
+ }
+ })
+ );
+
+ await tick();
const [res, controller] = await generateChatCompletion(localStorage.token, {
model: model.id,
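
`eventTarget` is a plain `EventTarget` shared between `Chat.svelte` and the call overlay; the change here is that `chat:start` now carries the id of the response message, so listeners can tie everything that follows to one specific response. A minimal sketch of the pattern (the listener body is illustrative):

```js
const eventTarget = new EventTarget();

// Listener side (the call overlay subscribes in onMount):
eventTarget.addEventListener('chat:start', (e) => {
	console.log('response started:', e.detail.id);
});

// Dispatcher side (this component):
eventTarget.dispatchEvent(new CustomEvent('chat:start', { detail: { id: 'abc123' } }));
```
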
@@ -664,9 +673,23 @@
continue;
} else {
responseMessage.content += data.message.content;
- eventTarget.dispatchEvent(
- new CustomEvent('chat', { detail: { content: data.message.content } })
- );
+
+ const sentences = extractSentencesForAudio(responseMessage.content);
+ sentences.pop(); // drop the trailing sentence; it may still be streaming
+
+ // dispatch only the newest complete sentence, and only if it hasn't been dispatched before
+ if (
+ sentences.length > 0 &&
+ sentences[sentences.length - 1] !== responseMessage.lastSentence
+ ) {
+ responseMessage.lastSentence = sentences[sentences.length - 1];
+ eventTarget.dispatchEvent(
+ new CustomEvent('chat', {
+ detail: { id: responseMessageId, content: sentences[sentences.length - 1] }
+ })
+ );
+ }
+
messages = messages;
}
} else {
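
The net effect of this block: as chunks stream in, each completed sentence is dispatched exactly once, and the still-growing trailing sentence is held back (it is flushed later, in the `chat:finish` hunk that follows). A standalone sketch of the same logic, assuming `extractSentencesForAudio` behaves as defined at the end of this patch:

```js
let lastSentence = '';

const onChunk = (fullContent, dispatch) => {
	const sentences = extractSentencesForAudio(fullContent);
	sentences.pop(); // the trailing sentence may still be streaming

	const latest = sentences.at(-1);
	if (latest && latest !== lastSentence) {
		lastSentence = latest; // remember it so it is not dispatched twice
		dispatch(latest);
	}
};
```
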
@@ -760,7 +783,23 @@
stopResponseFlag = false;
await tick();
- eventTarget.dispatchEvent(new CustomEvent('chat:finish'));
+
+ // flush the final sentence, which the streaming handler held back
+ const lastSentence = extractSentencesForAudio(responseMessage.content).at(-1) ?? '';
+ if (lastSentence) {
+ eventTarget.dispatchEvent(
+ new CustomEvent('chat', {
+ detail: { id: responseMessageId, content: lastSentence }
+ })
+ );
+ }
+ eventTarget.dispatchEvent(
+ new CustomEvent('chat:finish', {
+ detail: {
+ id: responseMessageId,
+ content: responseMessage.content
+ }
+ })
+ );
if (autoScroll) {
scrollToBottom();
@@ -802,7 +841,14 @@
scrollToBottom();
- eventTarget.dispatchEvent(new CustomEvent('chat:start'));
+ eventTarget.dispatchEvent(
+ new CustomEvent('chat:start', {
+ detail: {
+ id: responseMessageId
+ }
+ })
+ );
+ await tick();
try {
const [res, controller] = await generateOpenAIChatCompletion(
@@ -924,7 +970,23 @@
continue;
} else {
responseMessage.content += value;
- eventTarget.dispatchEvent(new CustomEvent('chat', { detail: { content: value } }));
+
+ const sentences = extractSentencesForAudio(responseMessage.content);
+ sentences.pop(); // drop the trailing sentence; it may still be streaming
+
+ // dispatch only the newest complete sentence, and only if it hasn't been dispatched before
+ if (
+ sentences.length > 0 &&
+ sentences[sentences.length - 1] !== responseMessage.lastSentence
+ ) {
+ responseMessage.lastSentence = sentences[sentences.length - 1];
+ eventTarget.dispatchEvent(
+ new CustomEvent('chat', {
+ detail: { id: responseMessageId, content: sentences[sentences.length - 1] }
+ })
+ );
+ }
+
messages = messages;
}
@@ -975,7 +1037,23 @@
stopResponseFlag = false;
await tick();
- eventTarget.dispatchEvent(new CustomEvent('chat:finish'));
+ // flush the final sentence, which the streaming handler held back
+ const lastSentence = extractSentencesForAudio(responseMessage.content).at(-1) ?? '';
+ if (lastSentence) {
+ eventTarget.dispatchEvent(
+ new CustomEvent('chat', {
+ detail: { id: responseMessageId, content: lastSentence }
+ })
+ );
+ }
+
+ eventTarget.dispatchEvent(
+ new CustomEvent('chat:finish', {
+ detail: {
+ id: responseMessageId,
+ content: responseMessage.content
+ }
+ })
+ );
if (autoScroll) {
scrollToBottom();
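
This block is byte-for-byte the same as in the Ollama path above, and the `chat:finish` flush is likewise duplicated. A possible follow-up, not part of this patch, is a shared helper along these lines (the name is hypothetical):

```js
// Hypothetical helper to collapse the duplicated dispatch logic in both completion paths.
const dispatchNewSentence = (eventTarget, responseMessage, responseMessageId) => {
	const sentences = extractSentencesForAudio(responseMessage.content);
	sentences.pop(); // the trailing sentence may still be streaming

	const latest = sentences.at(-1);
	if (latest && latest !== responseMessage.lastSentence) {
		responseMessage.lastSentence = latest;
		eventTarget.dispatchEvent(
			new CustomEvent('chat', { detail: { id: responseMessageId, content: latest } })
		);
	}
};
```
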
@@ -1207,14 +1285,18 @@
-
+
+
+{#if $showCallOverlay}
+ <CallOverlay ... />
+{/if}
{#if !chatIdProp || (loaded && chatIdProp)}
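
The rest of the patch reworks `CallOverlay.svelte`. The most consequential behavioural change is in `analyseAudio` (further down): `stopAllAudio()` now fires only on the first frame of detected speech rather than on every noisy frame, and `lastSoundTime` keeps refreshing while the user talks. Reduced to its core, the voice-activity state machine looks like this (a sketch, with `hasSound` computed per animation frame as in the patch):

```js
let hasStartedSpeaking = false;
let lastSoundTime = Date.now();

const processFrame = (hasSound) => {
	if (hasSound) {
		if (!hasStartedSpeaking) {
			hasStartedSpeaking = true;
			stopAllAudio(); // barge-in: the user started talking over the assistant
		}
		lastSoundTime = Date.now();
	} else if (hasStartedSpeaking && Date.now() - lastSoundTime > 2000) {
		mediaRecorder?.stop(); // two seconds of silence ends the utterance
	}
};
```
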
diff --git a/src/lib/components/chat/MessageInput/CallOverlay.svelte b/src/lib/components/chat/MessageInput/CallOverlay.svelte
--- a/src/lib/components/chat/MessageInput/CallOverlay.svelte
+++ b/src/lib/components/chat/MessageInput/CallOverlay.svelte
- $: assistantSentences = extractSentences(assistantMessage).reduce((mergedTexts, currentText) => {
- const lastIndex = mergedTexts.length - 1;
- if (lastIndex >= 0) {
- const previousText = mergedTexts[lastIndex];
- const wordCount = previousText.split(/\s+/).length;
- if (wordCount < 2) {
- mergedTexts[lastIndex] = previousText + ' ' + currentText;
- } else {
- mergedTexts.push(currentText);
- }
- } else {
- mergedTexts.push(currentText);
- }
- return mergedTexts;
- }, []);
-
- let currentUtterance = null;
let rmsLevel = 0;
let hasStartedSpeaking = false;
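
The next hunks move the audio-analysis helpers further down the file; `calculateRMS` itself is unchanged. For reference, it maps 8-bit time-domain samples (0–255, centred on 128) to a 0–1 loudness value:

```js
// Unchanged from the patch; shown with a toy buffer to illustrate the scale.
const calculateRMS = (data) => {
	let sumSquares = 0;
	for (let i = 0; i < data.length; i++) {
		const normalizedValue = (data[i] - 128) / 128; // map 0..255 onto -1..1
		sumSquares += normalizedValue * normalizedValue;
	}
	return Math.sqrt(sumSquares / data.length);
};

console.log(calculateRMS(new Uint8Array([255, 0, 128, 128]))); // ≈ 0.7, i.e. loud
```
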
@@ -170,75 +148,6 @@
const MIN_DECIBELS = -45;
const VISUALIZER_BUFFER_LENGTH = 300;
- // Function to calculate the RMS level from time domain data
- const calculateRMS = (data: Uint8Array) => {
- let sumSquares = 0;
- for (let i = 0; i < data.length; i++) {
- const normalizedValue = (data[i] - 128) / 128; // Normalize the data
- sumSquares += normalizedValue * normalizedValue;
- }
- return Math.sqrt(sumSquares / data.length);
- };
-
- const analyseAudio = (stream) => {
- const audioContext = new AudioContext();
- const audioStreamSource = audioContext.createMediaStreamSource(stream);
-
- const analyser = audioContext.createAnalyser();
- analyser.minDecibels = MIN_DECIBELS;
- audioStreamSource.connect(analyser);
-
- const bufferLength = analyser.frequencyBinCount;
-
- const domainData = new Uint8Array(bufferLength);
- const timeDomainData = new Uint8Array(analyser.fftSize);
-
- let lastSoundTime = Date.now();
- hasStartedSpeaking = false;
-
- const detectSound = () => {
- const processFrame = () => {
- if (!mediaRecorder || !$showCallOverlay) {
- return;
- }
-
- analyser.getByteTimeDomainData(timeDomainData);
- analyser.getByteFrequencyData(domainData);
-
- // Calculate RMS level from time domain data
- rmsLevel = calculateRMS(timeDomainData);
-
- // Check if initial speech/noise has started
- const hasSound = domainData.some((value) => value > 0);
- if (hasSound) {
- hasStartedSpeaking = true;
- lastSoundTime = Date.now();
-
- // BIG RED TEXT
- console.log('%c%s', 'color: red; font-size: 20px;', '🔊 Sound detected');
- stopAllAudio();
- }
-
- // Start silence detection only after initial speech/noise has been detected
- if (hasStartedSpeaking) {
- if (Date.now() - lastSoundTime > 2000) {
- confirmed = true;
-
- if (mediaRecorder) {
- mediaRecorder.stop();
- }
- }
- }
-
- window.requestAnimationFrame(processFrame);
- };
-
- window.requestAnimationFrame(processFrame);
- };
-
- detectSound();
- };
-
const transcribeHandler = async (audioBlob) => {
// Create a blob from the audio chunks
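
The following hunk removes the playback helpers from this spot; `stopAllAudio`, `speakSpeechSynthesisHandler`, and `playAudio` reappear below (with `stopAllAudio` losing its queue-reset logic, which the new pipeline no longer needs), while `playAudioHandler` and `setContentAudio` are replaced outright. One observation on the re-added `speakSpeechSynthesisHandler`: it polls `speechSynthesis.getVoices()` every 100 ms. A possible simplification (a suggestion, not part of this patch) is the standard `voiceschanged` event:

```js
// Resolve with the voice list once the browser has populated it.
const getVoices = () =>
	new Promise((resolve) => {
		const voices = speechSynthesis.getVoices();
		if (voices.length > 0) {
			resolve(voices);
		} else {
			speechSynthesis.addEventListener(
				'voiceschanged',
				() => resolve(speechSynthesis.getVoices()),
				{ once: true }
			);
		}
	});
```
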
@@ -260,174 +169,6 @@
}
};
- const stopAllAudio = async () => {
- interrupted = true;
-
- if (chatStreaming) {
- stopResponse();
- }
-
- if (currentUtterance) {
- speechSynthesis.cancel();
- currentUtterance = null;
- }
-
- await tick();
- emojiQueue = [];
- audioQueue = [];
- await tick();
-
- const audioElement = document.getElementById('audioElement');
- if (audioElement) {
- audioElement.pause();
- audioElement.currentTime = 0;
- }
-
- assistantSpeaking = false;
- };
-
- const speakSpeechSynthesisHandler = (content) => {
- if ($showCallOverlay) {
- return new Promise((resolve) => {
- let voices = [];
- const getVoicesLoop = setInterval(async () => {
- voices = await speechSynthesis.getVoices();
- if (voices.length > 0) {
- clearInterval(getVoicesLoop);
-
- const voice =
- voices
- ?.filter(
- (v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
- )
- ?.at(0) ?? undefined;
-
- currentUtterance = new SpeechSynthesisUtterance(content);
-
- if (voice) {
- currentUtterance.voice = voice;
- }
-
- speechSynthesis.speak(currentUtterance);
- currentUtterance.onend = async (e) => {
- await new Promise((r) => setTimeout(r, 100));
- resolve(e);
- };
- }
- }, 100);
- });
- } else {
- return Promise.resolve();
- }
- };
-
- const playAudio = (audio) => {
- if ($showCallOverlay) {
- return new Promise((resolve) => {
- const audioElement = document.getElementById('audioElement');
-
- if (audioElement) {
- audioElement.src = audio.src;
- audioElement.muted = true;
-
- audioElement
- .play()
- .then(() => {
- audioElement.muted = false;
- })
- .catch((error) => {
- console.error(error);
- });
-
- audioElement.onended = async (e) => {
- await new Promise((r) => setTimeout(r, 100));
- resolve(e);
- };
- }
- });
- } else {
- return Promise.resolve();
- }
- };
-
- const playAudioHandler = async () => {
- console.log('playAudioHandler', audioQueue, assistantSpeaking, audioQueue.length > 0);
- if (!assistantSpeaking && !interrupted && audioQueue.length > 0) {
- assistantSpeaking = true;
-
- if ($settings?.showEmojiInCall ?? false) {
- if (emojiQueue.length > 0) {
- emoji = emojiQueue.shift();
- emojiQueue = emojiQueue;
- }
- }
-
- const audioToPlay = audioQueue.shift(); // Shift the audio out from queue before playing.
- audioQueue = audioQueue;
- await playAudio(audioToPlay);
- assistantSpeaking = false;
- }
- };
-
- const setContentAudio = async (content, idx) => {
- if (assistantSentenceAudios[idx] === undefined) {
- // Wait for the previous audio to be loaded
- if (idx > 0) {
- await new Promise((resolve) => {
- const check = setInterval(() => {
- if (
- assistantSentenceAudios[idx - 1] !== undefined &&
- assistantSentenceAudios[idx - 1] !== null
- ) {
- clearInterval(check);
- resolve();
- }
- }, 100);
- });
- }
-
- assistantSentenceAudios[idx] = null;
-
- if ($settings?.showEmojiInCall ?? false) {
- const sentenceEmoji = await generateEmoji(localStorage.token, modelId, content);
-
- if (sentenceEmoji) {
- // Big red text with content and emoji
- console.log('%c%s', 'color: blue; font-size: 10px;', `${sentenceEmoji}: ${content}`);
-
- if (/\p{Extended_Pictographic}/u.test(sentenceEmoji)) {
- emojiQueue.push(sentenceEmoji.match(/\p{Extended_Pictographic}/gu)[0]);
- emojiQueue = emojiQueue;
- }
- }
-
- await tick();
- }
-
- const res = await synthesizeOpenAISpeech(
- localStorage.token,
- $settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
- content
- ).catch((error) => {
- toast.error(error);
- assistantSpeaking = false;
- return null;
- });
-
- if (res) {
- const blob = await res.blob();
- const blobUrl = URL.createObjectURL(blob);
- const audio = new Audio(blobUrl);
- assistantSentenceAudios[idx] = audio;
-
- console.log('%c%s', 'color: red; font-size: 20px;', content);
-
- audioQueue.push(audio);
- audioQueue = audioQueue;
- }
- }
- };
-
const stopRecordingCallback = async (_continue = true) => {
if ($showCallOverlay) {
console.log('%c%s', 'color: red; font-size: 20px;', '🚨 stopRecordingCallback 🚨');
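
The final hunk of `CallOverlay.svelte` contains the new playback architecture. One monitor loop runs per response; `chatStartHandler` aborts the previous loop via an `AbortController` before starting a new one, so a fresh response always silences a stale one. In isolation (a sketch; the function name is illustrative, `monitorAndPlayAudio` is as defined in the hunk):

```js
let audioAbortController = new AbortController();

const switchToMessage = (id) => {
	audioAbortController.abort(); // stop the monitor loop of the previous response
	audioAbortController = new AbortController();
	monitorAndPlayAudio(id, audioAbortController.signal); // runs until aborted or finished
};
```
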
@@ -489,107 +230,315 @@
mediaRecorder.start();
};
- const resetAssistantMessage = async () => {
- interrupted = false;
-
- assistantMessage = '';
- assistantSentenceIdx = -1;
- assistantSentenceAudios = {}; // Reset audio tracking
- audioQueue = []; // Clear the audio queue
- audioQueue = audioQueue;
-
- emoji = null;
- emojiQueue = [];
- emojiQueue = emojiQueue;
+ // Function to calculate the RMS level from time domain data
+ const calculateRMS = (data: Uint8Array) => {
+ let sumSquares = 0;
+ for (let i = 0; i < data.length; i++) {
+ const normalizedValue = (data[i] - 128) / 128; // Normalize the data
+ sumSquares += normalizedValue * normalizedValue;
+ }
+ return Math.sqrt(sumSquares / data.length);
};
- $: (async () => {
+ const analyseAudio = (stream) => {
+ const audioContext = new AudioContext();
+ const audioStreamSource = audioContext.createMediaStreamSource(stream);
+
+ const analyser = audioContext.createAnalyser();
+ analyser.minDecibels = MIN_DECIBELS;
+ audioStreamSource.connect(analyser);
+
+ const bufferLength = analyser.frequencyBinCount;
+
+ const domainData = new Uint8Array(bufferLength);
+ const timeDomainData = new Uint8Array(analyser.fftSize);
+
+ let lastSoundTime = Date.now();
+ hasStartedSpeaking = false;
+
+ const detectSound = () => {
+ const processFrame = () => {
+ if (!mediaRecorder || !$showCallOverlay) {
+ return;
+ }
+
+ analyser.getByteTimeDomainData(timeDomainData);
+ analyser.getByteFrequencyData(domainData);
+
+ // Calculate RMS level from time domain data
+ rmsLevel = calculateRMS(timeDomainData);
+
+ // Check if initial speech/noise has started
+ const hasSound = domainData.some((value) => value > 0);
+ if (hasSound) {
+ // BIG RED TEXT
+ console.log('%c%s', 'color: red; font-size: 20px;', '🔊 Sound detected');
+
+ if (!hasStartedSpeaking) {
+ hasStartedSpeaking = true;
+ stopAllAudio();
+ }
+
+ lastSoundTime = Date.now();
+ }
+
+ // Start silence detection only after initial speech/noise has been detected
+ if (hasStartedSpeaking) {
+ if (Date.now() - lastSoundTime > 2000) {
+ confirmed = true;
+
+ if (mediaRecorder) {
+ mediaRecorder.stop();
+ }
+ }
+ }
+
+ window.requestAnimationFrame(processFrame);
+ };
+
+ window.requestAnimationFrame(processFrame);
+ };
+
+ detectSound();
+ };
+
+ let finishedMessages = {};
+ let currentMessageId = null;
+ let currentUtterance = null;
+
+ const speakSpeechSynthesisHandler = (content) => {
if ($showCallOverlay) {
- await resetAssistantMessage();
- await tick();
- startRecording();
+ return new Promise((resolve) => {
+ let voices = [];
+ const getVoicesLoop = setInterval(async () => {
+ voices = await speechSynthesis.getVoices();
+ if (voices.length > 0) {
+ clearInterval(getVoicesLoop);
+
+ const voice =
+ voices
+ ?.filter(
+ (v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
+ )
+ ?.at(0) ?? undefined;
+
+ currentUtterance = new SpeechSynthesisUtterance(content);
+
+ if (voice) {
+ currentUtterance.voice = voice;
+ }
+
+ speechSynthesis.speak(currentUtterance);
+ currentUtterance.onend = async (e) => {
+ await new Promise((r) => setTimeout(r, 100));
+ resolve(e);
+ };
+ }
+ }, 100);
+ });
} else {
- stopCamera();
- stopAllAudio();
- stopRecordingCallback(false);
+ return Promise.resolve();
}
- })();
+ };
- $: {
- if (audioQueue.length > 0 && !assistantSpeaking) {
- playAudioHandler();
- }
- }
+ const playAudio = (audio) => {
+ if ($showCallOverlay) {
+ return new Promise((resolve) => {
+ const audioElement = document.getElementById('audioElement');
- onMount(() => {
- eventTarget.addEventListener('chat:start', async (e) => {
- if ($showCallOverlay) {
- console.log('Chat start event:', e);
- await resetAssistantMessage();
- await tick();
- chatStreaming = true;
- }
- });
+ if (audioElement) {
+ audioElement.src = audio.src;
+ audioElement.muted = true;
- eventTarget.addEventListener('chat', async (e) => {
- if ($showCallOverlay) {
- const { content } = e.detail;
- assistantMessage += content;
- await tick();
+ audioElement
+ .play()
+ .then(() => {
+ audioElement.muted = false;
+ })
+ .catch((error) => {
+ console.error(error);
+ });
- if (!interrupted) {
- if ($config.audio.tts.engine !== '') {
- assistantSentenceIdx = assistantSentences.length - 2;
-
- if (assistantSentenceIdx >= 0 && !assistantSentenceAudios[assistantSentenceIdx]) {
- await tick();
- setContentAudio(assistantSentences[assistantSentenceIdx], assistantSentenceIdx);
- }
- }
+ audioElement.onended = async (e) => {
+ await new Promise((r) => setTimeout(r, 100));
+ resolve(e);
+ };
}
+ });
+ } else {
+ return Promise.resolve();
+ }
+ };
- chatStreaming = true;
+ const stopAllAudio = async () => {
+ interrupted = true;
+
+ if (chatStreaming) {
+ stopResponse();
+ }
+
+ if (currentUtterance) {
+ speechSynthesis.cancel();
+ currentUtterance = null;
+ }
+
+ const audioElement = document.getElementById('audioElement');
+ if (audioElement) {
+ audioElement.pause();
+ audioElement.currentTime = 0;
+ }
+ };
+
+ let audioAbortController = new AbortController();
+
+ // Audio cache, keyed by sentence content; values are Audio objects.
+ const audioCache = new Map();
+ const fetchAudio = async (content) => {
+ if (!audioCache.has(content)) {
+ try {
+ const res = await synthesizeOpenAISpeech(
+ localStorage.token,
+ $settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
+ content
+ ).catch((error) => {
+ console.error(error);
+ return null;
+ });
+
+ if (res) {
+ const blob = await res.blob();
+ const blobUrl = URL.createObjectURL(blob);
+ audioCache.set(content, new Audio(blobUrl));
+ }
+ } catch (error) {
+ console.error('Error synthesizing speech:', error);
}
- });
+ }
+ return audioCache.get(content);
+ };
- eventTarget.addEventListener('chat:finish', async (e) => {
- if ($showCallOverlay) {
- chatStreaming = false;
- loading = false;
+ let messages = {};
- console.log('Chat finish event:', e);
- await tick();
+ const monitorAndPlayAudio = async (id, signal) => {
+ while (!signal.aborted) {
+ if (messages[id] && messages[id].length > 0) {
+ // Dequeue the next sentence to play
+ const content = messages[id].shift();
- if (!interrupted) {
- if ($config.audio.tts.engine !== '') {
- for (const [idx, sentence] of assistantSentences.entries()) {
- if (!assistantSentenceAudios[idx]) {
- await tick();
- setContentAudio(sentence, idx);
- }
- }
- } else {
- if ($settings?.showEmojiInCall ?? false) {
- const res = await generateEmoji(localStorage.token, modelId, assistantMessage);
+ if (audioCache.has(content)) {
+ // If content is available in the cache, play it
+ try {
+ console.log(
+ '%c%s',
+ 'color: red; font-size: 20px;',
+ `Playing audio for content: ${content}`
+ );
- if (res) {
- console.log(res);
- if (/\p{Extended_Pictographic}/u.test(res)) {
- emoji = res.match(/\p{Extended_Pictographic}/gu)[0];
- }
- }
+ const audio = audioCache.get(content);
+ await playAudio(audio); // play the clip and wait for it to end
+ console.log(`Played audio for content: ${content}`);
+ await new Promise((resolve) => setTimeout(resolve, 200)); // brief pause between sentences
+ } catch (error) {
+ console.error('Error playing audio:', error);
+ }
+ } else {
+ // If not available in the cache, push it back to the queue and delay
+ messages[id].unshift(content); // Re-queue the content at the start
+ console.log(`Audio for "${content}" not yet available in the cache, re-queued...`);
+ await new Promise((resolve) => setTimeout(resolve, 200)); // Wait before retrying to reduce tight loop
+ }
+ } else if (finishedMessages[id] && messages[id] && messages[id].length === 0) {
+ // The message is finished and its queue is drained; stop monitoring
+ break;
+ } else {
+ // Nothing queued yet; sleep briefly before checking again
+ await new Promise((resolve) => setTimeout(resolve, 200));
+ }
+ }
+ console.log(`Audio monitoring and playing stopped for message ID ${id}`);
+ };
+
+ onMount(async () => {
+ startRecording();
+
+ const chatStartHandler = async (e) => {
+ const { id } = e.detail;
+
+ chatStreaming = true;
+
+ if ($config.audio.tts.engine !== '') {
+ // a new response has started; switch audio playback to this message
+ if (currentMessageId !== id) {
+ console.log(`Received chat start event for message ID ${id}`);
+
+ currentMessageId = id;
+ if (audioAbortController) {
+ audioAbortController.abort();
+ }
+ audioAbortController = new AbortController();
+
+ // Start monitoring and playing audio for the message ID
+ monitorAndPlayAudio(id, audioAbortController.signal);
+ }
+ }
+ };
+
+ const chatEventHandler = async (e) => {
+ const { id, content } = e.detail;
+ // "id" here is message id
+ // if "id" is not the same as "currentMessageId" then do not process
+ // "content" here is a sentence from the assistant,
+ // there will be many sentences for the same "id"
+
+ if ($config.audio.tts.engine !== '') {
+ if (currentMessageId === id) {
+ console.log(`Received chat event for message ID ${id}: ${content}`);
+
+ try {
+ if (messages[id] === undefined) {
+ messages[id] = [content];
+ } else {
+ messages[id].push(content);
}
- speakSpeechSynthesisHandler(assistantMessage);
+ console.log(content);
+
+ fetchAudio(content);
+ } catch (error) {
+ console.error('Failed to fetch or play audio:', error);
}
}
}
- });
+ };
+
+ const chatFinishHandler = async (e) => {
+ const { id, content } = e.detail;
+ // "content" here is the entire message from the assistant
+
+ chatStreaming = false;
+
+ if ($config.audio.tts.engine !== '') {
+ finishedMessages[id] = true;
+ } else {
+ speakSpeechSynthesisHandler(content);
+ }
+ };
+
+ eventTarget.addEventListener('chat:start', chatStartHandler);
+ eventTarget.addEventListener('chat', chatEventHandler);
+ eventTarget.addEventListener('chat:finish', chatFinishHandler);
+
+ return async () => {
+ eventTarget.removeEventListener('chat:start', chatStartHandler);
+ eventTarget.removeEventListener('chat', chatEventHandler);
+ eventTarget.removeEventListener('chat:finish', chatFinishHandler);
+
+ await stopRecordingCallback(false);
+ await stopCamera();
+ };
});
-
-
{#if $showCallOverlay}
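
Taken together, the handlers above form a per-message producer/consumer pipeline: `chatEventHandler` enqueues each sentence and prefetches its audio into `audioCache`, while `monitorAndPlayAudio` drains the queue in order, waiting whenever synthesis has not caught up. A condensed sketch, with the TTS call and playback stubbed as `synthesize`/`play`:

```js
const audioCache = new Map();
const queue = [];
let finished = false;

// Producer: called once per completed sentence.
const enqueue = (sentence) => {
	queue.push(sentence);
	if (!audioCache.has(sentence)) {
		// Prefetch so playback rarely has to wait.
		synthesize(sentence).then((audio) => audioCache.set(sentence, audio));
	}
};

// Consumer: plays sentences strictly in arrival order.
const drain = async (signal) => {
	while (!signal.aborted) {
		if (queue.length > 0 && audioCache.has(queue[0])) {
			await play(audioCache.get(queue.shift()));
		} else if (finished && queue.length === 0) {
			break; // all sentences played
		} else {
			await new Promise((r) => setTimeout(r, 200)); // audio not ready yet
		}
	}
};
```
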
diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts
--- a/src/lib/utils/index.ts
+++ b/src/lib/utils/index.ts
.filter((sentence) => sentence !== '');
};
+// Merge very short sentences (fewer than two words) with the sentence that follows,
+// so the TTS layer never receives tiny fragments.
+export const extractSentencesForAudio = (text) => {
+ return extractSentences(text).reduce((mergedTexts, currentText) => {
+ const lastIndex = mergedTexts.length - 1;
+ if (lastIndex >= 0) {
+ const previousText = mergedTexts[lastIndex];
+ const wordCount = previousText.split(/\s+/).length;
+ if (wordCount < 2) {
+ mergedTexts[lastIndex] = previousText + ' ' + currentText;
+ } else {
+ mergedTexts.push(currentText);
+ }
+ } else {
+ mergedTexts.push(currentText);
+ }
+ return mergedTexts;
+ }, []);
+};
+
export const blobToFile = (blob, fileName) => {
// Create a new File object from the Blob
const file = new File([blob], fileName, { type: blob.type });
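
A quick check of the merge rule in `extractSentencesForAudio` (assuming `extractSentences` splits on sentence-ending punctuation; the input string is illustrative):

```js
extractSentencesForAudio('Hi. How can I help you today? Ask me anything.');
// 'Hi.' is a single word, so it is merged with the sentence that follows:
// => ['Hi. How can I help you today?', 'Ask me anything.']
```
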