refac: voice call

Timothy J. Baek 2024-06-13 20:15:23 -07:00
parent 7ea572fdca
commit 7f70de99d3
3 changed files with 407 additions and 358 deletions

View File

@@ -30,6 +30,7 @@
 	import {
 		convertMessagesToHistory,
 		copyToClipboard,
+		extractSentencesForAudio,
 		promptTemplate,
 		splitStream
 	} from '$lib/utils';
@@ -593,7 +594,15 @@
 				array.findIndex((i) => JSON.stringify(i) === JSON.stringify(item)) === index
 		);

-		eventTarget.dispatchEvent(new CustomEvent('chat:start'));
+		eventTarget.dispatchEvent(
+			new CustomEvent('chat:start', {
+				detail: {
+					id: responseMessageId
+				}
+			})
+		);
+		await tick();

 		const [res, controller] = await generateChatCompletion(localStorage.token, {
 			model: model.id,
@@ -664,9 +673,23 @@
 						continue;
 					} else {
 						responseMessage.content += data.message.content;
-						eventTarget.dispatchEvent(
-							new CustomEvent('chat', { detail: { content: data.message.content } })
-						);
+
+						const sentences = extractSentencesForAudio(responseMessage.content);
+						sentences.pop();
+
+						// dispatch only last sentence and make sure it hasn't been dispatched before
+						if (
+							sentences.length > 0 &&
+							sentences[sentences.length - 1] !== responseMessage.lastSentence
+						) {
+							responseMessage.lastSentence = sentences[sentences.length - 1];
+							eventTarget.dispatchEvent(
+								new CustomEvent('chat', {
+									detail: { id: responseMessageId, content: sentences[sentences.length - 1] }
+								})
+							);
+						}
+
 						messages = messages;
 					}
 				} else {
@@ -760,7 +783,23 @@
 			stopResponseFlag = false;
 			await tick();

-			eventTarget.dispatchEvent(new CustomEvent('chat:finish'));
+			let lastSentence = extractSentencesForAudio(responseMessage.content)?.at(-1) ?? '';
+			if (lastSentence) {
+				eventTarget.dispatchEvent(
+					new CustomEvent('chat', {
+						detail: { id: responseMessageId, content: lastSentence }
+					})
+				);
+			}
+
+			eventTarget.dispatchEvent(
+				new CustomEvent('chat:finish', {
+					detail: {
+						id: responseMessageId,
+						content: responseMessage.content
+					}
+				})
+			);

 			if (autoScroll) {
 				scrollToBottom();
@@ -802,7 +841,14 @@
 			scrollToBottom();

-		eventTarget.dispatchEvent(new CustomEvent('chat:start'));
+		eventTarget.dispatchEvent(
+			new CustomEvent('chat:start', {
+				detail: {
+					id: responseMessageId
+				}
+			})
+		);
+		await tick();

 		try {
 			const [res, controller] = await generateOpenAIChatCompletion(
@@ -924,7 +970,23 @@
 						continue;
 					} else {
 						responseMessage.content += value;
-						eventTarget.dispatchEvent(new CustomEvent('chat', { detail: { content: value } }));
+
+						const sentences = extractSentencesForAudio(responseMessage.content);
+						sentences.pop();
+
+						// dispatch only last sentence and make sure it hasn't been dispatched before
+						if (
+							sentences.length > 0 &&
+							sentences[sentences.length - 1] !== responseMessage.lastSentence
+						) {
+							responseMessage.lastSentence = sentences[sentences.length - 1];
+							eventTarget.dispatchEvent(
+								new CustomEvent('chat', {
+									detail: { id: responseMessageId, content: sentences[sentences.length - 1] }
+								})
+							);
+						}
+
 						messages = messages;
 					}
@@ -975,7 +1037,23 @@
 			stopResponseFlag = false;
 			await tick();

-			eventTarget.dispatchEvent(new CustomEvent('chat:finish'));
+			let lastSentence = extractSentencesForAudio(responseMessage.content)?.at(-1) ?? '';
+			if (lastSentence) {
+				eventTarget.dispatchEvent(
+					new CustomEvent('chat', {
+						detail: { id: responseMessageId, content: lastSentence }
+					})
+				);
+			}
+
+			eventTarget.dispatchEvent(
+				new CustomEvent('chat:finish', {
+					detail: {
+						id: responseMessageId,
+						content: responseMessage.content
+					}
+				})
+			);

 			if (autoScroll) {
 				scrollToBottom();
@@ -1207,14 +1285,18 @@
 	</title>
 </svelte:head>

-<CallOverlay
-	{submitPrompt}
-	{stopResponse}
-	bind:files
-	modelId={selectedModelIds?.at(0) ?? null}
-	chatId={$chatId}
-	{eventTarget}
-/>
+<audio id="audioElement" src="" style="display: none;" />
+
+{#if $showCallOverlay}
+	<CallOverlay
+		{submitPrompt}
+		{stopResponse}
+		bind:files
+		modelId={selectedModelIds?.at(0) ?? null}
+		chatId={$chatId}
+		{eventTarget}
+	/>
+{/if}

 {#if !chatIdProp || (loaded && chatIdProp)}
 	<div
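
The hunks above replace the single per-token 'chat' event with a small per-message contract on eventTarget: 'chat:start' carries { id }, 'chat' carries { id, content } once per completed sentence, and 'chat:finish' carries { id, content } with the full message. A minimal listener sketch, illustrative only and not part of this commit (the EventTarget instance below stands in for the eventTarget prop passed to CallOverlay):

// Hypothetical consumer of the events dispatched in the diff above.
const eventTarget = new EventTarget(); // stand-in for the shared eventTarget prop

eventTarget.addEventListener('chat:start', (e) => {
	const { id } = (e as CustomEvent).detail; // a new assistant message started streaming
	console.log('start', id);
});

eventTarget.addEventListener('chat', (e) => {
	const { id, content } = (e as CustomEvent).detail; // one newly completed sentence
	console.log('sentence', id, content);
});

eventTarget.addEventListener('chat:finish', (e) => {
	const { id, content } = (e as CustomEvent).detail; // full message content at the end
	console.log('finish', id, content);
});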

View File

@@ -2,7 +2,12 @@
 	import { config, settings, showCallOverlay } from '$lib/stores';
 	import { onMount, tick, getContext } from 'svelte';

-	import { blobToFile, calculateSHA256, extractSentences, findWordIndices } from '$lib/utils';
+	import {
+		blobToFile,
+		calculateSHA256,
+		extractSentencesForAudio,
+		findWordIndices
+	} from '$lib/utils';
 	import { generateEmoji } from '$lib/apis';
 	import { synthesizeOpenAISpeech, transcribeAudio } from '$lib/apis/audio';
@@ -32,34 +37,7 @@
 	let camera = false;
 	let cameraStream = null;

-	let assistantSpeaking = false;
 	let chatStreaming = false;

-	let assistantMessage = '';
-	let assistantSentences = [];
-	let assistantSentenceAudios = {};
-	let assistantSentenceIdx = -1;
-
-	let audioQueue = [];
-	let emojiQueue = [];
-
-	$: assistantSentences = extractSentences(assistantMessage).reduce((mergedTexts, currentText) => {
-		const lastIndex = mergedTexts.length - 1;
-		if (lastIndex >= 0) {
-			const previousText = mergedTexts[lastIndex];
-			const wordCount = previousText.split(/\s+/).length;
-			if (wordCount < 2) {
-				mergedTexts[lastIndex] = previousText + ' ' + currentText;
-			} else {
-				mergedTexts.push(currentText);
-			}
-		} else {
-			mergedTexts.push(currentText);
-		}
-		return mergedTexts;
-	}, []);
-
-	let currentUtterance = null;
-
 	let rmsLevel = 0;
 	let hasStartedSpeaking = false;
@@ -170,75 +148,6 @@
 	const MIN_DECIBELS = -45;
 	const VISUALIZER_BUFFER_LENGTH = 300;

-	// Function to calculate the RMS level from time domain data
-	const calculateRMS = (data: Uint8Array) => {
-		let sumSquares = 0;
-		for (let i = 0; i < data.length; i++) {
-			const normalizedValue = (data[i] - 128) / 128; // Normalize the data
-			sumSquares += normalizedValue * normalizedValue;
-		}
-		return Math.sqrt(sumSquares / data.length);
-	};
-
-	const analyseAudio = (stream) => {
-		const audioContext = new AudioContext();
-		const audioStreamSource = audioContext.createMediaStreamSource(stream);
-
-		const analyser = audioContext.createAnalyser();
-		analyser.minDecibels = MIN_DECIBELS;
-		audioStreamSource.connect(analyser);
-
-		const bufferLength = analyser.frequencyBinCount;
-
-		const domainData = new Uint8Array(bufferLength);
-		const timeDomainData = new Uint8Array(analyser.fftSize);
-
-		let lastSoundTime = Date.now();
-		hasStartedSpeaking = false;
-
-		const detectSound = () => {
-			const processFrame = () => {
-				if (!mediaRecorder || !$showCallOverlay) {
-					return;
-				}
-
-				analyser.getByteTimeDomainData(timeDomainData);
-				analyser.getByteFrequencyData(domainData);
-
-				// Calculate RMS level from time domain data
-				rmsLevel = calculateRMS(timeDomainData);
-
-				// Check if initial speech/noise has started
-				const hasSound = domainData.some((value) => value > 0);
-				if (hasSound) {
-					hasStartedSpeaking = true;
-					lastSoundTime = Date.now();
-
-					// BIG RED TEXT
-					console.log('%c%s', 'color: red; font-size: 20px;', '🔊 Sound detected');
-
-					stopAllAudio();
-				}
-
-				// Start silence detection only after initial speech/noise has been detected
-				if (hasStartedSpeaking) {
-					if (Date.now() - lastSoundTime > 2000) {
-						confirmed = true;
-
-						if (mediaRecorder) {
-							mediaRecorder.stop();
-						}
-					}
-				}
-
-				window.requestAnimationFrame(processFrame);
-			};
-
-			window.requestAnimationFrame(processFrame);
-		};
-
-		detectSound();
-	};
-
 	const transcribeHandler = async (audioBlob) => {
 		// Create a blob from the audio chunks
@@ -260,174 +169,6 @@
 		}
 	};

-	const stopAllAudio = async () => {
-		interrupted = true;
-
-		if (chatStreaming) {
-			stopResponse();
-		}
-
-		if (currentUtterance) {
-			speechSynthesis.cancel();
-			currentUtterance = null;
-		}
-
-		await tick();
-		emojiQueue = [];
-		audioQueue = [];
-		await tick();
-
-		const audioElement = document.getElementById('audioElement');
-		if (audioElement) {
-			audioElement.pause();
-			audioElement.currentTime = 0;
-		}
-
-		assistantSpeaking = false;
-	};
-
-	const speakSpeechSynthesisHandler = (content) => {
-		if ($showCallOverlay) {
-			return new Promise((resolve) => {
-				let voices = [];
-				const getVoicesLoop = setInterval(async () => {
-					voices = await speechSynthesis.getVoices();
-					if (voices.length > 0) {
-						clearInterval(getVoicesLoop);
-
-						const voice =
-							voices
-								?.filter(
-									(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
-								)
-								?.at(0) ?? undefined;
-
-						currentUtterance = new SpeechSynthesisUtterance(content);
-
-						if (voice) {
-							currentUtterance.voice = voice;
-						}
-
-						speechSynthesis.speak(currentUtterance);
-
-						currentUtterance.onend = async (e) => {
-							await new Promise((r) => setTimeout(r, 100));
-							resolve(e);
-						};
-					}
-				}, 100);
-			});
-		} else {
-			return Promise.resolve();
-		}
-	};
-
-	const playAudio = (audio) => {
-		if ($showCallOverlay) {
-			return new Promise((resolve) => {
-				const audioElement = document.getElementById('audioElement');
-
-				if (audioElement) {
-					audioElement.src = audio.src;
-					audioElement.muted = true;
-
-					audioElement
-						.play()
-						.then(() => {
-							audioElement.muted = false;
-						})
-						.catch((error) => {
-							console.error(error);
-						});
-
-					audioElement.onended = async (e) => {
-						await new Promise((r) => setTimeout(r, 100));
-						resolve(e);
-					};
-				}
-			});
-		} else {
-			return Promise.resolve();
-		}
-	};
-
-	const playAudioHandler = async () => {
-		console.log('playAudioHandler', audioQueue, assistantSpeaking, audioQueue.length > 0);
-		if (!assistantSpeaking && !interrupted && audioQueue.length > 0) {
-			assistantSpeaking = true;
-
-			if ($settings?.showEmojiInCall ?? false) {
-				if (emojiQueue.length > 0) {
-					emoji = emojiQueue.shift();
-					emojiQueue = emojiQueue;
-				}
-			}
-
-			const audioToPlay = audioQueue.shift(); // Shift the audio out from queue before playing.
-			audioQueue = audioQueue;
-			await playAudio(audioToPlay);
-			assistantSpeaking = false;
-		}
-	};
-
-	const setContentAudio = async (content, idx) => {
-		if (assistantSentenceAudios[idx] === undefined) {
-			// Wait for the previous audio to be loaded
-			if (idx > 0) {
-				await new Promise((resolve) => {
-					const check = setInterval(() => {
-						if (
-							assistantSentenceAudios[idx - 1] !== undefined &&
-							assistantSentenceAudios[idx - 1] !== null
-						) {
-							clearInterval(check);
-							resolve();
-						}
-					}, 100);
-				});
-			}
-
-			assistantSentenceAudios[idx] = null;
-
-			if ($settings?.showEmojiInCall ?? false) {
-				const sentenceEmoji = await generateEmoji(localStorage.token, modelId, content);
-
-				if (sentenceEmoji) {
-					// Big red text with content and emoji
-					console.log('%c%s', 'color: blue; font-size: 10px;', `${sentenceEmoji}: ${content}`);
-
-					if (/\p{Extended_Pictographic}/u.test(sentenceEmoji)) {
-						emojiQueue.push(sentenceEmoji.match(/\p{Extended_Pictographic}/gu)[0]);
-						emojiQueue = emojiQueue;
-					}
-				}
-
-				await tick();
-			}
-
-			const res = await synthesizeOpenAISpeech(
-				localStorage.token,
-				$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
-				content
-			).catch((error) => {
-				toast.error(error);
-
-				assistantSpeaking = false;
-				return null;
-			});
-
-			if (res) {
-				const blob = await res.blob();
-				const blobUrl = URL.createObjectURL(blob);
-				const audio = new Audio(blobUrl);
-				assistantSentenceAudios[idx] = audio;
-				console.log('%c%s', 'color: red; font-size: 20px;', content);
-				audioQueue.push(audio);
-				audioQueue = audioQueue;
-			}
-		}
-	};
-
 	const stopRecordingCallback = async (_continue = true) => {
 		if ($showCallOverlay) {
 			console.log('%c%s', 'color: red; font-size: 20px;', '🚨 stopRecordingCallback 🚨');
@@ -489,107 +230,315 @@
 		mediaRecorder.start();
 	};

-	const resetAssistantMessage = async () => {
-		interrupted = false;
-
-		assistantMessage = '';
-		assistantSentenceIdx = -1;
-		assistantSentenceAudios = {}; // Reset audio tracking
-		audioQueue = []; // Clear the audio queue
-		audioQueue = audioQueue;
-
-		emoji = null;
-		emojiQueue = [];
-		emojiQueue = emojiQueue;
-	};
-
-	$: (async () => {
-		if ($showCallOverlay) {
-			await resetAssistantMessage();
-			await tick();
-			startRecording();
-		} else {
-			stopCamera();
-			stopAllAudio();
-			stopRecordingCallback(false);
-		}
-	})();
-
-	$: {
-		if (audioQueue.length > 0 && !assistantSpeaking) {
-			playAudioHandler();
-		}
-	}
-
-	onMount(() => {
-		eventTarget.addEventListener('chat:start', async (e) => {
-			if ($showCallOverlay) {
-				console.log('Chat start event:', e);
-				await resetAssistantMessage();
-				await tick();
-				chatStreaming = true;
-			}
-		});
-
-		eventTarget.addEventListener('chat', async (e) => {
-			if ($showCallOverlay) {
-				const { content } = e.detail;
-				assistantMessage += content;
-				await tick();
-
-				if (!interrupted) {
-					if ($config.audio.tts.engine !== '') {
-						assistantSentenceIdx = assistantSentences.length - 2;
-
-						if (assistantSentenceIdx >= 0 && !assistantSentenceAudios[assistantSentenceIdx]) {
-							await tick();
-							setContentAudio(assistantSentences[assistantSentenceIdx], assistantSentenceIdx);
-						}
-					}
-				}
-
-				chatStreaming = true;
-			}
-		});
-
-		eventTarget.addEventListener('chat:finish', async (e) => {
-			if ($showCallOverlay) {
-				chatStreaming = false;
-				loading = false;
-
-				console.log('Chat finish event:', e);
-				await tick();
-
-				if (!interrupted) {
-					if ($config.audio.tts.engine !== '') {
-						for (const [idx, sentence] of assistantSentences.entries()) {
-							if (!assistantSentenceAudios[idx]) {
-								await tick();
-								setContentAudio(sentence, idx);
-							}
-						}
-					} else {
-						if ($settings?.showEmojiInCall ?? false) {
-							const res = await generateEmoji(localStorage.token, modelId, assistantMessage);
-
-							if (res) {
-								console.log(res);
-
-								if (/\p{Extended_Pictographic}/u.test(res)) {
-									emoji = res.match(/\p{Extended_Pictographic}/gu)[0];
-								}
-							}
-						}
-
-						speakSpeechSynthesisHandler(assistantMessage);
-					}
-				}
-			}
-		});
-	});
+	// Function to calculate the RMS level from time domain data
+	const calculateRMS = (data: Uint8Array) => {
+		let sumSquares = 0;
+
+		for (let i = 0; i < data.length; i++) {
+			const normalizedValue = (data[i] - 128) / 128; // Normalize the data
+			sumSquares += normalizedValue * normalizedValue;
+		}
+		return Math.sqrt(sumSquares / data.length);
+	};
+
+	const analyseAudio = (stream) => {
+		const audioContext = new AudioContext();
+		const audioStreamSource = audioContext.createMediaStreamSource(stream);
+
+		const analyser = audioContext.createAnalyser();
+		analyser.minDecibels = MIN_DECIBELS;
+		audioStreamSource.connect(analyser);
+
+		const bufferLength = analyser.frequencyBinCount;
+
+		const domainData = new Uint8Array(bufferLength);
+		const timeDomainData = new Uint8Array(analyser.fftSize);
+
+		let lastSoundTime = Date.now();
+
+		hasStartedSpeaking = false;
+
+		const detectSound = () => {
+			const processFrame = () => {
+				if (!mediaRecorder || !$showCallOverlay) {
+					return;
+				}
+
+				analyser.getByteTimeDomainData(timeDomainData);
+				analyser.getByteFrequencyData(domainData);
+
+				// Calculate RMS level from time domain data
+				rmsLevel = calculateRMS(timeDomainData);
+
+				// Check if initial speech/noise has started
+				const hasSound = domainData.some((value) => value > 0);
+				if (hasSound) {
+					// BIG RED TEXT
+					console.log('%c%s', 'color: red; font-size: 20px;', '🔊 Sound detected');
+
+					if (!hasStartedSpeaking) {
+						hasStartedSpeaking = true;
+						stopAllAudio();
+					}
+
+					lastSoundTime = Date.now();
+				}
+
+				// Start silence detection only after initial speech/noise has been detected
+				if (hasStartedSpeaking) {
+					if (Date.now() - lastSoundTime > 2000) {
+						confirmed = true;
+
+						if (mediaRecorder) {
+							mediaRecorder.stop();
+						}
+					}
+				}
+
+				window.requestAnimationFrame(processFrame);
+			};
+
+			window.requestAnimationFrame(processFrame);
+		};
+
+		detectSound();
+	};
+
+	let finishedMessages = {};
+	let currentMessageId = null;
+	let currentUtterance = null;
+
+	const speakSpeechSynthesisHandler = (content) => {
+		if ($showCallOverlay) {
+			return new Promise((resolve) => {
+				let voices = [];
+				const getVoicesLoop = setInterval(async () => {
+					voices = await speechSynthesis.getVoices();
+					if (voices.length > 0) {
+						clearInterval(getVoicesLoop);
+
+						const voice =
+							voices
+								?.filter(
+									(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
+								)
+								?.at(0) ?? undefined;
+
+						currentUtterance = new SpeechSynthesisUtterance(content);
+
+						if (voice) {
+							currentUtterance.voice = voice;
+						}
+
+						speechSynthesis.speak(currentUtterance);
+
+						currentUtterance.onend = async (e) => {
+							await new Promise((r) => setTimeout(r, 100));
+							resolve(e);
+						};
+					}
+				}, 100);
+			});
+		} else {
+			return Promise.resolve();
+		}
+	};
+
+	const playAudio = (audio) => {
+		if ($showCallOverlay) {
+			return new Promise((resolve) => {
+				const audioElement = document.getElementById('audioElement');
+
+				if (audioElement) {
+					audioElement.src = audio.src;
+					audioElement.muted = true;
+
+					audioElement
+						.play()
+						.then(() => {
+							audioElement.muted = false;
+						})
+						.catch((error) => {
+							console.error(error);
+						});
+
+					audioElement.onended = async (e) => {
+						await new Promise((r) => setTimeout(r, 100));
+						resolve(e);
+					};
+				}
+			});
+		} else {
+			return Promise.resolve();
+		}
+	};
+
+	const stopAllAudio = async () => {
+		interrupted = true;
+
+		if (chatStreaming) {
+			stopResponse();
+		}
+
+		if (currentUtterance) {
+			speechSynthesis.cancel();
+			currentUtterance = null;
+		}
+
+		const audioElement = document.getElementById('audioElement');
+		if (audioElement) {
+			audioElement.pause();
+			audioElement.currentTime = 0;
+		}
+	};
+
+	let audioAbortController = new AbortController();
+
+	// Audio cache map where key is the content and value is the Audio object.
+	const audioCache = new Map();
+	const fetchAudio = async (content) => {
+		if (!audioCache.has(content)) {
+			try {
+				const res = await synthesizeOpenAISpeech(
+					localStorage.token,
+					$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
+					content
+				).catch((error) => {
+					console.error(error);
+					return null;
+				});
+
+				if (res) {
+					const blob = await res.blob();
+					const blobUrl = URL.createObjectURL(blob);
+					audioCache.set(content, new Audio(blobUrl));
+				}
+			} catch (error) {
+				console.error('Error synthesizing speech:', error);
+			}
+		}
+
+		return audioCache.get(content);
+	};
+
+	let messages = {};
+
+	const monitorAndPlayAudio = async (id, signal) => {
+		while (!signal.aborted) {
+			if (messages[id] && messages[id].length > 0) {
+				// Retrieve the next content string from the queue
+				const content = messages[id].shift(); // Dequeues the content for playing
+				if (audioCache.has(content)) {
+					// If content is available in the cache, play it
+					try {
+						console.log(
+							'%c%s',
+							'color: red; font-size: 20px;',
+							`Playing audio for content: ${content}`
+						);
+
+						const audio = audioCache.get(content);
+						await playAudio(audio); // Here ensure that playAudio is indeed correct method to execute
+						console.log(`Played audio for content: ${content}`);
+						await new Promise((resolve) => setTimeout(resolve, 200)); // Wait before retrying to reduce tight loop
+					} catch (error) {
+						console.error('Error playing audio:', error);
+					}
+				} else {
+					// If not available in the cache, push it back to the queue and delay
+					messages[id].unshift(content); // Re-queue the content at the start
+					console.log(`Audio for "${content}" not yet available in the cache, re-queued...`);
+					await new Promise((resolve) => setTimeout(resolve, 200)); // Wait before retrying to reduce tight loop
+				}
+			} else if (finishedMessages[id] && messages[id] && messages[id].length === 0) {
+				// If the message is finished and there are no more messages to process, break the loop
+				break;
+			} else {
+				// No messages to process, sleep for a bit
+				await new Promise((resolve) => setTimeout(resolve, 200));
+			}
+		}
+		console.log(`Audio monitoring and playing stopped for message ID ${id}`);
+	};
+
+	onMount(async () => {
+		startRecording();
+
+		const chatStartHandler = async (e) => {
+			const { id } = e.detail;
+
+			chatStreaming = true;
+
+			if ($config.audio.tts.engine !== '') {
+				// set currentMessageId to id
+				if (currentMessageId !== id) {
+					console.log(`Received chat start event for message ID ${id}`);
+
+					currentMessageId = id;
+					if (audioAbortController) {
+						audioAbortController.abort();
+					}
+					audioAbortController = new AbortController();
+
+					// Start monitoring and playing audio for the message ID
+					monitorAndPlayAudio(id, audioAbortController.signal);
+				}
+			}
+		};
+
+		const chatEventHandler = async (e) => {
+			const { id, content } = e.detail;
+			// "id" here is message id
+			// if "id" is not the same as "currentMessageId" then do not process
+			// "content" here is a sentence from the assistant,
+			// there will be many sentences for the same "id"
+
+			if ($config.audio.tts.engine !== '') {
+				if (currentMessageId === id) {
+					console.log(`Received chat event for message ID ${id}: ${content}`);
+
+					try {
+						if (messages[id] === undefined) {
+							messages[id] = [content];
+						} else {
+							messages[id].push(content);
+						}
+
+						console.log(content);
+						fetchAudio(content);
+					} catch (error) {
+						console.error('Failed to fetch or play audio:', error);
+					}
+				}
+			}
+		};
+
+		const chatFinishHandler = async (e) => {
+			const { id, content } = e.detail;
+			// "content" here is the entire message from the assistant
+
+			chatStreaming = false;
+
+			if ($config.audio.tts.engine !== '') {
+				finishedMessages[id] = true;
+			} else {
+				speakSpeechSynthesisHandler(content);
+			}
+		};
+
+		eventTarget.addEventListener('chat:start', chatStartHandler);
+		eventTarget.addEventListener('chat', chatEventHandler);
+		eventTarget.addEventListener('chat:finish', chatFinishHandler);
+
+		return async () => {
+			eventTarget.removeEventListener('chat:start', chatStartHandler);
+			eventTarget.removeEventListener('chat', chatEventHandler);
+			eventTarget.removeEventListener('chat:finish', chatFinishHandler);
+
+			await stopRecordingCallback(false);
+			await stopCamera();
+		};
+	});
 </script>

-<audio id="audioElement" src="" style="display: none;" />
-
 {#if $showCallOverlay}
 	<div class=" absolute w-full h-screen max-h-[100dvh] flex z-[999] overflow-hidden">
 		<div

View File

@@ -443,6 +443,24 @@ export const extractSentences = (text) => {
 		.filter((sentence) => sentence !== '');
 };

+export const extractSentencesForAudio = (text) => {
+	return extractSentences(text).reduce((mergedTexts, currentText) => {
+		const lastIndex = mergedTexts.length - 1;
+		if (lastIndex >= 0) {
+			const previousText = mergedTexts[lastIndex];
+			const wordCount = previousText.split(/\s+/).length;
+			if (wordCount < 2) {
+				mergedTexts[lastIndex] = previousText + ' ' + currentText;
+			} else {
+				mergedTexts.push(currentText);
+			}
+		} else {
+			mergedTexts.push(currentText);
+		}
+		return mergedTexts;
+	}, []);
+};
+
 export const blobToFile = (blob, fileName) => {
 	// Create a new File object from the Blob
 	const file = new File([blob], fileName, { type: blob.type });
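
The extractSentencesForAudio helper added above merges sentences so that no fragment shorter than two words is synthesized on its own: when the previous chunk has fewer than two words, the next sentence is appended to it. A rough usage sketch, assuming extractSentences splits on sentence-ending punctuation (its full body is not shown in this diff):

import { extractSentencesForAudio } from '$lib/utils';

// Partially streamed assistant text:
const streamed = 'Sure. Here is a quick summary of the results. More to come';

// Likely yields something like:
// ['Sure. Here is a quick summary of the results.', 'More to come']
// because the one-word sentence 'Sure.' absorbs the sentence that follows it.
const sentences = extractSentencesForAudio(streamed);

// The chat handlers above pop the trailing (possibly unfinished) sentence
// before dispatching, so only completed sentences reach the TTS queue.
sentences.pop();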