Mirror of https://github.com/open-webui/open-webui (synced 2025-06-14 02:11:03 +00:00)

refac: optimisation

This commit is contained in:
parent 4e640daf83
commit fc3a31e3d5
@@ -2,8 +2,8 @@
 	import { settings, showCallOverlay } from '$lib/stores';
 	import { onMount, tick, getContext } from 'svelte';
 
-	import { blobToFile, calculateSHA256, findWordIndices } from '$lib/utils';
-	import { transcribeAudio } from '$lib/apis/audio';
+	import { blobToFile, calculateSHA256, extractSentences, findWordIndices } from '$lib/utils';
+	import { synthesizeOpenAISpeech, transcribeAudio } from '$lib/apis/audio';
 	import { toast } from 'svelte-sonner';
 
 	const i18n = getContext('i18n');
@@ -14,7 +14,8 @@
 	let confirmed = false;
 
 	let assistantSpeaking = false;
-	let assistantAudio = null;
+	let assistantAudio = {};
+	let assistantAudioIdx = null;
 
 	let rmsLevel = 0;
 	let hasStartedSpeaking = false;
@@ -26,6 +27,7 @@
 	let animationFrameId;
 
 	let speechRecognition;
+	let currentUtterance = null;
 
 	let mediaRecorder;
 	let audioChunks = [];
@@ -108,14 +110,7 @@
 			// Check if initial speech/noise has started
 			const hasSound = domainData.some((value) => value > 0);
 			if (hasSound) {
-				if (assistantSpeaking) {
-					speechSynthesis.cancel();
-
-					if (assistantAudio) {
-						assistantAudio.pause();
-						assistantAudio.currentTime = 0;
-					}
-				}
+				stopAllAudio();
 				hasStartedSpeaking = true;
 				lastSoundTime = Date.now();
 			}
@@ -140,6 +135,55 @@
 		detectSound();
 	};
 
+	const stopAllAudio = () => {
+		if (currentUtterance) {
+			speechSynthesis.cancel();
+			currentUtterance = null;
+		}
+		if (assistantAudio[assistantAudioIdx]) {
+			assistantAudio[assistantAudioIdx].pause();
+			assistantAudio[assistantAudioIdx].currentTime = 0;
+		}
+		assistantSpeaking = false;
+	};
+
+	const playAudio = (idx) => {
+		return new Promise((res) => {
+			assistantAudioIdx = idx;
+			const audio = assistantAudio[idx];
+			audio.play();
+			audio.onended = async (e) => {
+				await new Promise((r) => setTimeout(r, 300));
+
+				if (Object.keys(assistantAudio).length - 1 === idx) {
+					assistantSpeaking = false;
+				}
+
+				res(e);
+			};
+		});
+	};
+
+	const getOpenAISpeech = async (text) => {
+		const res = await synthesizeOpenAISpeech(
+			localStorage.token,
+			$settings?.audio?.speaker ?? 'alloy',
+			text,
+			$settings?.audio?.model ?? 'tts-1'
+		).catch((error) => {
+			toast.error(error);
+			assistantSpeaking = false;
+			return null;
+		});
+
+		if (res) {
+			const blob = await res.blob();
+			const blobUrl = URL.createObjectURL(blob);
+			const audio = new Audio(blobUrl);
+			assistantAudio = audio;
+		}
+	};
+
 	const transcribeHandler = async (audioBlob) => {
 		// Create a blob from the audio chunks
 
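Note on the new playAudio helper above: it resolves its promise only after the clip's ended event fires (plus a short 300 ms pause), so callers can chain clips strictly back to back. A minimal usage sketch, assuming assistantAudio has already been populated with Audio objects keyed by sentence index, as the loop further below does:

// Illustrative only: play every prepared clip in order by chaining the promises.
let queue = Promise.resolve();
for (const idx of Object.keys(assistantAudio)) {
	queue = queue.then(() => playAudio(Number(idx)));
}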
@@ -152,21 +196,68 @@
 		});
 
 		if (res) {
-			toast.success(res.text);
+			console.log(res.text);
 
 			const _responses = await submitPrompt(res.text);
 			console.log(_responses);
 
 			if (_responses.at(0)) {
-				const response = _responses[0];
-				if (response) {
+				const content = _responses[0];
+				if (content) {
+					assistantSpeakingHandler(content);
+				}
+			}
+		}
+	};
+
+	const assistantSpeakingHandler = async (content) => {
 		assistantSpeaking = true;
 
-		if ($settings?.audio?.TTSEngine ?? '') {
-			speechSynthesis.speak(new SpeechSynthesisUtterance(response));
-		} else {
+		if (($settings?.audio?.TTSEngine ?? '') == '') {
+			currentUtterance = new SpeechSynthesisUtterance(content);
+			speechSynthesis.speak(currentUtterance);
+		} else if ($settings?.audio?.TTSEngine === 'openai') {
 			console.log('openai');
+
+			const sentences = extractSentences(content).reduce((mergedTexts, currentText) => {
+				const lastIndex = mergedTexts.length - 1;
+				if (lastIndex >= 0) {
+					const previousText = mergedTexts[lastIndex];
+					const wordCount = previousText.split(/\s+/).length;
+					if (wordCount < 2) {
+						mergedTexts[lastIndex] = previousText + ' ' + currentText;
+					} else {
+						mergedTexts.push(currentText);
 					}
+				} else {
+					mergedTexts.push(currentText);
+				}
+				return mergedTexts;
+			}, []);
+
+			console.log(sentences);
+
+			let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
+
+			for (const [idx, sentence] of sentences.entries()) {
+				const res = await synthesizeOpenAISpeech(
+					localStorage.token,
+					$settings?.audio?.speaker,
+					sentence,
+					$settings?.audio?.model
+				).catch((error) => {
+					toast.error(error);
+
+					assistantSpeaking = false;
+					return null;
+				});
+
+				if (res) {
+					const blob = await res.blob();
+					const blobUrl = URL.createObjectURL(blob);
+					const audio = new Audio(blobUrl);
+					assistantAudio[idx] = audio;
+					lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
 				}
 			}
 		}
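The reduce over extractSentences(content) above merges very short fragments into the preceding sentence, so each synthesizeOpenAISpeech call gets a reasonable chunk of text instead of a one-word clip. A standalone sketch of the same logic (illustrative; extractSentences from $lib/utils is assumed to return an array of sentence strings):

// Append the current sentence to the previous entry whenever that entry has fewer than 2 words.
const mergeShortSentences = (sentences) =>
	sentences.reduce((mergedTexts, currentText) => {
		const lastIndex = mergedTexts.length - 1;
		if (lastIndex >= 0 && mergedTexts[lastIndex].split(/\s+/).length < 2) {
			mergedTexts[lastIndex] = mergedTexts[lastIndex] + ' ' + currentText;
		} else {
			mergedTexts.push(currentText);
		}
		return mergedTexts;
	}, []);

// e.g. mergeShortSentences(['Hello!', 'Sure.', 'Here is a longer sentence.'])
//      -> ['Hello! Sure.', 'Here is a longer sentence.']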
@@ -311,7 +402,7 @@
 					{#if loading}
 						Thinking...
 					{:else}
-						Listening... {Math.round(rmsLevel * 100)}
+						Listening...
 					{/if}
 				</div>
 			</button>
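Taken as a whole, the speaking path now branches on $settings?.audio?.TTSEngine: an empty value uses the browser's Web Speech API, keeping the utterance in currentUtterance so stopAllAudio() can cancel it when the user interrupts, while 'openai' synthesizes each sentence server-side and queues playback through playAudio. A condensed, illustrative restatement of that branch (not a drop-in replacement for the component code):

const engine = $settings?.audio?.TTSEngine ?? '';
if (engine === '') {
	// Browser TTS: keep a handle so it can be cancelled on interruption.
	currentUtterance = new SpeechSynthesisUtterance(content);
	speechSynthesis.speak(currentUtterance);
} else if (engine === 'openai') {
	// Server-side TTS: synthesize per sentence and chain playback, as in the loop above.
}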