diff --git a/src/lib/components/chat/Chat.svelte b/src/lib/components/chat/Chat.svelte index 24a5b680d..081415bc8 100644 --- a/src/lib/components/chat/Chat.svelte +++ b/src/lib/components/chat/Chat.svelte @@ -30,6 +30,7 @@ import { convertMessagesToHistory, copyToClipboard, + getMessageContentParts, extractSentencesForAudio, promptTemplate, splitStream @@ -926,18 +927,26 @@ navigator.vibrate(5); } - const sentences = extractSentencesForAudio(responseMessage.content); - sentences.pop(); + const messageContentParts = getMessageContentParts( + responseMessage.content, + $config?.audio?.tts?.split_on ?? 'punctuation' + ); + messageContentParts.pop(); // dispatch only last sentence and make sure it hasn't been dispatched before if ( - sentences.length > 0 && - sentences[sentences.length - 1] !== responseMessage.lastSentence + messageContentParts.length > 0 && + messageContentParts[messageContentParts.length - 1] !== + responseMessage.lastSentence ) { - responseMessage.lastSentence = sentences[sentences.length - 1]; + responseMessage.lastSentence = + messageContentParts[messageContentParts.length - 1]; eventTarget.dispatchEvent( new CustomEvent('chat', { - detail: { id: responseMessageId, content: sentences[sentences.length - 1] } + detail: { + id: responseMessageId, + content: messageContentParts[messageContentParts.length - 1] + } }) ); } @@ -1040,14 +1049,19 @@ stopResponseFlag = false; await tick(); - let lastSentence = extractSentencesForAudio(responseMessage.content)?.at(-1) ?? ''; - if (lastSentence) { + let lastMessageContentPart = + getMessageContentParts( + responseMessage.content, + $config?.audio?.tts?.split_on ?? 'punctuation' + )?.at(-1) ?? ''; + if (lastMessageContentPart) { eventTarget.dispatchEvent( new CustomEvent('chat', { - detail: { id: responseMessageId, content: lastSentence } + detail: { id: responseMessageId, content: lastMessageContentPart } }) ); } + eventTarget.dispatchEvent( new CustomEvent('chat:finish', { detail: { @@ -1247,18 +1261,24 @@ navigator.vibrate(5); } - const sentences = extractSentencesForAudio(responseMessage.content); - sentences.pop(); + const messageContentParts = getMessageContentParts( + responseMessage.content, + $config?.audio?.tts?.split_on ?? 'punctuation' + ); + messageContentParts.pop(); // dispatch only last sentence and make sure it hasn't been dispatched before if ( - sentences.length > 0 && - sentences[sentences.length - 1] !== responseMessage.lastSentence + messageContentParts.length > 0 && + messageContentParts[messageContentParts.length - 1] !== responseMessage.lastSentence ) { - responseMessage.lastSentence = sentences[sentences.length - 1]; + responseMessage.lastSentence = messageContentParts[messageContentParts.length - 1]; eventTarget.dispatchEvent( new CustomEvent('chat', { - detail: { id: responseMessageId, content: sentences[sentences.length - 1] } + detail: { + id: responseMessageId, + content: messageContentParts[messageContentParts.length - 1] + } }) ); } @@ -1313,11 +1333,15 @@ stopResponseFlag = false; await tick(); - let lastSentence = extractSentencesForAudio(responseMessage.content)?.at(-1) ?? ''; - if (lastSentence) { + let lastMessageContentPart = + getMessageContentParts( + responseMessage.content, + $config?.audio?.tts?.split_on ?? 'punctuation' + )?.at(-1) ?? ''; + if (lastMessageContentPart) { eventTarget.dispatchEvent( new CustomEvent('chat', { - detail: { id: responseMessageId, content: lastSentence } + detail: { id: responseMessageId, content: lastMessageContentPart } }) ); } diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte index 21daadb2c..a703375a4 100644 --- a/src/lib/components/chat/Messages/ResponseMessage.svelte +++ b/src/lib/components/chat/Messages/ResponseMessage.svelte @@ -16,7 +16,8 @@ approximateToHumanReadable, extractParagraphsForAudio, extractSentencesForAudio, - prepareTextForTTS, + cleanText, + getMessageContentParts } from '$lib/utils'; import { WEBUI_BASE_URL } from '$lib/constants'; @@ -35,7 +36,6 @@ import type { Writable } from 'svelte/store'; import type { i18n as i18nType } from 'i18next'; - import { TTS_RESPONSE_SPLIT } from '$lib/types'; interface MessageType { id: string; @@ -44,8 +44,20 @@ files?: { type: string; url: string }[]; timestamp: number; role: string; - statusHistory?: { done: boolean; action: string; description: string; urls?: string[]; query?: string; }[]; - status?: { done: boolean; action: string; description: string; urls?: string[]; query?: string; }; + statusHistory?: { + done: boolean; + action: string; + description: string; + urls?: string[]; + query?: string; + }[]; + status?: { + done: boolean; + action: string; + description: string; + urls?: string[]; + query?: string; + }; done: boolean; error?: boolean | { content: string }; citations?: string[]; @@ -61,7 +73,7 @@ total_duration?: number; load_duration?: number; }; - annotation?: { type: string; rating: number; }; + annotation?: { type: string; rating: number }; } export let message: MessageType; @@ -145,22 +157,12 @@ if ($config.audio.tts.engine !== '') { loadingSpeech = true; - const preparedMessageContent: string[] = []; + const messageContentParts: string[] = getMessageContentParts( + message.content, + $config?.audio?.tts?.split_on ?? 'punctuation' + ); - switch ($config.audio.tts.split_on) { - default: - case TTS_RESPONSE_SPLIT.PUNCTUATION: - preparedMessageContent.push(...extractSentencesForAudio(message.content)); - break; - case TTS_RESPONSE_SPLIT.PARAGRAPHS: - preparedMessageContent.push(...extractParagraphsForAudio(message.content)); - break; - case TTS_RESPONSE_SPLIT.NONE: - preparedMessageContent.push(prepareTextForTTS(message.content)); - break; - } - - if (!preparedMessageContent.length) { + if (!messageContentParts.length) { console.log('No content to speak'); toast.info($i18n.t('No content to speak')); @@ -169,16 +171,19 @@ return; } - console.debug('Prepared message content for TTS', preparedMessageContent); + console.debug('Prepared message content for TTS', messageContentParts); - audioParts = preparedMessageContent.reduce((acc, _sentence, idx) => { - acc[idx] = null; - return acc; - }, {} as typeof audioParts); + audioParts = messageContentParts.reduce( + (acc, _sentence, idx) => { + acc[idx] = null; + return acc; + }, + {} as typeof audioParts + ); let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately - for (const [idx, sentence] of preparedMessageContent.entries()) { + for (const [idx, sentence] of messageContentParts.entries()) { const res = await synthesizeOpenAISpeech( localStorage.token, $settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice @@ -212,8 +217,7 @@ const voice = voices ?.filter( - (v) => - v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice) + (v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice) ) ?.at(0) ?? undefined; @@ -727,7 +731,7 @@ eval_duration: ${ Math.round(((message.info.eval_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A' }ms
- approximate_total: ${approximateToHumanReadable((message.info.total_duration ?? 0))}`} + approximate_total: ${approximateToHumanReadable(message.info.total_duration ?? 0)}`} placement="top" > diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts index 35ed39888..b3db18548 100644 --- a/src/lib/utils/index.ts +++ b/src/lib/utils/index.ts @@ -1,6 +1,8 @@ import { v4 as uuidv4 } from 'uuid'; import sha256 from 'js-sha256'; + import { WEBUI_BASE_URL } from '$lib/constants'; +import { TTS_RESPONSE_SPLIT } from '$lib/types'; ////////////////////////// // Helper functions @@ -537,7 +539,7 @@ export const removeFormattings = (str: string) => { return str.replace(/(\*)(.*?)\1/g, '').replace(/(```)(.*?)\1/gs, ''); }; -export const prepareTextForTTS = (content: string) => { +export const cleanText = (content: string) => { return removeFormattings(removeEmojis(content.trim())); }; @@ -564,9 +566,7 @@ export const extractSentences = (text: string) => { return sentence.replace(/\u0000(\d+)\u0000/g, (_, idx) => codeBlocks[idx]); }); - return sentences - .map(prepareTextForTTS) - .filter(Boolean); + return sentences.map(cleanText).filter(Boolean); }; export const extractParagraphsForAudio = (text: string) => { @@ -589,9 +589,7 @@ export const extractParagraphsForAudio = (text: string) => { return paragraph.replace(/\u0000(\d+)\u0000/g, (_, idx) => codeBlocks[idx]); }); - return paragraphs - .map(prepareTextForTTS) - .filter(Boolean); + return paragraphs.map(cleanText).filter(Boolean); }; export const extractSentencesForAudio = (text: string) => { @@ -613,6 +611,25 @@ export const extractSentencesForAudio = (text: string) => { }, [] as string[]); }; +export const getMessageContentParts = (content: string, split_on: string = 'punctuation') => { + const messageContentParts: string[] = []; + + switch (split_on) { + default: + case TTS_RESPONSE_SPLIT.PUNCTUATION: + messageContentParts.push(...extractSentencesForAudio(content)); + break; + case TTS_RESPONSE_SPLIT.PARAGRAPHS: + messageContentParts.push(...extractParagraphsForAudio(content)); + break; + case TTS_RESPONSE_SPLIT.NONE: + messageContentParts.push(cleanText(content)); + break; + } + + return messageContentParts; +}; + export const blobToFile = (blob, fileName) => { // Create a new File object from the Blob const file = new File([blob], fileName, { type: blob.type });