diff --git a/src/lib/components/chat/Chat.svelte b/src/lib/components/chat/Chat.svelte
index 24a5b680d..081415bc8 100644
--- a/src/lib/components/chat/Chat.svelte
+++ b/src/lib/components/chat/Chat.svelte
@@ -30,6 +30,7 @@
import {
convertMessagesToHistory,
copyToClipboard,
+ getMessageContentParts,
extractSentencesForAudio,
promptTemplate,
splitStream
@@ -926,18 +927,26 @@
navigator.vibrate(5);
}
- const sentences = extractSentencesForAudio(responseMessage.content);
- sentences.pop();
+ const messageContentParts = getMessageContentParts(
+ responseMessage.content,
+ $config?.audio?.tts?.split_on ?? 'punctuation'
+ );
+ messageContentParts.pop();
// dispatch only last sentence and make sure it hasn't been dispatched before
if (
- sentences.length > 0 &&
- sentences[sentences.length - 1] !== responseMessage.lastSentence
+ messageContentParts.length > 0 &&
+ messageContentParts[messageContentParts.length - 1] !==
+ responseMessage.lastSentence
) {
- responseMessage.lastSentence = sentences[sentences.length - 1];
+ responseMessage.lastSentence =
+ messageContentParts[messageContentParts.length - 1];
eventTarget.dispatchEvent(
new CustomEvent('chat', {
- detail: { id: responseMessageId, content: sentences[sentences.length - 1] }
+ detail: {
+ id: responseMessageId,
+ content: messageContentParts[messageContentParts.length - 1]
+ }
})
);
}
@@ -1040,14 +1049,19 @@
stopResponseFlag = false;
await tick();
- let lastSentence = extractSentencesForAudio(responseMessage.content)?.at(-1) ?? '';
- if (lastSentence) {
+ let lastMessageContentPart =
+ getMessageContentParts(
+ responseMessage.content,
+ $config?.audio?.tts?.split_on ?? 'punctuation'
+ )?.at(-1) ?? '';
+ if (lastMessageContentPart) {
eventTarget.dispatchEvent(
new CustomEvent('chat', {
- detail: { id: responseMessageId, content: lastSentence }
+ detail: { id: responseMessageId, content: lastMessageContentPart }
})
);
}
+
eventTarget.dispatchEvent(
new CustomEvent('chat:finish', {
detail: {
@@ -1247,18 +1261,24 @@
navigator.vibrate(5);
}
- const sentences = extractSentencesForAudio(responseMessage.content);
- sentences.pop();
+ const messageContentParts = getMessageContentParts(
+ responseMessage.content,
+ $config?.audio?.tts?.split_on ?? 'punctuation'
+ );
+ messageContentParts.pop();
// dispatch only last sentence and make sure it hasn't been dispatched before
if (
- sentences.length > 0 &&
- sentences[sentences.length - 1] !== responseMessage.lastSentence
+ messageContentParts.length > 0 &&
+ messageContentParts[messageContentParts.length - 1] !== responseMessage.lastSentence
) {
- responseMessage.lastSentence = sentences[sentences.length - 1];
+ responseMessage.lastSentence = messageContentParts[messageContentParts.length - 1];
eventTarget.dispatchEvent(
new CustomEvent('chat', {
- detail: { id: responseMessageId, content: sentences[sentences.length - 1] }
+ detail: {
+ id: responseMessageId,
+ content: messageContentParts[messageContentParts.length - 1]
+ }
})
);
}
@@ -1313,11 +1333,15 @@
stopResponseFlag = false;
await tick();
- let lastSentence = extractSentencesForAudio(responseMessage.content)?.at(-1) ?? '';
- if (lastSentence) {
+ let lastMessageContentPart =
+ getMessageContentParts(
+ responseMessage.content,
+ $config?.audio?.tts?.split_on ?? 'punctuation'
+ )?.at(-1) ?? '';
+ if (lastMessageContentPart) {
eventTarget.dispatchEvent(
new CustomEvent('chat', {
- detail: { id: responseMessageId, content: lastSentence }
+ detail: { id: responseMessageId, content: lastMessageContentPart }
})
);
}
diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte
index 21daadb2c..a703375a4 100644
--- a/src/lib/components/chat/Messages/ResponseMessage.svelte
+++ b/src/lib/components/chat/Messages/ResponseMessage.svelte
@@ -16,7 +16,8 @@
approximateToHumanReadable,
extractParagraphsForAudio,
extractSentencesForAudio,
- prepareTextForTTS,
+ cleanText,
+ getMessageContentParts
} from '$lib/utils';
import { WEBUI_BASE_URL } from '$lib/constants';
@@ -35,7 +36,6 @@
import type { Writable } from 'svelte/store';
import type { i18n as i18nType } from 'i18next';
- import { TTS_RESPONSE_SPLIT } from '$lib/types';
interface MessageType {
id: string;
@@ -44,8 +44,20 @@
files?: { type: string; url: string }[];
timestamp: number;
role: string;
- statusHistory?: { done: boolean; action: string; description: string; urls?: string[]; query?: string; }[];
- status?: { done: boolean; action: string; description: string; urls?: string[]; query?: string; };
+ statusHistory?: {
+ done: boolean;
+ action: string;
+ description: string;
+ urls?: string[];
+ query?: string;
+ }[];
+ status?: {
+ done: boolean;
+ action: string;
+ description: string;
+ urls?: string[];
+ query?: string;
+ };
done: boolean;
error?: boolean | { content: string };
citations?: string[];
@@ -61,7 +73,7 @@
total_duration?: number;
load_duration?: number;
};
- annotation?: { type: string; rating: number; };
+ annotation?: { type: string; rating: number };
}
export let message: MessageType;
@@ -145,22 +157,12 @@
if ($config.audio.tts.engine !== '') {
loadingSpeech = true;
- const preparedMessageContent: string[] = [];
+ const messageContentParts: string[] = getMessageContentParts(
+ message.content,
+ $config?.audio?.tts?.split_on ?? 'punctuation'
+ );
- switch ($config.audio.tts.split_on) {
- default:
- case TTS_RESPONSE_SPLIT.PUNCTUATION:
- preparedMessageContent.push(...extractSentencesForAudio(message.content));
- break;
- case TTS_RESPONSE_SPLIT.PARAGRAPHS:
- preparedMessageContent.push(...extractParagraphsForAudio(message.content));
- break;
- case TTS_RESPONSE_SPLIT.NONE:
- preparedMessageContent.push(prepareTextForTTS(message.content));
- break;
- }
-
- if (!preparedMessageContent.length) {
+ if (!messageContentParts.length) {
console.log('No content to speak');
toast.info($i18n.t('No content to speak'));
@@ -169,16 +171,19 @@
return;
}
- console.debug('Prepared message content for TTS', preparedMessageContent);
+ console.debug('Prepared message content for TTS', messageContentParts);
- audioParts = preparedMessageContent.reduce((acc, _sentence, idx) => {
- acc[idx] = null;
- return acc;
- }, {} as typeof audioParts);
+ audioParts = messageContentParts.reduce(
+ (acc, _sentence, idx) => {
+ acc[idx] = null;
+ return acc;
+ },
+ {} as typeof audioParts
+ );
let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
- for (const [idx, sentence] of preparedMessageContent.entries()) {
+ for (const [idx, sentence] of messageContentParts.entries()) {
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice
@@ -212,8 +217,7 @@
const voice =
voices
?.filter(
- (v) =>
- v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
+ (v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
)
?.at(0) ?? undefined;
@@ -727,7 +731,7 @@
eval_duration: ${
Math.round(((message.info.eval_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
}ms
- approximate_total: ${approximateToHumanReadable((message.info.total_duration ?? 0))}`}
+ approximate_total: ${approximateToHumanReadable(message.info.total_duration ?? 0)}`}
placement="top"
>
diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts
index 35ed39888..b3db18548 100644
--- a/src/lib/utils/index.ts
+++ b/src/lib/utils/index.ts
@@ -1,6 +1,8 @@
import { v4 as uuidv4 } from 'uuid';
import sha256 from 'js-sha256';
+
import { WEBUI_BASE_URL } from '$lib/constants';
+import { TTS_RESPONSE_SPLIT } from '$lib/types';
//////////////////////////
// Helper functions
@@ -537,7 +539,7 @@ export const removeFormattings = (str: string) => {
return str.replace(/(\*)(.*?)\1/g, '').replace(/(```)(.*?)\1/gs, '');
};
-export const prepareTextForTTS = (content: string) => {
+/**
+ * Cleans a piece of text: trims surrounding whitespace, then passes the result
+ * through `removeEmojis` and `removeFormattings`.
+ *
+ * Note that `removeFormattings` deletes asterisk-delimited and
+ * triple-backtick-delimited spans entirely, including the text inside them.
+ *
+ * @param content - Text to clean.
+ * @returns The cleaned text. NOTE(review): callers filter the result with
+ *   `Boolean`, so it can presumably be an empty string.
+ */
+export const cleanText = (content: string) => {
return removeFormattings(removeEmojis(content.trim()));
};
@@ -564,9 +566,7 @@ export const extractSentences = (text: string) => {
return sentence.replace(/\u0000(\d+)\u0000/g, (_, idx) => codeBlocks[idx]);
});
- return sentences
- .map(prepareTextForTTS)
- .filter(Boolean);
+ return sentences.map(cleanText).filter(Boolean);
};
export const extractParagraphsForAudio = (text: string) => {
@@ -589,9 +589,7 @@ export const extractParagraphsForAudio = (text: string) => {
return paragraph.replace(/\u0000(\d+)\u0000/g, (_, idx) => codeBlocks[idx]);
});
- return paragraphs
- .map(prepareTextForTTS)
- .filter(Boolean);
+ return paragraphs.map(cleanText).filter(Boolean);
};
export const extractSentencesForAudio = (text: string) => {
@@ -613,6 +611,25 @@ export const extractSentencesForAudio = (text: string) => {
}, [] as string[]);
};
+/**
+ * Splits message content into the ordered parts that are handed to
+ * text-to-speech one at a time.
+ *
+ * @param content - Message text to split.
+ * @param split_on - Splitting strategy, compared against the
+ *   `TTS_RESPONSE_SPLIT` values. Any unrecognised value is handled the same
+ *   way as `TTS_RESPONSE_SPLIT.PUNCTUATION`.
+ * @returns A freshly allocated array of parts; empty when there is nothing to
+ *   speak. Callers may mutate it (e.g. `pop()` the still-streaming last part).
+ */
+export const getMessageContentParts = (
+	content: string,
+	split_on: string = 'punctuation'
+): string[] => {
+	switch (split_on) {
+		case TTS_RESPONSE_SPLIT.PARAGRAPHS:
+			return [...extractParagraphsForAudio(content)];
+		case TTS_RESPONSE_SPLIT.NONE: {
+			// `extractParagraphsForAudio` filters out empty parts; do the same for the
+			// unsplit case. Otherwise content that cleans down to '' would yield ['']
+			// and callers checking `.length` would try to speak an empty string.
+			const cleaned = cleanText(content);
+			return cleaned ? [cleaned] : [];
+		}
+		case TTS_RESPONSE_SPLIT.PUNCTUATION:
+		default:
+			return [...extractSentencesForAudio(content)];
+	}
+};
+
export const blobToFile = (blob, fileName) => {
// Create a new File object from the Blob
const file = new File([blob], fileName, { type: blob.type });