This commit is contained in:
Timothy J. Baek 2024-08-26 15:01:29 +02:00
parent d78c35c9ba
commit f4f7adb377
3 changed files with 99 additions and 54 deletions

View File

@ -30,6 +30,7 @@
import { import {
convertMessagesToHistory, convertMessagesToHistory,
copyToClipboard, copyToClipboard,
getMessageContentParts,
extractSentencesForAudio, extractSentencesForAudio,
promptTemplate, promptTemplate,
splitStream splitStream
@ -926,18 +927,26 @@
navigator.vibrate(5); navigator.vibrate(5);
} }
const sentences = extractSentencesForAudio(responseMessage.content); const messageContentParts = getMessageContentParts(
sentences.pop(); responseMessage.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
);
messageContentParts.pop();
// dispatch only last sentence and make sure it hasn't been dispatched before // dispatch only last sentence and make sure it hasn't been dispatched before
if ( if (
sentences.length > 0 && messageContentParts.length > 0 &&
sentences[sentences.length - 1] !== responseMessage.lastSentence messageContentParts[messageContentParts.length - 1] !==
responseMessage.lastSentence
) { ) {
responseMessage.lastSentence = sentences[sentences.length - 1]; responseMessage.lastSentence =
messageContentParts[messageContentParts.length - 1];
eventTarget.dispatchEvent( eventTarget.dispatchEvent(
new CustomEvent('chat', { new CustomEvent('chat', {
detail: { id: responseMessageId, content: sentences[sentences.length - 1] } detail: {
id: responseMessageId,
content: messageContentParts[messageContentParts.length - 1]
}
}) })
); );
} }
@ -1040,14 +1049,19 @@
stopResponseFlag = false; stopResponseFlag = false;
await tick(); await tick();
let lastSentence = extractSentencesForAudio(responseMessage.content)?.at(-1) ?? ''; let lastMessageContentPart =
if (lastSentence) { getMessageContentParts(
responseMessage.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
)?.at(-1) ?? '';
if (lastMessageContentPart) {
eventTarget.dispatchEvent( eventTarget.dispatchEvent(
new CustomEvent('chat', { new CustomEvent('chat', {
detail: { id: responseMessageId, content: lastSentence } detail: { id: responseMessageId, content: lastMessageContentPart }
}) })
); );
} }
eventTarget.dispatchEvent( eventTarget.dispatchEvent(
new CustomEvent('chat:finish', { new CustomEvent('chat:finish', {
detail: { detail: {
@ -1247,18 +1261,24 @@
navigator.vibrate(5); navigator.vibrate(5);
} }
const sentences = extractSentencesForAudio(responseMessage.content); const messageContentParts = getMessageContentParts(
sentences.pop(); responseMessage.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
);
messageContentParts.pop();
// dispatch only last sentence and make sure it hasn't been dispatched before // dispatch only last sentence and make sure it hasn't been dispatched before
if ( if (
sentences.length > 0 && messageContentParts.length > 0 &&
sentences[sentences.length - 1] !== responseMessage.lastSentence messageContentParts[messageContentParts.length - 1] !== responseMessage.lastSentence
) { ) {
responseMessage.lastSentence = sentences[sentences.length - 1]; responseMessage.lastSentence = messageContentParts[messageContentParts.length - 1];
eventTarget.dispatchEvent( eventTarget.dispatchEvent(
new CustomEvent('chat', { new CustomEvent('chat', {
detail: { id: responseMessageId, content: sentences[sentences.length - 1] } detail: {
id: responseMessageId,
content: messageContentParts[messageContentParts.length - 1]
}
}) })
); );
} }
@ -1313,11 +1333,15 @@
stopResponseFlag = false; stopResponseFlag = false;
await tick(); await tick();
let lastSentence = extractSentencesForAudio(responseMessage.content)?.at(-1) ?? ''; let lastMessageContentPart =
if (lastSentence) { getMessageContentParts(
responseMessage.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
)?.at(-1) ?? '';
if (lastMessageContentPart) {
eventTarget.dispatchEvent( eventTarget.dispatchEvent(
new CustomEvent('chat', { new CustomEvent('chat', {
detail: { id: responseMessageId, content: lastSentence } detail: { id: responseMessageId, content: lastMessageContentPart }
}) })
); );
} }

View File

@ -16,7 +16,8 @@
approximateToHumanReadable, approximateToHumanReadable,
extractParagraphsForAudio, extractParagraphsForAudio,
extractSentencesForAudio, extractSentencesForAudio,
prepareTextForTTS, cleanText,
getMessageContentParts
} from '$lib/utils'; } from '$lib/utils';
import { WEBUI_BASE_URL } from '$lib/constants'; import { WEBUI_BASE_URL } from '$lib/constants';
@ -35,7 +36,6 @@
import type { Writable } from 'svelte/store'; import type { Writable } from 'svelte/store';
import type { i18n as i18nType } from 'i18next'; import type { i18n as i18nType } from 'i18next';
import { TTS_RESPONSE_SPLIT } from '$lib/types';
interface MessageType { interface MessageType {
id: string; id: string;
@ -44,8 +44,20 @@
files?: { type: string; url: string }[]; files?: { type: string; url: string }[];
timestamp: number; timestamp: number;
role: string; role: string;
statusHistory?: { done: boolean; action: string; description: string; urls?: string[]; query?: string; }[]; statusHistory?: {
status?: { done: boolean; action: string; description: string; urls?: string[]; query?: string; }; done: boolean;
action: string;
description: string;
urls?: string[];
query?: string;
}[];
status?: {
done: boolean;
action: string;
description: string;
urls?: string[];
query?: string;
};
done: boolean; done: boolean;
error?: boolean | { content: string }; error?: boolean | { content: string };
citations?: string[]; citations?: string[];
@ -61,7 +73,7 @@
total_duration?: number; total_duration?: number;
load_duration?: number; load_duration?: number;
}; };
annotation?: { type: string; rating: number; }; annotation?: { type: string; rating: number };
} }
export let message: MessageType; export let message: MessageType;
@ -145,22 +157,12 @@
if ($config.audio.tts.engine !== '') { if ($config.audio.tts.engine !== '') {
loadingSpeech = true; loadingSpeech = true;
const preparedMessageContent: string[] = []; const messageContentParts: string[] = getMessageContentParts(
message.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
);
switch ($config.audio.tts.split_on) { if (!messageContentParts.length) {
default:
case TTS_RESPONSE_SPLIT.PUNCTUATION:
preparedMessageContent.push(...extractSentencesForAudio(message.content));
break;
case TTS_RESPONSE_SPLIT.PARAGRAPHS:
preparedMessageContent.push(...extractParagraphsForAudio(message.content));
break;
case TTS_RESPONSE_SPLIT.NONE:
preparedMessageContent.push(prepareTextForTTS(message.content));
break;
}
if (!preparedMessageContent.length) {
console.log('No content to speak'); console.log('No content to speak');
toast.info($i18n.t('No content to speak')); toast.info($i18n.t('No content to speak'));
@ -169,16 +171,19 @@
return; return;
} }
console.debug('Prepared message content for TTS', preparedMessageContent); console.debug('Prepared message content for TTS', messageContentParts);
audioParts = preparedMessageContent.reduce((acc, _sentence, idx) => { audioParts = messageContentParts.reduce(
(acc, _sentence, idx) => {
acc[idx] = null; acc[idx] = null;
return acc; return acc;
}, {} as typeof audioParts); },
{} as typeof audioParts
);
let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
for (const [idx, sentence] of preparedMessageContent.entries()) { for (const [idx, sentence] of messageContentParts.entries()) {
const res = await synthesizeOpenAISpeech( const res = await synthesizeOpenAISpeech(
localStorage.token, localStorage.token,
$settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice $settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice
@ -212,8 +217,7 @@
const voice = const voice =
voices voices
?.filter( ?.filter(
(v) => (v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
) )
?.at(0) ?? undefined; ?.at(0) ?? undefined;
@ -727,7 +731,7 @@
eval_duration: ${ eval_duration: ${
Math.round(((message.info.eval_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A' Math.round(((message.info.eval_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
}ms<br/> }ms<br/>
approximate_total: ${approximateToHumanReadable((message.info.total_duration ?? 0))}`} approximate_total: ${approximateToHumanReadable(message.info.total_duration ?? 0)}`}
placement="top" placement="top"
> >
<Tooltip content={$i18n.t('Generation Info')} placement="bottom"> <Tooltip content={$i18n.t('Generation Info')} placement="bottom">

View File

@ -1,6 +1,8 @@
import { v4 as uuidv4 } from 'uuid'; import { v4 as uuidv4 } from 'uuid';
import sha256 from 'js-sha256'; import sha256 from 'js-sha256';
import { WEBUI_BASE_URL } from '$lib/constants'; import { WEBUI_BASE_URL } from '$lib/constants';
import { TTS_RESPONSE_SPLIT } from '$lib/types';
////////////////////////// //////////////////////////
// Helper functions // Helper functions
@ -537,7 +539,7 @@ export const removeFormattings = (str: string) => {
return str.replace(/(\*)(.*?)\1/g, '').replace(/(```)(.*?)\1/gs, ''); return str.replace(/(\*)(.*?)\1/g, '').replace(/(```)(.*?)\1/gs, '');
}; };
export const prepareTextForTTS = (content: string) => { export const cleanText = (content: string) => {
return removeFormattings(removeEmojis(content.trim())); return removeFormattings(removeEmojis(content.trim()));
}; };
@ -564,9 +566,7 @@ export const extractSentences = (text: string) => {
return sentence.replace(/\u0000(\d+)\u0000/g, (_, idx) => codeBlocks[idx]); return sentence.replace(/\u0000(\d+)\u0000/g, (_, idx) => codeBlocks[idx]);
}); });
return sentences return sentences.map(cleanText).filter(Boolean);
.map(prepareTextForTTS)
.filter(Boolean);
}; };
export const extractParagraphsForAudio = (text: string) => { export const extractParagraphsForAudio = (text: string) => {
@ -589,9 +589,7 @@ export const extractParagraphsForAudio = (text: string) => {
return paragraph.replace(/\u0000(\d+)\u0000/g, (_, idx) => codeBlocks[idx]); return paragraph.replace(/\u0000(\d+)\u0000/g, (_, idx) => codeBlocks[idx]);
}); });
return paragraphs return paragraphs.map(cleanText).filter(Boolean);
.map(prepareTextForTTS)
.filter(Boolean);
}; };
export const extractSentencesForAudio = (text: string) => { export const extractSentencesForAudio = (text: string) => {
@ -613,6 +611,25 @@ export const extractSentencesForAudio = (text: string) => {
}, [] as string[]); }, [] as string[]);
}; };
/**
 * Splits message content into the ordered parts that are sent to text-to-speech
 * one at a time.
 *
 * @param content - Raw message text.
 * @param split_on - Splitting strategy, matched against the `TTS_RESPONSE_SPLIT`
 *   members. Any value that matches no member falls back to punctuation splitting.
 * @returns The parts in order. The array is empty when no non-empty part remains,
 *   so callers can rely on `length` to detect "nothing to speak".
 */
export const getMessageContentParts = (
	content: string,
	split_on: string = 'punctuation'
): string[] => {
	const messageContentParts: string[] = [];

	switch (split_on) {
		// `default` deliberately shares the punctuation branch: unknown values
		// behave like the default strategy.
		default:
		case TTS_RESPONSE_SPLIT.PUNCTUATION:
			messageContentParts.push(...extractSentencesForAudio(content));
			break;
		case TTS_RESPONSE_SPLIT.PARAGRAPHS:
			messageContentParts.push(...extractParagraphsForAudio(content));
			break;
		case TTS_RESPONSE_SPLIT.NONE: {
			// extractParagraphsForAudio filters out empty parts; do the same here so
			// that content which cleans down to '' yields [] rather than [''].
			const cleanedContent = cleanText(content);
			if (cleanedContent) {
				messageContentParts.push(cleanedContent);
			}
			break;
		}
	}

	return messageContentParts;
};
export const blobToFile = (blob, fileName) => { export const blobToFile = (blob, fileName) => {
// Create a new File object from the Blob // Create a new File object from the Blob
const file = new File([blob], fileName, { type: blob.type }); const file = new File([blob], fileName, { type: blob.type });