Merge pull request #4886 from kiosion/dev

feat: Add control for how message content is split for TTS generation requests
This commit is contained in:
Timothy Jaeryang Baek
2024-08-26 15:02:30 +02:00
committed by GitHub
14 changed files with 424 additions and 236 deletions

View File

@@ -3,13 +3,13 @@
import { toast } from 'svelte-sonner';
import mermaid from 'mermaid';
import { getContext, onMount, tick } from 'svelte';
import { getContext, onDestroy, onMount, tick } from 'svelte';
import { goto } from '$app/navigation';
import { page } from '$app/stores';
import type { Writable } from 'svelte/store';
import type { Unsubscriber, Writable } from 'svelte/store';
import type { i18n as i18nType } from 'i18next';
import { OLLAMA_API_BASE_URL, OPENAI_API_BASE_URL, WEBUI_BASE_URL } from '$lib/constants';
import { WEBUI_BASE_URL } from '$lib/constants';
import {
chatId,
@@ -19,31 +19,26 @@
models,
settings,
showSidebar,
tags as _tags,
WEBUI_NAME,
banners,
user,
socket,
showCallOverlay,
tools,
currentChatPage,
temporaryChatEnabled
} from '$lib/stores';
import {
convertMessagesToHistory,
copyToClipboard,
getMessageContentParts,
extractSentencesForAudio,
getUserPosition,
promptTemplate,
splitStream
} from '$lib/utils';
import { generateChatCompletion } from '$lib/apis/ollama';
import {
addTagById,
createNewChat,
deleteTagById,
getAllChatTags,
getChatById,
getChatList,
getTagsById,
@@ -66,8 +61,6 @@
import MessageInput from '$lib/components/chat/MessageInput.svelte';
import Messages from '$lib/components/chat/Messages.svelte';
import Navbar from '$lib/components/layout/Navbar.svelte';
import CallOverlay from './MessageInput/CallOverlay.svelte';
import { error } from '@sveltejs/kit';
import ChatControls from './ChatControls.svelte';
import EventConfirmDialog from '../common/ConfirmDialog.svelte';
@@ -118,6 +111,8 @@
let params = {};
let chatIdUnsubscriber: Unsubscriber | undefined;
$: if (history.currentId !== null) {
let _messages = [];
@@ -207,47 +202,51 @@
}
};
onMount(async () => {
const onMessageHandler = async (event) => {
if (event.origin === window.origin) {
// Replace with your iframe's origin
console.log('Message received from iframe:', event.data);
if (event.data.type === 'input:prompt') {
console.log(event.data.text);
const onMessageHandler = async (event: {
origin: string;
data: { type: string; text: string };
}) => {
if (event.origin !== window.origin) {
return;
}
const inputElement = document.getElementById('chat-textarea');
// Replace with your iframe's origin
if (event.data.type === 'input:prompt') {
console.debug(event.data.text);
if (inputElement) {
prompt = event.data.text;
inputElement.focus();
}
}
const inputElement = document.getElementById('chat-textarea');
if (event.data.type === 'action:submit') {
console.log(event.data.text);
if (prompt !== '') {
await tick();
submitPrompt(prompt);
}
}
if (event.data.type === 'input:prompt:submit') {
console.log(event.data.text);
if (prompt !== '') {
await tick();
submitPrompt(event.data.text);
}
}
if (inputElement) {
prompt = event.data.text;
inputElement.focus();
}
};
window.addEventListener('message', onMessageHandler);
}
$socket.on('chat-events', chatEventHandler);
if (event.data.type === 'action:submit') {
console.debug(event.data.text);
if (prompt !== '') {
await tick();
submitPrompt(prompt);
}
}
if (event.data.type === 'input:prompt:submit') {
console.debug(event.data.text);
if (prompt !== '') {
await tick();
submitPrompt(event.data.text);
}
}
};
onMount(async () => {
window.addEventListener('message', onMessageHandler);
$socket?.on('chat-events', chatEventHandler);
if (!$chatId) {
chatId.subscribe(async (value) => {
chatIdUnsubscriber = chatId.subscribe(async (value) => {
if (!value) {
await initNewChat();
}
@@ -257,12 +256,12 @@
await goto('/');
}
}
});
return () => {
window.removeEventListener('message', onMessageHandler);
$socket.off('chat-events');
};
// Tear down everything this component registered in onMount.
onDestroy(() => {
	// Only set when onMount actually subscribed to chatId (no chat id at mount time).
	chatIdUnsubscriber?.();
	window.removeEventListener('message', onMessageHandler);
	// Pass the handler explicitly: off() with only the event name would detach
	// every 'chat-events' listener on the socket held in the `socket` store,
	// not just the one this component registered.
	$socket?.off('chat-events', chatEventHandler);
});
//////////////////////////
@@ -595,11 +594,11 @@
};
const sendPrompt = async (
prompt,
parentId,
prompt: string,
parentId: string,
{ modelId = null, modelIdx = null, newChat = false } = {}
) => {
let _responses = [];
let _responses: string[] = [];
// If modelId is provided, use it, else use selected model
let selectedModelIds = modelId
@@ -609,7 +608,7 @@
: selectedModels;
// Create response messages for each selected model
const responseMessageIds = {};
const responseMessageIds: Record<PropertyKey, string> = {};
for (const [_modelIdx, modelId] of selectedModelIds.entries()) {
const model = $models.filter((m) => m.id === modelId).at(0);
@@ -739,13 +738,13 @@
);
currentChatPage.set(1);
await chats.set(await getChatList(localStorage.token, $currentChatPage));
chats.set(await getChatList(localStorage.token, $currentChatPage));
return _responses;
};
const sendPromptOllama = async (model, userPrompt, responseMessageId, _chatId) => {
let _response = null;
let _response: string | null = null;
const responseMessage = history.messages[responseMessageId];
const userMessage = history.messages[responseMessage.parentId];
@@ -776,7 +775,7 @@
...messages
]
.filter((message) => message?.content?.trim())
.map((message, idx, arr) => {
.map((message) => {
// Prepare the base message object
const baseMessage = {
role: message.role,
@@ -928,18 +927,26 @@
navigator.vibrate(5);
}
const sentences = extractSentencesForAudio(responseMessage.content);
sentences.pop();
const messageContentParts = getMessageContentParts(
responseMessage.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
);
messageContentParts.pop();
// dispatch only last sentence and make sure it hasn't been dispatched before
if (
sentences.length > 0 &&
sentences[sentences.length - 1] !== responseMessage.lastSentence
messageContentParts.length > 0 &&
messageContentParts[messageContentParts.length - 1] !==
responseMessage.lastSentence
) {
responseMessage.lastSentence = sentences[sentences.length - 1];
responseMessage.lastSentence =
messageContentParts[messageContentParts.length - 1];
eventTarget.dispatchEvent(
new CustomEvent('chat', {
detail: { id: responseMessageId, content: sentences[sentences.length - 1] }
detail: {
id: responseMessageId,
content: messageContentParts[messageContentParts.length - 1]
}
})
);
}
@@ -1042,14 +1049,19 @@
stopResponseFlag = false;
await tick();
let lastSentence = extractSentencesForAudio(responseMessage.content)?.at(-1) ?? '';
if (lastSentence) {
let lastMessageContentPart =
getMessageContentParts(
responseMessage.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
)?.at(-1) ?? '';
if (lastMessageContentPart) {
eventTarget.dispatchEvent(
new CustomEvent('chat', {
detail: { id: responseMessageId, content: lastSentence }
detail: { id: responseMessageId, content: lastMessageContentPart }
})
);
}
eventTarget.dispatchEvent(
new CustomEvent('chat:finish', {
detail: {
@@ -1249,18 +1261,24 @@
navigator.vibrate(5);
}
const sentences = extractSentencesForAudio(responseMessage.content);
sentences.pop();
const messageContentParts = getMessageContentParts(
responseMessage.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
);
messageContentParts.pop();
// dispatch only last sentence and make sure it hasn't been dispatched before
if (
sentences.length > 0 &&
sentences[sentences.length - 1] !== responseMessage.lastSentence
messageContentParts.length > 0 &&
messageContentParts[messageContentParts.length - 1] !== responseMessage.lastSentence
) {
responseMessage.lastSentence = sentences[sentences.length - 1];
responseMessage.lastSentence = messageContentParts[messageContentParts.length - 1];
eventTarget.dispatchEvent(
new CustomEvent('chat', {
detail: { id: responseMessageId, content: sentences[sentences.length - 1] }
detail: {
id: responseMessageId,
content: messageContentParts[messageContentParts.length - 1]
}
})
);
}
@@ -1315,11 +1333,15 @@
stopResponseFlag = false;
await tick();
let lastSentence = extractSentencesForAudio(responseMessage.content)?.at(-1) ?? '';
if (lastSentence) {
let lastMessageContentPart =
getMessageContentParts(
responseMessage.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
)?.at(-1) ?? '';
if (lastMessageContentPart) {
eventTarget.dispatchEvent(
new CustomEvent('chat', {
detail: { id: responseMessageId, content: lastSentence }
detail: { id: responseMessageId, content: lastMessageContentPart }
})
);
}

View File

@@ -2,11 +2,10 @@
import { toast } from 'svelte-sonner';
import dayjs from 'dayjs';
import { fade } from 'svelte/transition';
import { createEventDispatcher } from 'svelte';
import { onMount, tick, getContext } from 'svelte';
const i18n = getContext('i18n');
const i18n = getContext<Writable<i18nType>>('i18n');
const dispatch = createEventDispatcher();
@@ -15,20 +14,19 @@
import { imageGenerations } from '$lib/apis/images';
import {
approximateToHumanReadable,
extractSentences,
replaceTokens,
processResponseContent
extractParagraphsForAudio,
extractSentencesForAudio,
cleanText,
getMessageContentParts
} from '$lib/utils';
import { WEBUI_BASE_URL } from '$lib/constants';
import Name from './Name.svelte';
import ProfileImage from './ProfileImage.svelte';
import Skeleton from './Skeleton.svelte';
import CodeBlock from './CodeBlock.svelte';
import Image from '$lib/components/common/Image.svelte';
import Tooltip from '$lib/components/common/Tooltip.svelte';
import RateComment from './RateComment.svelte';
import CitationsModal from '$lib/components/chat/Messages/CitationsModal.svelte';
import Spinner from '$lib/components/common/Spinner.svelte';
import WebSearchResults from './ResponseMessage/WebSearchResults.svelte';
import Sparkles from '$lib/components/icons/Sparkles.svelte';
@@ -36,7 +34,49 @@
import Error from './Error.svelte';
import Citations from './Citations.svelte';
export let message;
import type { Writable } from 'svelte/store';
import type { i18n as i18nType } from 'i18next';
/** A single status update attached to a message (used for both `status` and `statusHistory`). */
interface MessageStatusEntry {
	/** The template shows a spinner while this is exactly `false`. */
	done: boolean;
	action: string;
	description: string;
	urls?: string[];
	query?: string;
}

/**
 * Optional generation statistics for a response.
 * NOTE(review): the tooltip divides the `*_duration` fields by 1e6 to show
 * milliseconds, so they are presumably nanoseconds — confirm against the backend.
 */
interface MessageGenerationInfo {
	openai?: boolean;
	prompt_tokens?: number;
	completion_tokens?: number;
	total_tokens?: number;
	eval_count?: number;
	eval_duration?: number;
	prompt_eval_count?: number;
	prompt_eval_duration?: number;
	total_duration?: number;
	load_duration?: number;
}

/** Shape of the `message` prop rendered by this component. */
interface MessageType {
	// Always present
	id: string;
	model: string;
	content: string;
	role: string;
	timestamp: number;
	done: boolean;

	// Optional attachments and progress information
	files?: Array<{ type: string; url: string }>;
	statusHistory?: MessageStatusEntry[];
	status?: MessageStatusEntry;

	// Optional outcome metadata
	error?: boolean | { content: string };
	citations?: string[];
	info?: MessageGenerationInfo;
	annotation?: { type: string; rating: number };
}
export let message: MessageType;
export let siblings;
export let isLastMessage = true;
@@ -60,28 +100,33 @@
let editedContent = '';
let editTextAreaElement: HTMLTextAreaElement;
let sentencesAudio = {};
let speaking = null;
let speakingIdx = null;
let audioParts: Record<number, HTMLAudioElement | null> = {};
let speaking = false;
let speakingIdx: number | undefined;
let loadingSpeech = false;
let generatingImage = false;
let showRateComment = false;
const playAudio = (idx) => {
return new Promise((res) => {
const playAudio = (idx: number) => {
return new Promise<void>((res) => {
speakingIdx = idx;
const audio = sentencesAudio[idx];
const audio = audioParts[idx];
if (!audio) {
return res();
}
audio.play();
audio.onended = async (e) => {
audio.onended = async () => {
await new Promise((r) => setTimeout(r, 300));
if (Object.keys(sentencesAudio).length - 1 === idx) {
speaking = null;
if (Object.keys(audioParts).length - 1 === idx) {
speaking = false;
}
res(e);
res();
};
});
};
@@ -91,113 +136,111 @@
try {
speechSynthesis.cancel();
sentencesAudio[speakingIdx].pause();
sentencesAudio[speakingIdx].currentTime = 0;
if (speakingIdx !== undefined && audioParts[speakingIdx]) {
audioParts[speakingIdx]!.pause();
audioParts[speakingIdx]!.currentTime = 0;
}
} catch {}
speaking = null;
speakingIdx = null;
} else {
if ((message?.content ?? '').trim() !== '') {
speaking = true;
speaking = false;
speakingIdx = undefined;
return;
}
if ($config.audio.tts.engine !== '') {
loadingSpeech = true;
if (!(message?.content ?? '').trim().length) {
toast.info($i18n.t('No content to speak'));
return;
}
const sentences = extractSentences(message.content).reduce((mergedTexts, currentText) => {
const lastIndex = mergedTexts.length - 1;
if (lastIndex >= 0) {
const previousText = mergedTexts[lastIndex];
const wordCount = previousText.split(/\s+/).length;
if (wordCount < 2) {
mergedTexts[lastIndex] = previousText + ' ' + currentText;
} else {
mergedTexts.push(currentText);
}
} else {
mergedTexts.push(currentText);
}
return mergedTexts;
}, []);
speaking = true;
console.log(sentences);
if ($config.audio.tts.engine !== '') {
loadingSpeech = true;
if (sentences.length > 0) {
sentencesAudio = sentences.reduce((a, e, i, arr) => {
a[i] = null;
return a;
}, {});
const messageContentParts: string[] = getMessageContentParts(
message.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
);
let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
if (!messageContentParts.length) {
console.log('No content to speak');
toast.info($i18n.t('No content to speak'));
for (const [idx, sentence] of sentences.entries()) {
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice
? ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
: $config?.audio?.tts?.voice,
sentence
).catch((error) => {
toast.error(error);
speaking = null;
loadingSpeech = false;
return null;
});
if (res) {
const blob = await res.blob();
const blobUrl = URL.createObjectURL(blob);
const audio = new Audio(blobUrl);
sentencesAudio[idx] = audio;
loadingSpeech = false;
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
}
}
} else {
speaking = null;
loadingSpeech = false;
}
} else {
let voices = [];
const getVoicesLoop = setInterval(async () => {
voices = await speechSynthesis.getVoices();
if (voices.length > 0) {
clearInterval(getVoicesLoop);
const voice =
voices
?.filter(
(v) =>
v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
)
?.at(0) ?? undefined;
console.log(voice);
const speak = new SpeechSynthesisUtterance(message.content);
console.log(speak);
speak.onend = () => {
speaking = null;
if ($settings.conversationMode) {
document.getElementById('voice-input-button')?.click();
}
};
if (voice) {
speak.voice = voice;
}
speechSynthesis.speak(speak);
}
}, 100);
}
} else {
toast.error($i18n.t('No content to speak'));
speaking = false;
loadingSpeech = false;
return;
}
console.debug('Prepared message content for TTS', messageContentParts);
audioParts = messageContentParts.reduce(
(acc, _sentence, idx) => {
acc[idx] = null;
return acc;
},
{} as typeof audioParts
);
let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
for (const [idx, sentence] of messageContentParts.entries()) {
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice
? ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
: $config?.audio?.tts?.voice,
sentence
).catch((error) => {
console.error(error);
toast.error(error);
speaking = false;
loadingSpeech = false;
});
if (res) {
const blob = await res.blob();
const blobUrl = URL.createObjectURL(blob);
const audio = new Audio(blobUrl);
audioParts[idx] = audio;
loadingSpeech = false;
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
}
}
} else {
let voices = [];
const getVoicesLoop = setInterval(() => {
voices = speechSynthesis.getVoices();
if (voices.length > 0) {
clearInterval(getVoicesLoop);
const voice =
voices
?.filter(
(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
)
?.at(0) ?? undefined;
console.log(voice);
const speak = new SpeechSynthesisUtterance(message.content);
console.log(speak);
speak.onend = () => {
speaking = false;
if ($settings.conversationMode) {
document.getElementById('voice-input-button')?.click();
}
};
if (voice) {
speak.voice = voice;
}
speechSynthesis.speak(speak);
}
}, 100);
}
};
@@ -230,7 +273,7 @@
await tick();
};
const generateImage = async (message) => {
const generateImage = async (message: MessageType) => {
generatingImage = true;
const res = await imageGenerations(localStorage.token, message.content).catch((error) => {
toast.error(error);
@@ -285,7 +328,7 @@
</Name>
<div>
{#if (message?.files ?? []).filter((f) => f.type === 'image').length > 0}
{#if message?.files && message.files?.filter((f) => f.type === 'image').length > 0}
<div class="my-2.5 w-full flex overflow-x-auto gap-2 flex-wrap">
{#each message.files as file}
<div>
@@ -304,7 +347,7 @@
message?.statusHistory ?? [...(message?.status ? [message?.status] : [])]
).at(-1)}
<div class="flex items-center gap-2 pt-0.5 pb-1">
{#if status.done === false}
{#if status?.done === false}
<div class="">
<Spinner className="size-4" />
</div>
@@ -521,7 +564,7 @@
: 'invisible group-hover:visible'} p-1.5 hover:bg-black/5 dark:hover:bg-white/5 rounded-lg dark:hover:text-white hover:text-black transition"
on:click={() => {
if (!loadingSpeech) {
toggleSpeakMessage(message);
toggleSpeakMessage();
}
}}
>
@@ -661,7 +704,7 @@
`${
Math.round(
((message.info.eval_count ?? 0) /
(message.info.eval_duration / 1000000000)) *
((message.info.eval_duration ?? 0) / 1000000000)) *
100
) / 100
} tokens` ?? 'N/A'
@@ -669,7 +712,7 @@
prompt_token/s: ${
Math.round(
((message.info.prompt_eval_count ?? 0) /
(message.info.prompt_eval_duration / 1000000000)) *
((message.info.prompt_eval_duration ?? 0) / 1000000000)) *
100
) / 100 ?? 'N/A'
} tokens<br/>
@@ -688,7 +731,7 @@
eval_duration: ${
Math.round(((message.info.eval_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
}ms<br/>
approximate_total: ${approximateToHumanReadable(message.info.total_duration)}`}
approximate_total: ${approximateToHumanReadable(message.info.total_duration ?? 0)}`}
placement="top"
>
<Tooltip content={$i18n.t('Generation Info')} placement="bottom">