Merge pull request #4886 from kiosion/dev

feat: Add control for how message content is split for TTS generation requests
This commit is contained in:
Timothy Jaeryang Baek 2024-08-26 15:02:30 +02:00 committed by GitHub
commit b148865ee8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 424 additions and 236 deletions

View File

@ -37,6 +37,7 @@ from config import (
AUDIO_TTS_ENGINE,
AUDIO_TTS_MODEL,
AUDIO_TTS_VOICE,
AUDIO_TTS_SPLIT_ON,
AppConfig,
CORS_ALLOW_ORIGIN,
)
@ -72,6 +73,7 @@ app.state.config.TTS_ENGINE = AUDIO_TTS_ENGINE
app.state.config.TTS_MODEL = AUDIO_TTS_MODEL
app.state.config.TTS_VOICE = AUDIO_TTS_VOICE
app.state.config.TTS_API_KEY = AUDIO_TTS_API_KEY
app.state.config.TTS_SPLIT_ON = AUDIO_TTS_SPLIT_ON
# setting device type for whisper model
whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu"
@ -88,6 +90,7 @@ class TTSConfigForm(BaseModel):
ENGINE: str
MODEL: str
VOICE: str
SPLIT_ON: str
class STTConfigForm(BaseModel):
@ -139,6 +142,7 @@ async def get_audio_config(user=Depends(get_admin_user)):
"ENGINE": app.state.config.TTS_ENGINE,
"MODEL": app.state.config.TTS_MODEL,
"VOICE": app.state.config.TTS_VOICE,
"SPLIT_ON": app.state.config.TTS_SPLIT_ON,
},
"stt": {
"OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL,
@ -159,6 +163,7 @@ async def update_audio_config(
app.state.config.TTS_ENGINE = form_data.tts.ENGINE
app.state.config.TTS_MODEL = form_data.tts.MODEL
app.state.config.TTS_VOICE = form_data.tts.VOICE
app.state.config.TTS_SPLIT_ON = form_data.tts.SPLIT_ON
app.state.config.STT_OPENAI_API_BASE_URL = form_data.stt.OPENAI_API_BASE_URL
app.state.config.STT_OPENAI_API_KEY = form_data.stt.OPENAI_API_KEY
@ -173,6 +178,7 @@ async def update_audio_config(
"ENGINE": app.state.config.TTS_ENGINE,
"MODEL": app.state.config.TTS_MODEL,
"VOICE": app.state.config.TTS_VOICE,
"SPLIT_ON": app.state.config.TTS_SPLIT_ON,
},
"stt": {
"OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL,

View File

@ -1484,3 +1484,9 @@ AUDIO_TTS_VOICE = PersistentConfig(
"audio.tts.voice",
os.getenv("AUDIO_TTS_VOICE", "alloy"), # OpenAI default voice
)
# How assistant message text is split into chunks for TTS generation requests:
# "punctuation" splits into sentences, "paragraphs" splits into paragraphs,
# and "none" keeps the whole message as a single string.
AUDIO_TTS_SPLIT_ON = PersistentConfig(
    "AUDIO_TTS_SPLIT_ON",  # environment variable name
    "audio.tts.split_on",  # persisted config path
    os.getenv("AUDIO_TTS_SPLIT_ON", "punctuation"),  # default: sentence-level splitting
)

View File

@ -1933,6 +1933,7 @@ async def get_app_config(request: Request):
"tts": {
"engine": audio_app.state.config.TTS_ENGINE,
"voice": audio_app.state.config.TTS_VOICE,
"split_on": audio_app.state.config.TTS_SPLIT_ON,
},
"stt": {
"engine": audio_app.state.config.STT_ENGINE,

View File

@ -132,7 +132,11 @@ export const synthesizeOpenAISpeech = async (
return res;
};
export const getModels = async (token: string = '') => {
// Response shape of the audio backend's `/models` endpoint: a list of models
// carrying either a display name alongside the id, or the id alone.
// NOTE(review): which variant is returned presumably depends on the configured
// TTS engine — confirm against the backend /models route.
interface AvailableModelsResponse {
	models: { name: string; id: string }[] | { id: string }[];
}
export const getModels = async (token: string = ''): Promise<AvailableModelsResponse> => {
let error = null;
const res = await fetch(`${AUDIO_API_BASE_URL}/models`, {

View File

@ -10,31 +10,36 @@
getModels as _getModels,
getVoices as _getVoices
} from '$lib/apis/audio';
import { user, settings, config } from '$lib/stores';
import { config } from '$lib/stores';
import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
const i18n = getContext('i18n');
import { TTS_RESPONSE_SPLIT } from '$lib/types';
export let saveHandler: Function;
import type { Writable } from 'svelte/store';
import type { i18n as i18nType } from 'i18next';
const i18n = getContext<Writable<i18nType>>('i18n');
export let saveHandler: () => void;
// Audio
let TTS_OPENAI_API_BASE_URL = '';
let TTS_OPENAI_API_KEY = '';
let TTS_API_KEY = '';
let TTS_ENGINE = '';
let TTS_MODEL = '';
let TTS_VOICE = '';
let TTS_SPLIT_ON: TTS_RESPONSE_SPLIT = TTS_RESPONSE_SPLIT.PUNCTUATION;
let STT_OPENAI_API_BASE_URL = '';
let STT_OPENAI_API_KEY = '';
let STT_ENGINE = '';
let STT_MODEL = '';
let voices = [];
let models = [];
let nonLocalVoices = false;
// eslint-disable-next-line no-undef
let voices: SpeechSynthesisVoice[] = [];
let models: Awaited<ReturnType<typeof _getModels>>['models'] = [];
const getModels = async () => {
if (TTS_ENGINE === '') {
@ -53,8 +58,8 @@
const getVoices = async () => {
if (TTS_ENGINE === '') {
const getVoicesLoop = setInterval(async () => {
voices = await speechSynthesis.getVoices();
const getVoicesLoop = setInterval(() => {
voices = speechSynthesis.getVoices();
// do your loop
if (voices.length > 0) {
@ -81,7 +86,8 @@
API_KEY: TTS_API_KEY,
ENGINE: TTS_ENGINE,
MODEL: TTS_MODEL,
VOICE: TTS_VOICE
VOICE: TTS_VOICE,
SPLIT_ON: TTS_SPLIT_ON
},
stt: {
OPENAI_API_BASE_URL: STT_OPENAI_API_BASE_URL,
@ -92,9 +98,8 @@
});
if (res) {
toast.success($i18n.t('Audio settings updated successfully'));
config.set(await getBackendConfig());
saveHandler();
getBackendConfig().then(config.set).catch(() => {});
}
};
@ -111,6 +116,8 @@
TTS_MODEL = res.tts.MODEL;
TTS_VOICE = res.tts.VOICE;
TTS_SPLIT_ON = res.tts.SPLIT_ON || TTS_RESPONSE_SPLIT.PUNCTUATION;
STT_OPENAI_API_BASE_URL = res.stt.OPENAI_API_BASE_URL;
STT_OPENAI_API_KEY = res.stt.OPENAI_API_KEY;
@ -139,7 +146,7 @@
<div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div>
<div class="flex items-center relative">
<select
class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
class="dark:bg-gray-900 cursor-pointer w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={STT_ENGINE}
placeholder="Select an engine"
>
@ -195,7 +202,7 @@
<div class=" self-center text-xs font-medium">{$i18n.t('Text-to-Speech Engine')}</div>
<div class="flex items-center relative">
<select
class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
class=" dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={TTS_ENGINE}
placeholder="Select a mode"
on:change={async (e) => {
@ -203,7 +210,7 @@
await getVoices();
await getModels();
if (e.target.value === 'openai') {
if (e.target?.value === 'openai') {
TTS_VOICE = 'alloy';
TTS_MODEL = 'tts-1';
} else {
@ -351,6 +358,28 @@
</div>
</div>
{/if}
<hr class="dark:border-gray-850 my-2" />
<div class="pt-0.5 flex w-full justify-between">
<div class="self-center text-xs font-medium">{$i18n.t('Response splitting')}</div>
<div class="flex items-center relative">
<select
class="dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
aria-label="Select how to split message text for TTS requests"
bind:value={TTS_SPLIT_ON}
>
{#each Object.values(TTS_RESPONSE_SPLIT) as split}
<option value={split}>{$i18n.t(split.charAt(0).toUpperCase() + split.slice(1))}</option>
{/each}
</select>
</div>
</div>
<div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
{$i18n.t(
"Control how message text is split for TTS requests. 'Punctuation' splits into sentences, 'paragraphs' splits into paragraphs, and 'none' keeps the message as a single string."
)}
</div>
</div>
</div>
</div>

View File

@ -3,13 +3,13 @@
import { toast } from 'svelte-sonner';
import mermaid from 'mermaid';
import { getContext, onMount, tick } from 'svelte';
import { getContext, onDestroy, onMount, tick } from 'svelte';
import { goto } from '$app/navigation';
import { page } from '$app/stores';
import type { Writable } from 'svelte/store';
import type { Unsubscriber, Writable } from 'svelte/store';
import type { i18n as i18nType } from 'i18next';
import { OLLAMA_API_BASE_URL, OPENAI_API_BASE_URL, WEBUI_BASE_URL } from '$lib/constants';
import { WEBUI_BASE_URL } from '$lib/constants';
import {
chatId,
@ -19,31 +19,26 @@
models,
settings,
showSidebar,
tags as _tags,
WEBUI_NAME,
banners,
user,
socket,
showCallOverlay,
tools,
currentChatPage,
temporaryChatEnabled
} from '$lib/stores';
import {
convertMessagesToHistory,
copyToClipboard,
getMessageContentParts,
extractSentencesForAudio,
getUserPosition,
promptTemplate,
splitStream
} from '$lib/utils';
import { generateChatCompletion } from '$lib/apis/ollama';
import {
addTagById,
createNewChat,
deleteTagById,
getAllChatTags,
getChatById,
getChatList,
getTagsById,
@ -66,8 +61,6 @@
import MessageInput from '$lib/components/chat/MessageInput.svelte';
import Messages from '$lib/components/chat/Messages.svelte';
import Navbar from '$lib/components/layout/Navbar.svelte';
import CallOverlay from './MessageInput/CallOverlay.svelte';
import { error } from '@sveltejs/kit';
import ChatControls from './ChatControls.svelte';
import EventConfirmDialog from '../common/ConfirmDialog.svelte';
@ -118,6 +111,8 @@
let params = {};
let chatIdUnsubscriber: Unsubscriber | undefined;
$: if (history.currentId !== null) {
let _messages = [];
@ -207,47 +202,51 @@
}
};
onMount(async () => {
const onMessageHandler = async (event) => {
if (event.origin === window.origin) {
// Replace with your iframe's origin
console.log('Message received from iframe:', event.data);
if (event.data.type === 'input:prompt') {
console.log(event.data.text);
const onMessageHandler = async (event: {
origin: string;
data: { type: string; text: string };
}) => {
if (event.origin !== window.origin) {
return;
}
const inputElement = document.getElementById('chat-textarea');
// Replace with your iframe's origin
if (event.data.type === 'input:prompt') {
console.debug(event.data.text);
if (inputElement) {
prompt = event.data.text;
inputElement.focus();
}
}
const inputElement = document.getElementById('chat-textarea');
if (event.data.type === 'action:submit') {
console.log(event.data.text);
if (prompt !== '') {
await tick();
submitPrompt(prompt);
}
}
if (event.data.type === 'input:prompt:submit') {
console.log(event.data.text);
if (prompt !== '') {
await tick();
submitPrompt(event.data.text);
}
}
if (inputElement) {
prompt = event.data.text;
inputElement.focus();
}
};
window.addEventListener('message', onMessageHandler);
}
$socket.on('chat-events', chatEventHandler);
if (event.data.type === 'action:submit') {
console.debug(event.data.text);
if (prompt !== '') {
await tick();
submitPrompt(prompt);
}
}
if (event.data.type === 'input:prompt:submit') {
console.debug(event.data.text);
if (prompt !== '') {
await tick();
submitPrompt(event.data.text);
}
}
};
onMount(async () => {
window.addEventListener('message', onMessageHandler);
$socket?.on('chat-events', chatEventHandler);
if (!$chatId) {
chatId.subscribe(async (value) => {
chatIdUnsubscriber = chatId.subscribe(async (value) => {
if (!value) {
await initNewChat();
}
@ -257,12 +256,12 @@
await goto('/');
}
}
});
return () => {
window.removeEventListener('message', onMessageHandler);
$socket.off('chat-events');
};
onDestroy(() => {
chatIdUnsubscriber?.();
window.removeEventListener('message', onMessageHandler);
$socket?.off('chat-events');
});
//////////////////////////
@ -595,11 +594,11 @@
};
const sendPrompt = async (
prompt,
parentId,
prompt: string,
parentId: string,
{ modelId = null, modelIdx = null, newChat = false } = {}
) => {
let _responses = [];
let _responses: string[] = [];
// If modelId is provided, use it, else use selected model
let selectedModelIds = modelId
@ -609,7 +608,7 @@
: selectedModels;
// Create response messages for each selected model
const responseMessageIds = {};
const responseMessageIds: Record<PropertyKey, string> = {};
for (const [_modelIdx, modelId] of selectedModelIds.entries()) {
const model = $models.filter((m) => m.id === modelId).at(0);
@ -739,13 +738,13 @@
);
currentChatPage.set(1);
await chats.set(await getChatList(localStorage.token, $currentChatPage));
chats.set(await getChatList(localStorage.token, $currentChatPage));
return _responses;
};
const sendPromptOllama = async (model, userPrompt, responseMessageId, _chatId) => {
let _response = null;
let _response: string | null = null;
const responseMessage = history.messages[responseMessageId];
const userMessage = history.messages[responseMessage.parentId];
@ -776,7 +775,7 @@
...messages
]
.filter((message) => message?.content?.trim())
.map((message, idx, arr) => {
.map((message) => {
// Prepare the base message object
const baseMessage = {
role: message.role,
@ -928,18 +927,26 @@
navigator.vibrate(5);
}
const sentences = extractSentencesForAudio(responseMessage.content);
sentences.pop();
const messageContentParts = getMessageContentParts(
responseMessage.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
);
messageContentParts.pop();
// dispatch only last sentence and make sure it hasn't been dispatched before
if (
sentences.length > 0 &&
sentences[sentences.length - 1] !== responseMessage.lastSentence
messageContentParts.length > 0 &&
messageContentParts[messageContentParts.length - 1] !==
responseMessage.lastSentence
) {
responseMessage.lastSentence = sentences[sentences.length - 1];
responseMessage.lastSentence =
messageContentParts[messageContentParts.length - 1];
eventTarget.dispatchEvent(
new CustomEvent('chat', {
detail: { id: responseMessageId, content: sentences[sentences.length - 1] }
detail: {
id: responseMessageId,
content: messageContentParts[messageContentParts.length - 1]
}
})
);
}
@ -1042,14 +1049,19 @@
stopResponseFlag = false;
await tick();
let lastSentence = extractSentencesForAudio(responseMessage.content)?.at(-1) ?? '';
if (lastSentence) {
let lastMessageContentPart =
getMessageContentParts(
responseMessage.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
)?.at(-1) ?? '';
if (lastMessageContentPart) {
eventTarget.dispatchEvent(
new CustomEvent('chat', {
detail: { id: responseMessageId, content: lastSentence }
detail: { id: responseMessageId, content: lastMessageContentPart }
})
);
}
eventTarget.dispatchEvent(
new CustomEvent('chat:finish', {
detail: {
@ -1249,18 +1261,24 @@
navigator.vibrate(5);
}
const sentences = extractSentencesForAudio(responseMessage.content);
sentences.pop();
const messageContentParts = getMessageContentParts(
responseMessage.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
);
messageContentParts.pop();
// dispatch only last sentence and make sure it hasn't been dispatched before
if (
sentences.length > 0 &&
sentences[sentences.length - 1] !== responseMessage.lastSentence
messageContentParts.length > 0 &&
messageContentParts[messageContentParts.length - 1] !== responseMessage.lastSentence
) {
responseMessage.lastSentence = sentences[sentences.length - 1];
responseMessage.lastSentence = messageContentParts[messageContentParts.length - 1];
eventTarget.dispatchEvent(
new CustomEvent('chat', {
detail: { id: responseMessageId, content: sentences[sentences.length - 1] }
detail: {
id: responseMessageId,
content: messageContentParts[messageContentParts.length - 1]
}
})
);
}
@ -1315,11 +1333,15 @@
stopResponseFlag = false;
await tick();
let lastSentence = extractSentencesForAudio(responseMessage.content)?.at(-1) ?? '';
if (lastSentence) {
let lastMessageContentPart =
getMessageContentParts(
responseMessage.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
)?.at(-1) ?? '';
if (lastMessageContentPart) {
eventTarget.dispatchEvent(
new CustomEvent('chat', {
detail: { id: responseMessageId, content: lastSentence }
detail: { id: responseMessageId, content: lastMessageContentPart }
})
);
}

View File

@ -2,11 +2,10 @@
import { toast } from 'svelte-sonner';
import dayjs from 'dayjs';
import { fade } from 'svelte/transition';
import { createEventDispatcher } from 'svelte';
import { onMount, tick, getContext } from 'svelte';
const i18n = getContext('i18n');
const i18n = getContext<Writable<i18nType>>('i18n');
const dispatch = createEventDispatcher();
@ -15,20 +14,19 @@
import { imageGenerations } from '$lib/apis/images';
import {
approximateToHumanReadable,
extractSentences,
replaceTokens,
processResponseContent
extractParagraphsForAudio,
extractSentencesForAudio,
cleanText,
getMessageContentParts
} from '$lib/utils';
import { WEBUI_BASE_URL } from '$lib/constants';
import Name from './Name.svelte';
import ProfileImage from './ProfileImage.svelte';
import Skeleton from './Skeleton.svelte';
import CodeBlock from './CodeBlock.svelte';
import Image from '$lib/components/common/Image.svelte';
import Tooltip from '$lib/components/common/Tooltip.svelte';
import RateComment from './RateComment.svelte';
import CitationsModal from '$lib/components/chat/Messages/CitationsModal.svelte';
import Spinner from '$lib/components/common/Spinner.svelte';
import WebSearchResults from './ResponseMessage/WebSearchResults.svelte';
import Sparkles from '$lib/components/icons/Sparkles.svelte';
@ -36,7 +34,49 @@
import Error from './Error.svelte';
import Citations from './Citations.svelte';
export let message;
import type { Writable } from 'svelte/store';
import type { i18n as i18nType } from 'i18next';
// Minimal shape of a chat message as consumed by this component.
// NOTE(review): mirrors the message objects assembled by the chat view —
// keep in sync with the sender side.
interface MessageType {
	id: string;
	model: string;
	content: string; // raw message text (markdown)
	files?: { type: string; url: string }[]; // attachments; entries with type 'image' are rendered inline
	timestamp: number;
	role: string;
	// Running list of status updates (e.g. web-search progress) shown with the message.
	statusHistory?: {
		done: boolean;
		action: string;
		description: string;
		urls?: string[];
		query?: string;
	}[];
	// Legacy single-status field; used as a fallback when statusHistory is absent.
	status?: {
		done: boolean;
		action: string;
		description: string;
		urls?: string[];
		query?: string;
	};
	done: boolean;
	error?: boolean | { content: string };
	citations?: string[];
	// Generation statistics reported by the backend (OpenAI- or Ollama-style
	// fields); *_duration values are in nanoseconds.
	info?: {
		openai?: boolean;
		prompt_tokens?: number;
		completion_tokens?: number;
		total_tokens?: number;
		eval_count?: number;
		eval_duration?: number;
		prompt_eval_count?: number;
		prompt_eval_duration?: number;
		total_duration?: number;
		load_duration?: number;
	};
	annotation?: { type: string; rating: number }; // user feedback on the response (e.g. rating)
}
export let message: MessageType;
export let siblings;
export let isLastMessage = true;
@ -60,28 +100,33 @@
let editedContent = '';
let editTextAreaElement: HTMLTextAreaElement;
let sentencesAudio = {};
let speaking = null;
let speakingIdx = null;
let audioParts: Record<number, HTMLAudioElement | null> = {};
let speaking = false;
let speakingIdx: number | undefined;
let loadingSpeech = false;
let generatingImage = false;
let showRateComment = false;
const playAudio = (idx) => {
return new Promise((res) => {
const playAudio = (idx: number) => {
return new Promise<void>((res) => {
speakingIdx = idx;
const audio = sentencesAudio[idx];
const audio = audioParts[idx];
if (!audio) {
return res();
}
audio.play();
audio.onended = async (e) => {
audio.onended = async () => {
await new Promise((r) => setTimeout(r, 300));
if (Object.keys(sentencesAudio).length - 1 === idx) {
speaking = null;
if (Object.keys(audioParts).length - 1 === idx) {
speaking = false;
}
res(e);
res();
};
});
};
@ -91,113 +136,111 @@
try {
speechSynthesis.cancel();
sentencesAudio[speakingIdx].pause();
sentencesAudio[speakingIdx].currentTime = 0;
if (speakingIdx !== undefined && audioParts[speakingIdx]) {
audioParts[speakingIdx]!.pause();
audioParts[speakingIdx]!.currentTime = 0;
}
} catch {}
speaking = null;
speakingIdx = null;
} else {
if ((message?.content ?? '').trim() !== '') {
speaking = true;
speaking = false;
speakingIdx = undefined;
return;
}
if ($config.audio.tts.engine !== '') {
loadingSpeech = true;
if (!(message?.content ?? '').trim().length) {
toast.info($i18n.t('No content to speak'));
return;
}
const sentences = extractSentences(message.content).reduce((mergedTexts, currentText) => {
const lastIndex = mergedTexts.length - 1;
if (lastIndex >= 0) {
const previousText = mergedTexts[lastIndex];
const wordCount = previousText.split(/\s+/).length;
if (wordCount < 2) {
mergedTexts[lastIndex] = previousText + ' ' + currentText;
} else {
mergedTexts.push(currentText);
}
} else {
mergedTexts.push(currentText);
}
return mergedTexts;
}, []);
speaking = true;
console.log(sentences);
if ($config.audio.tts.engine !== '') {
loadingSpeech = true;
if (sentences.length > 0) {
sentencesAudio = sentences.reduce((a, e, i, arr) => {
a[i] = null;
return a;
}, {});
const messageContentParts: string[] = getMessageContentParts(
message.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
);
let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
if (!messageContentParts.length) {
console.log('No content to speak');
toast.info($i18n.t('No content to speak'));
for (const [idx, sentence] of sentences.entries()) {
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice
? ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
: $config?.audio?.tts?.voice,
sentence
).catch((error) => {
toast.error(error);
speaking = null;
loadingSpeech = false;
return null;
});
if (res) {
const blob = await res.blob();
const blobUrl = URL.createObjectURL(blob);
const audio = new Audio(blobUrl);
sentencesAudio[idx] = audio;
loadingSpeech = false;
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
}
}
} else {
speaking = null;
loadingSpeech = false;
}
} else {
let voices = [];
const getVoicesLoop = setInterval(async () => {
voices = await speechSynthesis.getVoices();
if (voices.length > 0) {
clearInterval(getVoicesLoop);
const voice =
voices
?.filter(
(v) =>
v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
)
?.at(0) ?? undefined;
console.log(voice);
const speak = new SpeechSynthesisUtterance(message.content);
console.log(speak);
speak.onend = () => {
speaking = null;
if ($settings.conversationMode) {
document.getElementById('voice-input-button')?.click();
}
};
if (voice) {
speak.voice = voice;
}
speechSynthesis.speak(speak);
}
}, 100);
}
} else {
toast.error($i18n.t('No content to speak'));
speaking = false;
loadingSpeech = false;
return;
}
console.debug('Prepared message content for TTS', messageContentParts);
audioParts = messageContentParts.reduce(
(acc, _sentence, idx) => {
acc[idx] = null;
return acc;
},
{} as typeof audioParts
);
let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
for (const [idx, sentence] of messageContentParts.entries()) {
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice
? ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
: $config?.audio?.tts?.voice,
sentence
).catch((error) => {
console.error(error);
toast.error(error);
speaking = false;
loadingSpeech = false;
});
if (res) {
const blob = await res.blob();
const blobUrl = URL.createObjectURL(blob);
const audio = new Audio(blobUrl);
audioParts[idx] = audio;
loadingSpeech = false;
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
}
}
} else {
let voices = [];
const getVoicesLoop = setInterval(() => {
voices = speechSynthesis.getVoices();
if (voices.length > 0) {
clearInterval(getVoicesLoop);
const voice =
voices
?.filter(
(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
)
?.at(0) ?? undefined;
console.log(voice);
const speak = new SpeechSynthesisUtterance(message.content);
console.log(speak);
speak.onend = () => {
speaking = false;
if ($settings.conversationMode) {
document.getElementById('voice-input-button')?.click();
}
};
if (voice) {
speak.voice = voice;
}
speechSynthesis.speak(speak);
}
}, 100);
}
};
@ -230,7 +273,7 @@
await tick();
};
const generateImage = async (message) => {
const generateImage = async (message: MessageType) => {
generatingImage = true;
const res = await imageGenerations(localStorage.token, message.content).catch((error) => {
toast.error(error);
@ -285,7 +328,7 @@
</Name>
<div>
{#if (message?.files ?? []).filter((f) => f.type === 'image').length > 0}
{#if message?.files && message.files?.filter((f) => f.type === 'image').length > 0}
<div class="my-2.5 w-full flex overflow-x-auto gap-2 flex-wrap">
{#each message.files as file}
<div>
@ -304,7 +347,7 @@
message?.statusHistory ?? [...(message?.status ? [message?.status] : [])]
).at(-1)}
<div class="flex items-center gap-2 pt-0.5 pb-1">
{#if status.done === false}
{#if status?.done === false}
<div class="">
<Spinner className="size-4" />
</div>
@ -521,7 +564,7 @@
: 'invisible group-hover:visible'} p-1.5 hover:bg-black/5 dark:hover:bg-white/5 rounded-lg dark:hover:text-white hover:text-black transition"
on:click={() => {
if (!loadingSpeech) {
toggleSpeakMessage(message);
toggleSpeakMessage();
}
}}
>
@ -661,7 +704,7 @@
`${
Math.round(
((message.info.eval_count ?? 0) /
(message.info.eval_duration / 1000000000)) *
((message.info.eval_duration ?? 0) / 1000000000)) *
100
) / 100
} tokens` ?? 'N/A'
@ -669,7 +712,7 @@
prompt_token/s: ${
Math.round(
((message.info.prompt_eval_count ?? 0) /
(message.info.prompt_eval_duration / 1000000000)) *
((message.info.prompt_eval_duration ?? 0) / 1000000000)) *
100
) / 100 ?? 'N/A'
} tokens<br/>
@ -688,7 +731,7 @@
eval_duration: ${
Math.round(((message.info.eval_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
}ms<br/>
approximate_total: ${approximateToHumanReadable(message.info.total_duration)}`}
approximate_total: ${approximateToHumanReadable(message.info.total_duration ?? 0)}`}
placement="top"
>
<Tooltip content={$i18n.t('Generation Info')} placement="bottom">

View File

@ -138,6 +138,7 @@
"Continue Response": "",
"Continue with {{provider}}": "",
"Controls": "",
"Control how message text is split for TTS requests. 'Punctuation' splits into sentences, 'paragraphs' splits into paragraphs, and 'none' keeps the message as a single string.": "",
"Copied": "",
"Copied shared chat URL to clipboard!": "",
"Copied to clipboard": "",
@ -455,6 +456,7 @@
"or": "",
"Other": "",
"Password": "",
"Paragraphs": "",
"PDF document (.pdf)": "",
"PDF Extract Images (OCR)": "",
"pending": "",
@ -483,6 +485,7 @@
"Prompts": "",
"Pull \"{{searchValue}}\" from Ollama.com": "",
"Pull a model from Ollama.com": "",
"Punctuation": "",
"Query Params": "",
"RAG Template": "",
"Read Aloud": "",
@ -504,6 +507,7 @@
"Reset Upload Directory": "",
"Reset Vector Storage": "",
"Response AutoCopy to Clipboard": "",
"Response splitting": "",
"Response notifications cannot be activated as the website permissions have been denied. Please visit your browser settings to grant the necessary access.": "",
"Role": "",
"Rosé Pine": "",

View File

@ -138,6 +138,7 @@
"Continue Response": "",
"Continue with {{provider}}": "",
"Controls": "",
"Control how message text is split for TTS requests. 'Punctuation' splits into sentences, 'paragraphs' splits into paragraphs, and 'none' keeps the message as a single string.": "",
"Copied": "",
"Copied shared chat URL to clipboard!": "",
"Copied to clipboard": "",
@ -455,6 +456,7 @@
"or": "",
"Other": "",
"Password": "",
"Paragraphs": "",
"PDF document (.pdf)": "",
"PDF Extract Images (OCR)": "",
"pending": "",
@ -483,6 +485,7 @@
"Prompts": "",
"Pull \"{{searchValue}}\" from Ollama.com": "",
"Pull a model from Ollama.com": "",
"Punctuation": "",
"Query Params": "",
"RAG Template": "",
"Read Aloud": "",
@ -504,6 +507,7 @@
"Reset Upload Directory": "",
"Reset Vector Storage": "",
"Response AutoCopy to Clipboard": "",
"Response splitting": "",
"Response notifications cannot be activated as the website permissions have been denied. Please visit your browser settings to grant the necessary access.": "",
"Role": "",
"Rosé Pine": "",

View File

@ -137,7 +137,8 @@
"Context Length": "Longueur du contexte",
"Continue Response": "Continuer la réponse",
"Continue with {{provider}}": "Continuer avec {{provider}}",
"Controls": "",
"Controls": "Contrôles",
"Control how message text is split for TTS requests. 'Punctuation' splits into sentences, 'paragraphs' splits into paragraphs, and 'none' keeps the message as a single string.": "Contrôle comment le texte des messages est divisé pour les demandes de TTS. 'Ponctuation' divise en phrases, 'paragraphes' divise en paragraphes et 'aucun' garde le message comme une seule chaîne.",
"Copied": "",
"Copied shared chat URL to clipboard!": "URL du chat copiée dans le presse-papiers\u00a0!",
"Copied to clipboard": "",
@ -455,6 +456,7 @@
"or": "ou",
"Other": "Autre",
"Password": "Mot de passe",
"Paragraphs": "Paragraphes",
"PDF document (.pdf)": "Document au format PDF (.pdf)",
"PDF Extract Images (OCR)": "Extraction d'images PDF (OCR)",
"pending": "en attente",
@ -483,6 +485,7 @@
"Prompts": "Prompts",
"Pull \"{{searchValue}}\" from Ollama.com": "Récupérer « {{searchValue}} » depuis Ollama.com",
"Pull a model from Ollama.com": "Télécharger un modèle depuis Ollama.com",
"Punctuation": "Ponctuation",
"Query Params": "Paramètres de requête",
"RAG Template": "Modèle RAG",
"Read Aloud": "Lire à haute voix",
@ -504,6 +507,7 @@
"Reset Upload Directory": "Répertoire de téléchargement réinitialisé",
"Reset Vector Storage": "Réinitialiser le stockage des vecteurs",
"Response AutoCopy to Clipboard": "Copie automatique de la réponse vers le presse-papiers",
"Response splitting": "Fractionnement de la réponse",
"Response notifications cannot be activated as the website permissions have been denied. Please visit your browser settings to grant the necessary access.": "Les notifications de réponse ne peuvent pas être activées car les autorisations du site web ont été refusées. Veuillez visiter les paramètres de votre navigateur pour accorder l'accès nécessaire.",
"Role": "Rôle",
"Rosé Pine": "Pin rosé",

View File

@ -138,6 +138,7 @@
"Continue Response": "Continuer la réponse",
"Continue with {{provider}}": "Continuer avec {{provider}}",
"Controls": "Contrôles",
"Control how message text is split for TTS requests. 'Punctuation' splits into sentences, 'paragraphs' splits into paragraphs, and 'none' keeps the message as a single string.": "Contrôle la façon dont le texte des messages est divisé pour les demandes de TTS. 'Ponctuation' divise en phrases, 'paragraphes' divise en paragraphes et 'aucun' garde le message en tant que chaîne unique.",
"Copied": "Copié",
"Copied shared chat URL to clipboard!": "URL du chat copiée dans le presse-papiers\u00a0!",
"Copied to clipboard": "",
@ -455,6 +456,7 @@
"or": "ou",
"Other": "Autre",
"Password": "Mot de passe",
"Paragraphs": "Paragraphes",
"PDF document (.pdf)": "Document au format PDF (.pdf)",
"PDF Extract Images (OCR)": "Extraction d'images PDF (OCR)",
"pending": "en attente",
@ -483,6 +485,7 @@
"Prompts": "Prompts",
"Pull \"{{searchValue}}\" from Ollama.com": "Récupérer « {{searchValue}} » depuis Ollama.com",
"Pull a model from Ollama.com": "Télécharger un modèle depuis Ollama.com",
"Punctuation": "Ponctuation",
"Query Params": "Paramètres de requête",
"RAG Template": "Modèle RAG",
"Read Aloud": "Lire à haute voix",
@ -504,6 +507,7 @@
"Reset Upload Directory": "Répertoire de téléchargement réinitialisé",
"Reset Vector Storage": "Réinitialiser le stockage des vecteurs",
"Response AutoCopy to Clipboard": "Copie automatique de la réponse vers le presse-papiers",
"Response splitting": "Fractionnement de la réponse",
"Response notifications cannot be activated as the website permissions have been denied. Please visit your browser settings to grant the necessary access.": "Les notifications de réponse ne peuvent pas être activées car les autorisations du site web ont été refusées. Veuillez visiter les paramètres de votre navigateur pour accorder l'accès nécessaire.",
"Role": "Rôle",
"Rosé Pine": "Pin rosé",

View File

@ -7,3 +7,9 @@ export type Banner = {
dismissible?: boolean;
timestamp: number;
};
/**
 * How assistant message text is split into chunks for TTS generation requests:
 * 'punctuation' splits into sentences, 'paragraphs' splits into paragraphs,
 * and 'none' keeps the message as a single string.
 *
 * Declared as an `as const` object plus a derived union type instead of an
 * `enum`: value access (`TTS_RESPONSE_SPLIT.PUNCTUATION`), type-annotation
 * usage, and `Object.values(TTS_RESPONSE_SPLIT)` all keep working, without
 * the runtime enum object emitted by `tsc`.
 */
export const TTS_RESPONSE_SPLIT = {
	PUNCTUATION: 'punctuation',
	PARAGRAPHS: 'paragraphs',
	NONE: 'none'
} as const;

export type TTS_RESPONSE_SPLIT = (typeof TTS_RESPONSE_SPLIT)[keyof typeof TTS_RESPONSE_SPLIT];

View File

@ -1,6 +1,8 @@
import { v4 as uuidv4 } from 'uuid';
import sha256 from 'js-sha256';
import { WEBUI_BASE_URL } from '$lib/constants';
import { TTS_RESPONSE_SPLIT } from '$lib/types';
//////////////////////////
// Helper functions
@ -408,7 +410,7 @@ const convertOpenAIMessages = (convo) => {
let currentId = '';
let lastId = null;
for (let message_id in mapping) {
for (const message_id in mapping) {
const message = mapping[message_id];
currentId = message_id;
try {
@ -442,7 +444,7 @@ const convertOpenAIMessages = (convo) => {
}
}
let history = {};
const history: Record<PropertyKey, (typeof messages)[number]> = {};
messages.forEach((obj) => (history[obj.id] = obj));
const chat = {
@ -481,7 +483,7 @@ const validateChat = (chat) => {
}
// Every message's content should be a string
for (let message of messages) {
for (const message of messages) {
if (typeof message.content !== 'string') {
return false;
}
@ -494,7 +496,7 @@ export const convertOpenAIChats = (_chats) => {
// Create a list of dictionaries with each conversation from import
const chats = [];
let failed = 0;
for (let convo of _chats) {
for (const convo of _chats) {
const chat = convertOpenAIMessages(convo);
if (validateChat(chat)) {
@ -513,7 +515,7 @@ export const convertOpenAIChats = (_chats) => {
return chats;
};
export const isValidHttpUrl = (string) => {
export const isValidHttpUrl = (string: string) => {
let url;
try {
@ -525,7 +527,7 @@ export const isValidHttpUrl = (string) => {
return url.protocol === 'http:' || url.protocol === 'https:';
};
export const removeEmojis = (str) => {
export const removeEmojis = (str: string) => {
// Regular expression to match emojis
const emojiRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]|\uD83C[\uDC00-\uDFFF]|\uD83D[\uDC00-\uDE4F]/g;
@ -533,20 +535,24 @@ export const removeEmojis = (str) => {
return str.replace(emojiRegex, '');
};
export const removeFormattings = (str) => {
export const removeFormattings = (str: string) => {
return str.replace(/(\*)(.*?)\1/g, '').replace(/(```)(.*?)\1/gs, '');
};
export const extractSentences = (text) => {
// This regular expression matches code blocks marked by triple backticks
const codeBlockRegex = /```[\s\S]*?```/g;
/**
 * Normalize a chunk of text for speech synthesis: trim surrounding
 * whitespace, then strip emojis and markdown formatting markers.
 */
export const cleanText = (content: string) => {
	const trimmed = content.trim();
	const withoutEmojis = removeEmojis(trimmed);
	return removeFormattings(withoutEmojis);
};
let codeBlocks = [];
// This regular expression matches code blocks marked by triple backticks
const codeBlockRegex = /```[\s\S]*?```/g;
export const extractSentences = (text: string) => {
const codeBlocks: string[] = [];
let index = 0;
// Temporarily replace code blocks with placeholders and store the blocks separately
text = text.replace(codeBlockRegex, (match) => {
let placeholder = `\u0000${index}\u0000`; // Use a unique placeholder
const placeholder = `\u0000${index}\u0000`; // Use a unique placeholder
codeBlocks[index++] = match;
return placeholder;
});
@ -560,18 +566,40 @@ export const extractSentences = (text) => {
return sentence.replace(/\u0000(\d+)\u0000/g, (_, idx) => codeBlocks[idx]);
});
return sentences
.map((sentence) => removeFormattings(removeEmojis(sentence.trim())))
.filter((sentence) => sentence);
return sentences.map(cleanText).filter(Boolean);
};
export const extractSentencesForAudio = (text) => {
/**
 * Split text into paragraph-sized chunks for TTS generation.
 *
 * Fenced code blocks are masked with placeholders first so that a
 * newline inside a code block never creates a paragraph boundary;
 * the blocks are restored before each paragraph is cleaned.
 */
export const extractParagraphsForAudio = (text: string) => {
	const fencedBlocks: string[] = [];
	let nextId = 0;

	// Swap each fenced code block for a unique placeholder and stash
	// the original block for later restoration.
	const masked = text.replace(codeBlockRegex, (block) => {
		fencedBlocks[nextId] = block;
		return `\u0000${nextId++}\u0000`;
	});

	// Paragraphs are runs of text separated by one or more newlines.
	const restored = masked
		.split(/\n+/)
		.map((paragraph) =>
			paragraph.replace(/\u0000(\d+)\u0000/g, (_, idx) => fencedBlocks[idx])
		);

	return restored.map(cleanText).filter(Boolean);
};
export const extractSentencesForAudio = (text: string) => {
return extractSentences(text).reduce((mergedTexts, currentText) => {
const lastIndex = mergedTexts.length - 1;
if (lastIndex >= 0) {
const previousText = mergedTexts[lastIndex];
const wordCount = previousText.split(/\s+/).length;
if (wordCount < 2) {
const charCount = previousText.length;
if (wordCount < 4 || charCount < 50) {
mergedTexts[lastIndex] = previousText + ' ' + currentText;
} else {
mergedTexts.push(currentText);
@ -580,7 +608,26 @@ export const extractSentencesForAudio = (text) => {
mergedTexts.push(currentText);
}
return mergedTexts;
}, []);
}, [] as string[]);
};
/**
 * Split a message's content into the parts that will each become one
 * TTS generation request, according to the configured strategy.
 *
 * @param content  The raw assistant message content.
 * @param split_on One of the TTS_RESPONSE_SPLIT values; any unknown
 *                 value falls back to sentence (punctuation) splitting.
 * @returns Non-empty, cleaned text chunks ready to be synthesized.
 */
export const getMessageContentParts = (content: string, split_on: string = 'punctuation') => {
	const messageContentParts: string[] = [];

	switch (split_on) {
		default:
		case TTS_RESPONSE_SPLIT.PUNCTUATION:
			messageContentParts.push(...extractSentencesForAudio(content));
			break;
		case TTS_RESPONSE_SPLIT.PARAGRAPHS:
			messageContentParts.push(...extractParagraphsForAudio(content));
			break;
		case TTS_RESPONSE_SPLIT.NONE:
			messageContentParts.push(cleanText(content));
			break;
	}

	// Drop empty parts so we never issue a TTS request for empty text
	// (e.g. whitespace-only content with split_on === 'none').
	return messageContentParts.filter((part) => part.length > 0);
};
export const blobToFile = (blob, fileName) => {

View File

@ -81,9 +81,17 @@
});
if (userSettings) {
await settings.set(userSettings.ui);
settings.set(userSettings.ui);
} else {
await settings.set(JSON.parse(localStorage.getItem('settings') ?? '{}'));
let localStorageSettings = {} as Parameters<(typeof settings)['set']>[0];
try {
localStorageSettings = JSON.parse(localStorage.getItem('settings') ?? '{}');
} catch (e: unknown) {
console.error('Failed to parse settings from localStorage', e);
}
settings.set(localStorageSettings);
}
await Promise.all([