Merge pull request #4886 from kiosion/dev

feat: Add control for how message content is split for TTS generation requests
This commit is contained in:
Timothy Jaeryang Baek 2024-08-26 15:02:30 +02:00 committed by GitHub
commit b148865ee8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 424 additions and 236 deletions

View File

@ -37,6 +37,7 @@ from config import (
AUDIO_TTS_ENGINE, AUDIO_TTS_ENGINE,
AUDIO_TTS_MODEL, AUDIO_TTS_MODEL,
AUDIO_TTS_VOICE, AUDIO_TTS_VOICE,
AUDIO_TTS_SPLIT_ON,
AppConfig, AppConfig,
CORS_ALLOW_ORIGIN, CORS_ALLOW_ORIGIN,
) )
@ -72,6 +73,7 @@ app.state.config.TTS_ENGINE = AUDIO_TTS_ENGINE
app.state.config.TTS_MODEL = AUDIO_TTS_MODEL app.state.config.TTS_MODEL = AUDIO_TTS_MODEL
app.state.config.TTS_VOICE = AUDIO_TTS_VOICE app.state.config.TTS_VOICE = AUDIO_TTS_VOICE
app.state.config.TTS_API_KEY = AUDIO_TTS_API_KEY app.state.config.TTS_API_KEY = AUDIO_TTS_API_KEY
app.state.config.TTS_SPLIT_ON = AUDIO_TTS_SPLIT_ON
# setting device type for whisper model # setting device type for whisper model
whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu" whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu"
@ -88,6 +90,7 @@ class TTSConfigForm(BaseModel):
ENGINE: str ENGINE: str
MODEL: str MODEL: str
VOICE: str VOICE: str
SPLIT_ON: str
class STTConfigForm(BaseModel): class STTConfigForm(BaseModel):
@ -139,6 +142,7 @@ async def get_audio_config(user=Depends(get_admin_user)):
"ENGINE": app.state.config.TTS_ENGINE, "ENGINE": app.state.config.TTS_ENGINE,
"MODEL": app.state.config.TTS_MODEL, "MODEL": app.state.config.TTS_MODEL,
"VOICE": app.state.config.TTS_VOICE, "VOICE": app.state.config.TTS_VOICE,
"SPLIT_ON": app.state.config.TTS_SPLIT_ON,
}, },
"stt": { "stt": {
"OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL, "OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL,
@ -159,6 +163,7 @@ async def update_audio_config(
app.state.config.TTS_ENGINE = form_data.tts.ENGINE app.state.config.TTS_ENGINE = form_data.tts.ENGINE
app.state.config.TTS_MODEL = form_data.tts.MODEL app.state.config.TTS_MODEL = form_data.tts.MODEL
app.state.config.TTS_VOICE = form_data.tts.VOICE app.state.config.TTS_VOICE = form_data.tts.VOICE
app.state.config.TTS_SPLIT_ON = form_data.tts.SPLIT_ON
app.state.config.STT_OPENAI_API_BASE_URL = form_data.stt.OPENAI_API_BASE_URL app.state.config.STT_OPENAI_API_BASE_URL = form_data.stt.OPENAI_API_BASE_URL
app.state.config.STT_OPENAI_API_KEY = form_data.stt.OPENAI_API_KEY app.state.config.STT_OPENAI_API_KEY = form_data.stt.OPENAI_API_KEY
@ -173,6 +178,7 @@ async def update_audio_config(
"ENGINE": app.state.config.TTS_ENGINE, "ENGINE": app.state.config.TTS_ENGINE,
"MODEL": app.state.config.TTS_MODEL, "MODEL": app.state.config.TTS_MODEL,
"VOICE": app.state.config.TTS_VOICE, "VOICE": app.state.config.TTS_VOICE,
"SPLIT_ON": app.state.config.TTS_SPLIT_ON,
}, },
"stt": { "stt": {
"OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL, "OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL,

View File

@ -1484,3 +1484,9 @@ AUDIO_TTS_VOICE = PersistentConfig(
"audio.tts.voice", "audio.tts.voice",
os.getenv("AUDIO_TTS_VOICE", "alloy"), # OpenAI default voice os.getenv("AUDIO_TTS_VOICE", "alloy"), # OpenAI default voice
) )
AUDIO_TTS_SPLIT_ON = PersistentConfig(
"AUDIO_TTS_SPLIT_ON",
"audio.tts.split_on",
os.getenv("AUDIO_TTS_SPLIT_ON", "punctuation"),
)

View File

@ -1933,6 +1933,7 @@ async def get_app_config(request: Request):
"tts": { "tts": {
"engine": audio_app.state.config.TTS_ENGINE, "engine": audio_app.state.config.TTS_ENGINE,
"voice": audio_app.state.config.TTS_VOICE, "voice": audio_app.state.config.TTS_VOICE,
"split_on": audio_app.state.config.TTS_SPLIT_ON,
}, },
"stt": { "stt": {
"engine": audio_app.state.config.STT_ENGINE, "engine": audio_app.state.config.STT_ENGINE,

View File

@ -132,7 +132,11 @@ export const synthesizeOpenAISpeech = async (
return res; return res;
}; };
export const getModels = async (token: string = '') => { interface AvailableModelsResponse {
models: { name: string; id: string }[] | { id: string }[];
}
export const getModels = async (token: string = ''): Promise<AvailableModelsResponse> => {
let error = null; let error = null;
const res = await fetch(`${AUDIO_API_BASE_URL}/models`, { const res = await fetch(`${AUDIO_API_BASE_URL}/models`, {

View File

@ -10,31 +10,36 @@
getModels as _getModels, getModels as _getModels,
getVoices as _getVoices getVoices as _getVoices
} from '$lib/apis/audio'; } from '$lib/apis/audio';
import { user, settings, config } from '$lib/stores'; import { config } from '$lib/stores';
import SensitiveInput from '$lib/components/common/SensitiveInput.svelte'; import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
const i18n = getContext('i18n'); import { TTS_RESPONSE_SPLIT } from '$lib/types';
export let saveHandler: Function; import type { Writable } from 'svelte/store';
import type { i18n as i18nType } from 'i18next';
const i18n = getContext<Writable<i18nType>>('i18n');
export let saveHandler: () => void;
// Audio // Audio
let TTS_OPENAI_API_BASE_URL = ''; let TTS_OPENAI_API_BASE_URL = '';
let TTS_OPENAI_API_KEY = ''; let TTS_OPENAI_API_KEY = '';
let TTS_API_KEY = ''; let TTS_API_KEY = '';
let TTS_ENGINE = ''; let TTS_ENGINE = '';
let TTS_MODEL = ''; let TTS_MODEL = '';
let TTS_VOICE = ''; let TTS_VOICE = '';
let TTS_SPLIT_ON: TTS_RESPONSE_SPLIT = TTS_RESPONSE_SPLIT.PUNCTUATION;
let STT_OPENAI_API_BASE_URL = ''; let STT_OPENAI_API_BASE_URL = '';
let STT_OPENAI_API_KEY = ''; let STT_OPENAI_API_KEY = '';
let STT_ENGINE = ''; let STT_ENGINE = '';
let STT_MODEL = ''; let STT_MODEL = '';
let voices = []; // eslint-disable-next-line no-undef
let models = []; let voices: SpeechSynthesisVoice[] = [];
let nonLocalVoices = false; let models: Awaited<ReturnType<typeof _getModels>>['models'] = [];
const getModels = async () => { const getModels = async () => {
if (TTS_ENGINE === '') { if (TTS_ENGINE === '') {
@ -53,8 +58,8 @@
const getVoices = async () => { const getVoices = async () => {
if (TTS_ENGINE === '') { if (TTS_ENGINE === '') {
const getVoicesLoop = setInterval(async () => { const getVoicesLoop = setInterval(() => {
voices = await speechSynthesis.getVoices(); voices = speechSynthesis.getVoices();
// do your loop // do your loop
if (voices.length > 0) { if (voices.length > 0) {
@ -81,7 +86,8 @@
API_KEY: TTS_API_KEY, API_KEY: TTS_API_KEY,
ENGINE: TTS_ENGINE, ENGINE: TTS_ENGINE,
MODEL: TTS_MODEL, MODEL: TTS_MODEL,
VOICE: TTS_VOICE VOICE: TTS_VOICE,
SPLIT_ON: TTS_SPLIT_ON
}, },
stt: { stt: {
OPENAI_API_BASE_URL: STT_OPENAI_API_BASE_URL, OPENAI_API_BASE_URL: STT_OPENAI_API_BASE_URL,
@ -92,9 +98,8 @@
}); });
if (res) { if (res) {
toast.success($i18n.t('Audio settings updated successfully')); saveHandler();
getBackendConfig().then(config.set).catch(() => {});
config.set(await getBackendConfig());
} }
}; };
@ -111,6 +116,8 @@
TTS_MODEL = res.tts.MODEL; TTS_MODEL = res.tts.MODEL;
TTS_VOICE = res.tts.VOICE; TTS_VOICE = res.tts.VOICE;
TTS_SPLIT_ON = res.tts.SPLIT_ON || TTS_RESPONSE_SPLIT.PUNCTUATION;
STT_OPENAI_API_BASE_URL = res.stt.OPENAI_API_BASE_URL; STT_OPENAI_API_BASE_URL = res.stt.OPENAI_API_BASE_URL;
STT_OPENAI_API_KEY = res.stt.OPENAI_API_KEY; STT_OPENAI_API_KEY = res.stt.OPENAI_API_KEY;
@ -139,7 +146,7 @@
<div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div> <div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div>
<div class="flex items-center relative"> <div class="flex items-center relative">
<select <select
class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right" class="dark:bg-gray-900 cursor-pointer w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={STT_ENGINE} bind:value={STT_ENGINE}
placeholder="Select an engine" placeholder="Select an engine"
> >
@ -195,7 +202,7 @@
<div class=" self-center text-xs font-medium">{$i18n.t('Text-to-Speech Engine')}</div> <div class=" self-center text-xs font-medium">{$i18n.t('Text-to-Speech Engine')}</div>
<div class="flex items-center relative"> <div class="flex items-center relative">
<select <select
class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right" class=" dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={TTS_ENGINE} bind:value={TTS_ENGINE}
placeholder="Select a mode" placeholder="Select a mode"
on:change={async (e) => { on:change={async (e) => {
@ -203,7 +210,7 @@
await getVoices(); await getVoices();
await getModels(); await getModels();
if (e.target.value === 'openai') { if (e.target?.value === 'openai') {
TTS_VOICE = 'alloy'; TTS_VOICE = 'alloy';
TTS_MODEL = 'tts-1'; TTS_MODEL = 'tts-1';
} else { } else {
@ -351,6 +358,28 @@
</div> </div>
</div> </div>
{/if} {/if}
<hr class="dark:border-gray-850 my-2" />
<div class="pt-0.5 flex w-full justify-between">
<div class="self-center text-xs font-medium">{$i18n.t('Response splitting')}</div>
<div class="flex items-center relative">
<select
class="dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
aria-label="Select how to split message text for TTS requests"
bind:value={TTS_SPLIT_ON}
>
{#each Object.values(TTS_RESPONSE_SPLIT) as split}
<option value={split}>{$i18n.t(split.charAt(0).toUpperCase() + split.slice(1))}</option>
{/each}
</select>
</div>
</div>
<div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
{$i18n.t(
"Control how message text is split for TTS requests. 'Punctuation' splits into sentences, 'paragraphs' splits into paragraphs, and 'none' keeps the message as a single string."
)}
</div>
</div> </div>
</div> </div>
</div> </div>

View File

@ -3,13 +3,13 @@
import { toast } from 'svelte-sonner'; import { toast } from 'svelte-sonner';
import mermaid from 'mermaid'; import mermaid from 'mermaid';
import { getContext, onMount, tick } from 'svelte'; import { getContext, onDestroy, onMount, tick } from 'svelte';
import { goto } from '$app/navigation'; import { goto } from '$app/navigation';
import { page } from '$app/stores'; import { page } from '$app/stores';
import type { Writable } from 'svelte/store'; import type { Unsubscriber, Writable } from 'svelte/store';
import type { i18n as i18nType } from 'i18next'; import type { i18n as i18nType } from 'i18next';
import { OLLAMA_API_BASE_URL, OPENAI_API_BASE_URL, WEBUI_BASE_URL } from '$lib/constants'; import { WEBUI_BASE_URL } from '$lib/constants';
import { import {
chatId, chatId,
@ -19,31 +19,26 @@
models, models,
settings, settings,
showSidebar, showSidebar,
tags as _tags,
WEBUI_NAME, WEBUI_NAME,
banners, banners,
user, user,
socket, socket,
showCallOverlay, showCallOverlay,
tools,
currentChatPage, currentChatPage,
temporaryChatEnabled temporaryChatEnabled
} from '$lib/stores'; } from '$lib/stores';
import { import {
convertMessagesToHistory, convertMessagesToHistory,
copyToClipboard, copyToClipboard,
getMessageContentParts,
extractSentencesForAudio, extractSentencesForAudio,
getUserPosition,
promptTemplate, promptTemplate,
splitStream splitStream
} from '$lib/utils'; } from '$lib/utils';
import { generateChatCompletion } from '$lib/apis/ollama'; import { generateChatCompletion } from '$lib/apis/ollama';
import { import {
addTagById,
createNewChat, createNewChat,
deleteTagById,
getAllChatTags,
getChatById, getChatById,
getChatList, getChatList,
getTagsById, getTagsById,
@ -66,8 +61,6 @@
import MessageInput from '$lib/components/chat/MessageInput.svelte'; import MessageInput from '$lib/components/chat/MessageInput.svelte';
import Messages from '$lib/components/chat/Messages.svelte'; import Messages from '$lib/components/chat/Messages.svelte';
import Navbar from '$lib/components/layout/Navbar.svelte'; import Navbar from '$lib/components/layout/Navbar.svelte';
import CallOverlay from './MessageInput/CallOverlay.svelte';
import { error } from '@sveltejs/kit';
import ChatControls from './ChatControls.svelte'; import ChatControls from './ChatControls.svelte';
import EventConfirmDialog from '../common/ConfirmDialog.svelte'; import EventConfirmDialog from '../common/ConfirmDialog.svelte';
@ -118,6 +111,8 @@
let params = {}; let params = {};
let chatIdUnsubscriber: Unsubscriber | undefined;
$: if (history.currentId !== null) { $: if (history.currentId !== null) {
let _messages = []; let _messages = [];
@ -207,13 +202,17 @@
} }
}; };
onMount(async () => { const onMessageHandler = async (event: {
const onMessageHandler = async (event) => { origin: string;
if (event.origin === window.origin) { data: { type: string; text: string };
}) => {
if (event.origin !== window.origin) {
return;
}
// Replace with your iframe's origin // Replace with your iframe's origin
console.log('Message received from iframe:', event.data);
if (event.data.type === 'input:prompt') { if (event.data.type === 'input:prompt') {
console.log(event.data.text); console.debug(event.data.text);
const inputElement = document.getElementById('chat-textarea'); const inputElement = document.getElementById('chat-textarea');
@ -224,7 +223,7 @@
} }
if (event.data.type === 'action:submit') { if (event.data.type === 'action:submit') {
console.log(event.data.text); console.debug(event.data.text);
if (prompt !== '') { if (prompt !== '') {
await tick(); await tick();
@ -233,21 +232,21 @@
} }
if (event.data.type === 'input:prompt:submit') { if (event.data.type === 'input:prompt:submit') {
console.log(event.data.text); console.debug(event.data.text);
if (prompt !== '') { if (prompt !== '') {
await tick(); await tick();
submitPrompt(event.data.text); submitPrompt(event.data.text);
} }
} }
}
}; };
window.addEventListener('message', onMessageHandler);
$socket.on('chat-events', chatEventHandler); onMount(async () => {
window.addEventListener('message', onMessageHandler);
$socket?.on('chat-events', chatEventHandler);
if (!$chatId) { if (!$chatId) {
chatId.subscribe(async (value) => { chatIdUnsubscriber = chatId.subscribe(async (value) => {
if (!value) { if (!value) {
await initNewChat(); await initNewChat();
} }
@ -257,12 +256,12 @@
await goto('/'); await goto('/');
} }
} }
});
return () => { onDestroy(() => {
chatIdUnsubscriber?.();
window.removeEventListener('message', onMessageHandler); window.removeEventListener('message', onMessageHandler);
$socket?.off('chat-events');
$socket.off('chat-events');
};
}); });
////////////////////////// //////////////////////////
@ -595,11 +594,11 @@
}; };
const sendPrompt = async ( const sendPrompt = async (
prompt, prompt: string,
parentId, parentId: string,
{ modelId = null, modelIdx = null, newChat = false } = {} { modelId = null, modelIdx = null, newChat = false } = {}
) => { ) => {
let _responses = []; let _responses: string[] = [];
// If modelId is provided, use it, else use selected model // If modelId is provided, use it, else use selected model
let selectedModelIds = modelId let selectedModelIds = modelId
@ -609,7 +608,7 @@
: selectedModels; : selectedModels;
// Create response messages for each selected model // Create response messages for each selected model
const responseMessageIds = {}; const responseMessageIds: Record<PropertyKey, string> = {};
for (const [_modelIdx, modelId] of selectedModelIds.entries()) { for (const [_modelIdx, modelId] of selectedModelIds.entries()) {
const model = $models.filter((m) => m.id === modelId).at(0); const model = $models.filter((m) => m.id === modelId).at(0);
@ -739,13 +738,13 @@
); );
currentChatPage.set(1); currentChatPage.set(1);
await chats.set(await getChatList(localStorage.token, $currentChatPage)); chats.set(await getChatList(localStorage.token, $currentChatPage));
return _responses; return _responses;
}; };
const sendPromptOllama = async (model, userPrompt, responseMessageId, _chatId) => { const sendPromptOllama = async (model, userPrompt, responseMessageId, _chatId) => {
let _response = null; let _response: string | null = null;
const responseMessage = history.messages[responseMessageId]; const responseMessage = history.messages[responseMessageId];
const userMessage = history.messages[responseMessage.parentId]; const userMessage = history.messages[responseMessage.parentId];
@ -776,7 +775,7 @@
...messages ...messages
] ]
.filter((message) => message?.content?.trim()) .filter((message) => message?.content?.trim())
.map((message, idx, arr) => { .map((message) => {
// Prepare the base message object // Prepare the base message object
const baseMessage = { const baseMessage = {
role: message.role, role: message.role,
@ -928,18 +927,26 @@
navigator.vibrate(5); navigator.vibrate(5);
} }
const sentences = extractSentencesForAudio(responseMessage.content); const messageContentParts = getMessageContentParts(
sentences.pop(); responseMessage.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
);
messageContentParts.pop();
// dispatch only last sentence and make sure it hasn't been dispatched before // dispatch only last sentence and make sure it hasn't been dispatched before
if ( if (
sentences.length > 0 && messageContentParts.length > 0 &&
sentences[sentences.length - 1] !== responseMessage.lastSentence messageContentParts[messageContentParts.length - 1] !==
responseMessage.lastSentence
) { ) {
responseMessage.lastSentence = sentences[sentences.length - 1]; responseMessage.lastSentence =
messageContentParts[messageContentParts.length - 1];
eventTarget.dispatchEvent( eventTarget.dispatchEvent(
new CustomEvent('chat', { new CustomEvent('chat', {
detail: { id: responseMessageId, content: sentences[sentences.length - 1] } detail: {
id: responseMessageId,
content: messageContentParts[messageContentParts.length - 1]
}
}) })
); );
} }
@ -1042,14 +1049,19 @@
stopResponseFlag = false; stopResponseFlag = false;
await tick(); await tick();
let lastSentence = extractSentencesForAudio(responseMessage.content)?.at(-1) ?? ''; let lastMessageContentPart =
if (lastSentence) { getMessageContentParts(
responseMessage.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
)?.at(-1) ?? '';
if (lastMessageContentPart) {
eventTarget.dispatchEvent( eventTarget.dispatchEvent(
new CustomEvent('chat', { new CustomEvent('chat', {
detail: { id: responseMessageId, content: lastSentence } detail: { id: responseMessageId, content: lastMessageContentPart }
}) })
); );
} }
eventTarget.dispatchEvent( eventTarget.dispatchEvent(
new CustomEvent('chat:finish', { new CustomEvent('chat:finish', {
detail: { detail: {
@ -1249,18 +1261,24 @@
navigator.vibrate(5); navigator.vibrate(5);
} }
const sentences = extractSentencesForAudio(responseMessage.content); const messageContentParts = getMessageContentParts(
sentences.pop(); responseMessage.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
);
messageContentParts.pop();
// dispatch only last sentence and make sure it hasn't been dispatched before // dispatch only last sentence and make sure it hasn't been dispatched before
if ( if (
sentences.length > 0 && messageContentParts.length > 0 &&
sentences[sentences.length - 1] !== responseMessage.lastSentence messageContentParts[messageContentParts.length - 1] !== responseMessage.lastSentence
) { ) {
responseMessage.lastSentence = sentences[sentences.length - 1]; responseMessage.lastSentence = messageContentParts[messageContentParts.length - 1];
eventTarget.dispatchEvent( eventTarget.dispatchEvent(
new CustomEvent('chat', { new CustomEvent('chat', {
detail: { id: responseMessageId, content: sentences[sentences.length - 1] } detail: {
id: responseMessageId,
content: messageContentParts[messageContentParts.length - 1]
}
}) })
); );
} }
@ -1315,11 +1333,15 @@
stopResponseFlag = false; stopResponseFlag = false;
await tick(); await tick();
let lastSentence = extractSentencesForAudio(responseMessage.content)?.at(-1) ?? ''; let lastMessageContentPart =
if (lastSentence) { getMessageContentParts(
responseMessage.content,
$config?.audio?.tts?.split_on ?? 'punctuation'
)?.at(-1) ?? '';
if (lastMessageContentPart) {
eventTarget.dispatchEvent( eventTarget.dispatchEvent(
new CustomEvent('chat', { new CustomEvent('chat', {
detail: { id: responseMessageId, content: lastSentence } detail: { id: responseMessageId, content: lastMessageContentPart }
}) })
); );
} }

View File

@ -2,11 +2,10 @@
import { toast } from 'svelte-sonner'; import { toast } from 'svelte-sonner';
import dayjs from 'dayjs'; import dayjs from 'dayjs';
import { fade } from 'svelte/transition';
import { createEventDispatcher } from 'svelte'; import { createEventDispatcher } from 'svelte';
import { onMount, tick, getContext } from 'svelte'; import { onMount, tick, getContext } from 'svelte';
const i18n = getContext('i18n'); const i18n = getContext<Writable<i18nType>>('i18n');
const dispatch = createEventDispatcher(); const dispatch = createEventDispatcher();
@ -15,20 +14,19 @@
import { imageGenerations } from '$lib/apis/images'; import { imageGenerations } from '$lib/apis/images';
import { import {
approximateToHumanReadable, approximateToHumanReadable,
extractSentences, extractParagraphsForAudio,
replaceTokens, extractSentencesForAudio,
processResponseContent cleanText,
getMessageContentParts
} from '$lib/utils'; } from '$lib/utils';
import { WEBUI_BASE_URL } from '$lib/constants'; import { WEBUI_BASE_URL } from '$lib/constants';
import Name from './Name.svelte'; import Name from './Name.svelte';
import ProfileImage from './ProfileImage.svelte'; import ProfileImage from './ProfileImage.svelte';
import Skeleton from './Skeleton.svelte'; import Skeleton from './Skeleton.svelte';
import CodeBlock from './CodeBlock.svelte';
import Image from '$lib/components/common/Image.svelte'; import Image from '$lib/components/common/Image.svelte';
import Tooltip from '$lib/components/common/Tooltip.svelte'; import Tooltip from '$lib/components/common/Tooltip.svelte';
import RateComment from './RateComment.svelte'; import RateComment from './RateComment.svelte';
import CitationsModal from '$lib/components/chat/Messages/CitationsModal.svelte';
import Spinner from '$lib/components/common/Spinner.svelte'; import Spinner from '$lib/components/common/Spinner.svelte';
import WebSearchResults from './ResponseMessage/WebSearchResults.svelte'; import WebSearchResults from './ResponseMessage/WebSearchResults.svelte';
import Sparkles from '$lib/components/icons/Sparkles.svelte'; import Sparkles from '$lib/components/icons/Sparkles.svelte';
@ -36,7 +34,49 @@
import Error from './Error.svelte'; import Error from './Error.svelte';
import Citations from './Citations.svelte'; import Citations from './Citations.svelte';
export let message; import type { Writable } from 'svelte/store';
import type { i18n as i18nType } from 'i18next';
interface MessageType {
id: string;
model: string;
content: string;
files?: { type: string; url: string }[];
timestamp: number;
role: string;
statusHistory?: {
done: boolean;
action: string;
description: string;
urls?: string[];
query?: string;
}[];
status?: {
done: boolean;
action: string;
description: string;
urls?: string[];
query?: string;
};
done: boolean;
error?: boolean | { content: string };
citations?: string[];
info?: {
openai?: boolean;
prompt_tokens?: number;
completion_tokens?: number;
total_tokens?: number;
eval_count?: number;
eval_duration?: number;
prompt_eval_count?: number;
prompt_eval_duration?: number;
total_duration?: number;
load_duration?: number;
};
annotation?: { type: string; rating: number };
}
export let message: MessageType;
export let siblings; export let siblings;
export let isLastMessage = true; export let isLastMessage = true;
@ -60,28 +100,33 @@
let editedContent = ''; let editedContent = '';
let editTextAreaElement: HTMLTextAreaElement; let editTextAreaElement: HTMLTextAreaElement;
let sentencesAudio = {}; let audioParts: Record<number, HTMLAudioElement | null> = {};
let speaking = null; let speaking = false;
let speakingIdx = null; let speakingIdx: number | undefined;
let loadingSpeech = false; let loadingSpeech = false;
let generatingImage = false; let generatingImage = false;
let showRateComment = false; let showRateComment = false;
const playAudio = (idx) => { const playAudio = (idx: number) => {
return new Promise((res) => { return new Promise<void>((res) => {
speakingIdx = idx; speakingIdx = idx;
const audio = sentencesAudio[idx]; const audio = audioParts[idx];
audio.play();
audio.onended = async (e) => {
await new Promise((r) => setTimeout(r, 300));
if (Object.keys(sentencesAudio).length - 1 === idx) { if (!audio) {
speaking = null; return res();
} }
res(e); audio.play();
audio.onended = async () => {
await new Promise((r) => setTimeout(r, 300));
if (Object.keys(audioParts).length - 1 === idx) {
speaking = false;
}
res();
}; };
}); });
}; };
@ -91,46 +136,54 @@
try { try {
speechSynthesis.cancel(); speechSynthesis.cancel();
sentencesAudio[speakingIdx].pause(); if (speakingIdx !== undefined && audioParts[speakingIdx]) {
sentencesAudio[speakingIdx].currentTime = 0; audioParts[speakingIdx]!.pause();
audioParts[speakingIdx]!.currentTime = 0;
}
} catch {} } catch {}
speaking = null; speaking = false;
speakingIdx = null; speakingIdx = undefined;
} else { return;
if ((message?.content ?? '').trim() !== '') { }
if (!(message?.content ?? '').trim().length) {
toast.info($i18n.t('No content to speak'));
return;
}
speaking = true; speaking = true;
if ($config.audio.tts.engine !== '') { if ($config.audio.tts.engine !== '') {
loadingSpeech = true; loadingSpeech = true;
const sentences = extractSentences(message.content).reduce((mergedTexts, currentText) => { const messageContentParts: string[] = getMessageContentParts(
const lastIndex = mergedTexts.length - 1; message.content,
if (lastIndex >= 0) { $config?.audio?.tts?.split_on ?? 'punctuation'
const previousText = mergedTexts[lastIndex]; );
const wordCount = previousText.split(/\s+/).length;
if (wordCount < 2) {
mergedTexts[lastIndex] = previousText + ' ' + currentText;
} else {
mergedTexts.push(currentText);
}
} else {
mergedTexts.push(currentText);
}
return mergedTexts;
}, []);
console.log(sentences); if (!messageContentParts.length) {
console.log('No content to speak');
toast.info($i18n.t('No content to speak'));
if (sentences.length > 0) { speaking = false;
sentencesAudio = sentences.reduce((a, e, i, arr) => { loadingSpeech = false;
a[i] = null; return;
return a; }
}, {});
console.debug('Prepared message content for TTS', messageContentParts);
audioParts = messageContentParts.reduce(
(acc, _sentence, idx) => {
acc[idx] = null;
return acc;
},
{} as typeof audioParts
);
let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
for (const [idx, sentence] of sentences.entries()) { for (const [idx, sentence] of messageContentParts.entries()) {
const res = await synthesizeOpenAISpeech( const res = await synthesizeOpenAISpeech(
localStorage.token, localStorage.token,
$settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice $settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice
@ -138,39 +191,33 @@
: $config?.audio?.tts?.voice, : $config?.audio?.tts?.voice,
sentence sentence
).catch((error) => { ).catch((error) => {
console.error(error);
toast.error(error); toast.error(error);
speaking = null; speaking = false;
loadingSpeech = false; loadingSpeech = false;
return null;
}); });
if (res) { if (res) {
const blob = await res.blob(); const blob = await res.blob();
const blobUrl = URL.createObjectURL(blob); const blobUrl = URL.createObjectURL(blob);
const audio = new Audio(blobUrl); const audio = new Audio(blobUrl);
sentencesAudio[idx] = audio; audioParts[idx] = audio;
loadingSpeech = false; loadingSpeech = false;
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx)); lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
} }
} }
} else {
speaking = null;
loadingSpeech = false;
}
} else { } else {
let voices = []; let voices = [];
const getVoicesLoop = setInterval(async () => { const getVoicesLoop = setInterval(() => {
voices = await speechSynthesis.getVoices(); voices = speechSynthesis.getVoices();
if (voices.length > 0) { if (voices.length > 0) {
clearInterval(getVoicesLoop); clearInterval(getVoicesLoop);
const voice = const voice =
voices voices
?.filter( ?.filter(
(v) => (v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
) )
?.at(0) ?? undefined; ?.at(0) ?? undefined;
@ -181,7 +228,7 @@
console.log(speak); console.log(speak);
speak.onend = () => { speak.onend = () => {
speaking = null; speaking = false;
if ($settings.conversationMode) { if ($settings.conversationMode) {
document.getElementById('voice-input-button')?.click(); document.getElementById('voice-input-button')?.click();
} }
@ -195,10 +242,6 @@
} }
}, 100); }, 100);
} }
} else {
toast.error($i18n.t('No content to speak'));
}
}
}; };
const editMessageHandler = async () => { const editMessageHandler = async () => {
@ -230,7 +273,7 @@
await tick(); await tick();
}; };
const generateImage = async (message) => { const generateImage = async (message: MessageType) => {
generatingImage = true; generatingImage = true;
const res = await imageGenerations(localStorage.token, message.content).catch((error) => { const res = await imageGenerations(localStorage.token, message.content).catch((error) => {
toast.error(error); toast.error(error);
@ -285,7 +328,7 @@
</Name> </Name>
<div> <div>
{#if (message?.files ?? []).filter((f) => f.type === 'image').length > 0} {#if message?.files && message.files?.filter((f) => f.type === 'image').length > 0}
<div class="my-2.5 w-full flex overflow-x-auto gap-2 flex-wrap"> <div class="my-2.5 w-full flex overflow-x-auto gap-2 flex-wrap">
{#each message.files as file} {#each message.files as file}
<div> <div>
@ -304,7 +347,7 @@
message?.statusHistory ?? [...(message?.status ? [message?.status] : [])] message?.statusHistory ?? [...(message?.status ? [message?.status] : [])]
).at(-1)} ).at(-1)}
<div class="flex items-center gap-2 pt-0.5 pb-1"> <div class="flex items-center gap-2 pt-0.5 pb-1">
{#if status.done === false} {#if status?.done === false}
<div class=""> <div class="">
<Spinner className="size-4" /> <Spinner className="size-4" />
</div> </div>
@ -521,7 +564,7 @@
: 'invisible group-hover:visible'} p-1.5 hover:bg-black/5 dark:hover:bg-white/5 rounded-lg dark:hover:text-white hover:text-black transition" : 'invisible group-hover:visible'} p-1.5 hover:bg-black/5 dark:hover:bg-white/5 rounded-lg dark:hover:text-white hover:text-black transition"
on:click={() => { on:click={() => {
if (!loadingSpeech) { if (!loadingSpeech) {
toggleSpeakMessage(message); toggleSpeakMessage();
} }
}} }}
> >
@ -661,7 +704,7 @@
`${ `${
Math.round( Math.round(
((message.info.eval_count ?? 0) / ((message.info.eval_count ?? 0) /
(message.info.eval_duration / 1000000000)) * ((message.info.eval_duration ?? 0) / 1000000000)) *
100 100
) / 100 ) / 100
} tokens` ?? 'N/A' } tokens` ?? 'N/A'
@ -669,7 +712,7 @@
prompt_token/s: ${ prompt_token/s: ${
Math.round( Math.round(
((message.info.prompt_eval_count ?? 0) / ((message.info.prompt_eval_count ?? 0) /
(message.info.prompt_eval_duration / 1000000000)) * ((message.info.prompt_eval_duration ?? 0) / 1000000000)) *
100 100
) / 100 ?? 'N/A' ) / 100 ?? 'N/A'
} tokens<br/> } tokens<br/>
@ -688,7 +731,7 @@
eval_duration: ${ eval_duration: ${
Math.round(((message.info.eval_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A' Math.round(((message.info.eval_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
}ms<br/> }ms<br/>
approximate_total: ${approximateToHumanReadable(message.info.total_duration)}`} approximate_total: ${approximateToHumanReadable(message.info.total_duration ?? 0)}`}
placement="top" placement="top"
> >
<Tooltip content={$i18n.t('Generation Info')} placement="bottom"> <Tooltip content={$i18n.t('Generation Info')} placement="bottom">

View File

@ -138,6 +138,7 @@
"Continue Response": "", "Continue Response": "",
"Continue with {{provider}}": "", "Continue with {{provider}}": "",
"Controls": "", "Controls": "",
"Control how message text is split for TTS requests. 'Punctuation' splits into sentences, 'paragraphs' splits into paragraphs, and 'none' keeps the message as a single string.": "",
"Copied": "", "Copied": "",
"Copied shared chat URL to clipboard!": "", "Copied shared chat URL to clipboard!": "",
"Copied to clipboard": "", "Copied to clipboard": "",
@ -455,6 +456,7 @@
"or": "", "or": "",
"Other": "", "Other": "",
"Password": "", "Password": "",
"Paragraphs": "",
"PDF document (.pdf)": "", "PDF document (.pdf)": "",
"PDF Extract Images (OCR)": "", "PDF Extract Images (OCR)": "",
"pending": "", "pending": "",
@ -483,6 +485,7 @@
"Prompts": "", "Prompts": "",
"Pull \"{{searchValue}}\" from Ollama.com": "", "Pull \"{{searchValue}}\" from Ollama.com": "",
"Pull a model from Ollama.com": "", "Pull a model from Ollama.com": "",
"Punctuation": "",
"Query Params": "", "Query Params": "",
"RAG Template": "", "RAG Template": "",
"Read Aloud": "", "Read Aloud": "",
@ -504,6 +507,7 @@
"Reset Upload Directory": "", "Reset Upload Directory": "",
"Reset Vector Storage": "", "Reset Vector Storage": "",
"Response AutoCopy to Clipboard": "", "Response AutoCopy to Clipboard": "",
"Response splitting": "",
"Response notifications cannot be activated as the website permissions have been denied. Please visit your browser settings to grant the necessary access.": "", "Response notifications cannot be activated as the website permissions have been denied. Please visit your browser settings to grant the necessary access.": "",
"Role": "", "Role": "",
"Rosé Pine": "", "Rosé Pine": "",

View File

@ -138,6 +138,7 @@
"Continue Response": "", "Continue Response": "",
"Continue with {{provider}}": "", "Continue with {{provider}}": "",
"Controls": "", "Controls": "",
"Control how message text is split for TTS requests. 'Punctuation' splits into sentences, 'paragraphs' splits into paragraphs, and 'none' keeps the message as a single string.": "",
"Copied": "", "Copied": "",
"Copied shared chat URL to clipboard!": "", "Copied shared chat URL to clipboard!": "",
"Copied to clipboard": "", "Copied to clipboard": "",
@ -455,6 +456,7 @@
"or": "", "or": "",
"Other": "", "Other": "",
"Password": "", "Password": "",
"Paragraphs": "",
"PDF document (.pdf)": "", "PDF document (.pdf)": "",
"PDF Extract Images (OCR)": "", "PDF Extract Images (OCR)": "",
"pending": "", "pending": "",
@ -483,6 +485,7 @@
"Prompts": "", "Prompts": "",
"Pull \"{{searchValue}}\" from Ollama.com": "", "Pull \"{{searchValue}}\" from Ollama.com": "",
"Pull a model from Ollama.com": "", "Pull a model from Ollama.com": "",
"Punctuation": "",
"Query Params": "", "Query Params": "",
"RAG Template": "", "RAG Template": "",
"Read Aloud": "", "Read Aloud": "",
@ -504,6 +507,7 @@
"Reset Upload Directory": "", "Reset Upload Directory": "",
"Reset Vector Storage": "", "Reset Vector Storage": "",
"Response AutoCopy to Clipboard": "", "Response AutoCopy to Clipboard": "",
"Response splitting": "",
"Response notifications cannot be activated as the website permissions have been denied. Please visit your browser settings to grant the necessary access.": "", "Response notifications cannot be activated as the website permissions have been denied. Please visit your browser settings to grant the necessary access.": "",
"Role": "", "Role": "",
"Rosé Pine": "", "Rosé Pine": "",

View File

@ -137,7 +137,8 @@
"Context Length": "Longueur du contexte", "Context Length": "Longueur du contexte",
"Continue Response": "Continuer la réponse", "Continue Response": "Continuer la réponse",
"Continue with {{provider}}": "Continuer avec {{provider}}", "Continue with {{provider}}": "Continuer avec {{provider}}",
"Controls": "", "Controls": "Contrôles",
"Control how message text is split for TTS requests. 'Punctuation' splits into sentences, 'paragraphs' splits into paragraphs, and 'none' keeps the message as a single string.": "Contrôle comment le texte des messages est divisé pour les demandes de TTS. 'Ponctuation' divise en phrases, 'paragraphes' divise en paragraphes et 'aucun' garde le message comme une seule chaîne.",
"Copied": "", "Copied": "",
"Copied shared chat URL to clipboard!": "URL du chat copiée dans le presse-papiers\u00a0!", "Copied shared chat URL to clipboard!": "URL du chat copiée dans le presse-papiers\u00a0!",
"Copied to clipboard": "", "Copied to clipboard": "",
@ -455,6 +456,7 @@
"or": "ou", "or": "ou",
"Other": "Autre", "Other": "Autre",
"Password": "Mot de passe", "Password": "Mot de passe",
"Paragraphs": "Paragraphes",
"PDF document (.pdf)": "Document au format PDF (.pdf)", "PDF document (.pdf)": "Document au format PDF (.pdf)",
"PDF Extract Images (OCR)": "Extraction d'images PDF (OCR)", "PDF Extract Images (OCR)": "Extraction d'images PDF (OCR)",
"pending": "en attente", "pending": "en attente",
@ -483,6 +485,7 @@
"Prompts": "Prompts", "Prompts": "Prompts",
"Pull \"{{searchValue}}\" from Ollama.com": "Récupérer « {{searchValue}} » depuis Ollama.com", "Pull \"{{searchValue}}\" from Ollama.com": "Récupérer « {{searchValue}} » depuis Ollama.com",
"Pull a model from Ollama.com": "Télécharger un modèle depuis Ollama.com", "Pull a model from Ollama.com": "Télécharger un modèle depuis Ollama.com",
"Punctuation": "Ponctuation",
"Query Params": "Paramètres de requête", "Query Params": "Paramètres de requête",
"RAG Template": "Modèle RAG", "RAG Template": "Modèle RAG",
"Read Aloud": "Lire à haute voix", "Read Aloud": "Lire à haute voix",
@ -504,6 +507,7 @@
"Reset Upload Directory": "Répertoire de téléchargement réinitialisé", "Reset Upload Directory": "Répertoire de téléchargement réinitialisé",
"Reset Vector Storage": "Réinitialiser le stockage des vecteurs", "Reset Vector Storage": "Réinitialiser le stockage des vecteurs",
"Response AutoCopy to Clipboard": "Copie automatique de la réponse vers le presse-papiers", "Response AutoCopy to Clipboard": "Copie automatique de la réponse vers le presse-papiers",
"Response splitting": "Fractionnement de la réponse",
"Response notifications cannot be activated as the website permissions have been denied. Please visit your browser settings to grant the necessary access.": "Les notifications de réponse ne peuvent pas être activées car les autorisations du site web ont été refusées. Veuillez visiter les paramètres de votre navigateur pour accorder l'accès nécessaire.", "Response notifications cannot be activated as the website permissions have been denied. Please visit your browser settings to grant the necessary access.": "Les notifications de réponse ne peuvent pas être activées car les autorisations du site web ont été refusées. Veuillez visiter les paramètres de votre navigateur pour accorder l'accès nécessaire.",
"Role": "Rôle", "Role": "Rôle",
"Rosé Pine": "Pin rosé", "Rosé Pine": "Pin rosé",

View File

@ -138,6 +138,7 @@
"Continue Response": "Continuer la réponse", "Continue Response": "Continuer la réponse",
"Continue with {{provider}}": "Continuer avec {{provider}}", "Continue with {{provider}}": "Continuer avec {{provider}}",
"Controls": "Contrôles", "Controls": "Contrôles",
"Control how message text is split for TTS requests. 'Punctuation' splits into sentences, 'paragraphs' splits into paragraphs, and 'none' keeps the message as a single string.": "Contrôle la façon dont le texte des messages est divisé pour les demandes de TTS. 'Ponctuation' divise en phrases, 'paragraphes' divise en paragraphes et 'aucun' garde le message en tant que chaîne unique.",
"Copied": "Copié", "Copied": "Copié",
"Copied shared chat URL to clipboard!": "URL du chat copiée dans le presse-papiers\u00a0!", "Copied shared chat URL to clipboard!": "URL du chat copiée dans le presse-papiers\u00a0!",
"Copied to clipboard": "", "Copied to clipboard": "",
@ -455,6 +456,7 @@
"or": "ou", "or": "ou",
"Other": "Autre", "Other": "Autre",
"Password": "Mot de passe", "Password": "Mot de passe",
"Paragraphs": "Paragraphes",
"PDF document (.pdf)": "Document au format PDF (.pdf)", "PDF document (.pdf)": "Document au format PDF (.pdf)",
"PDF Extract Images (OCR)": "Extraction d'images PDF (OCR)", "PDF Extract Images (OCR)": "Extraction d'images PDF (OCR)",
"pending": "en attente", "pending": "en attente",
@ -483,6 +485,7 @@
"Prompts": "Prompts", "Prompts": "Prompts",
"Pull \"{{searchValue}}\" from Ollama.com": "Récupérer « {{searchValue}} » depuis Ollama.com", "Pull \"{{searchValue}}\" from Ollama.com": "Récupérer « {{searchValue}} » depuis Ollama.com",
"Pull a model from Ollama.com": "Télécharger un modèle depuis Ollama.com", "Pull a model from Ollama.com": "Télécharger un modèle depuis Ollama.com",
"Punctuation": "Ponctuation",
"Query Params": "Paramètres de requête", "Query Params": "Paramètres de requête",
"RAG Template": "Modèle RAG", "RAG Template": "Modèle RAG",
"Read Aloud": "Lire à haute voix", "Read Aloud": "Lire à haute voix",
@ -504,6 +507,7 @@
"Reset Upload Directory": "Répertoire de téléchargement réinitialisé", "Reset Upload Directory": "Répertoire de téléchargement réinitialisé",
"Reset Vector Storage": "Réinitialiser le stockage des vecteurs", "Reset Vector Storage": "Réinitialiser le stockage des vecteurs",
"Response AutoCopy to Clipboard": "Copie automatique de la réponse vers le presse-papiers", "Response AutoCopy to Clipboard": "Copie automatique de la réponse vers le presse-papiers",
"Response splitting": "Fractionnement de la réponse",
"Response notifications cannot be activated as the website permissions have been denied. Please visit your browser settings to grant the necessary access.": "Les notifications de réponse ne peuvent pas être activées car les autorisations du site web ont été refusées. Veuillez visiter les paramètres de votre navigateur pour accorder l'accès nécessaire.", "Response notifications cannot be activated as the website permissions have been denied. Please visit your browser settings to grant the necessary access.": "Les notifications de réponse ne peuvent pas être activées car les autorisations du site web ont été refusées. Veuillez visiter les paramètres de votre navigateur pour accorder l'accès nécessaire.",
"Role": "Rôle", "Role": "Rôle",
"Rosé Pine": "Pin rosé", "Rosé Pine": "Pin rosé",

View File

@ -7,3 +7,9 @@ export type Banner = {
dismissible?: boolean; dismissible?: boolean;
timestamp: number; timestamp: number;
}; };
// How an assistant message's text is chunked before being sent as TTS
// (text-to-speech) generation requests; consumed by getMessageContentParts.
export enum TTS_RESPONSE_SPLIT {
	PUNCTUATION = 'punctuation', // one request per sentence
	PARAGRAPHS = 'paragraphs', // one request per paragraph
	NONE = 'none', // single request containing the whole message
}

View File

@ -1,6 +1,8 @@
import { v4 as uuidv4 } from 'uuid'; import { v4 as uuidv4 } from 'uuid';
import sha256 from 'js-sha256'; import sha256 from 'js-sha256';
import { WEBUI_BASE_URL } from '$lib/constants'; import { WEBUI_BASE_URL } from '$lib/constants';
import { TTS_RESPONSE_SPLIT } from '$lib/types';
////////////////////////// //////////////////////////
// Helper functions // Helper functions
@ -408,7 +410,7 @@ const convertOpenAIMessages = (convo) => {
let currentId = ''; let currentId = '';
let lastId = null; let lastId = null;
for (let message_id in mapping) { for (const message_id in mapping) {
const message = mapping[message_id]; const message = mapping[message_id];
currentId = message_id; currentId = message_id;
try { try {
@ -442,7 +444,7 @@ const convertOpenAIMessages = (convo) => {
} }
} }
let history = {}; const history: Record<PropertyKey, (typeof messages)[number]> = {};
messages.forEach((obj) => (history[obj.id] = obj)); messages.forEach((obj) => (history[obj.id] = obj));
const chat = { const chat = {
@ -481,7 +483,7 @@ const validateChat = (chat) => {
} }
// Every message's content should be a string // Every message's content should be a string
for (let message of messages) { for (const message of messages) {
if (typeof message.content !== 'string') { if (typeof message.content !== 'string') {
return false; return false;
} }
@ -494,7 +496,7 @@ export const convertOpenAIChats = (_chats) => {
// Create a list of dictionaries with each conversation from import // Create a list of dictionaries with each conversation from import
const chats = []; const chats = [];
let failed = 0; let failed = 0;
for (let convo of _chats) { for (const convo of _chats) {
const chat = convertOpenAIMessages(convo); const chat = convertOpenAIMessages(convo);
if (validateChat(chat)) { if (validateChat(chat)) {
@ -513,7 +515,7 @@ export const convertOpenAIChats = (_chats) => {
return chats; return chats;
}; };
export const isValidHttpUrl = (string) => { export const isValidHttpUrl = (string: string) => {
let url; let url;
try { try {
@ -525,7 +527,7 @@ export const isValidHttpUrl = (string) => {
return url.protocol === 'http:' || url.protocol === 'https:'; return url.protocol === 'http:' || url.protocol === 'https:';
}; };
export const removeEmojis = (str) => { export const removeEmojis = (str: string) => {
// Regular expression to match emojis // Regular expression to match emojis
const emojiRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]|\uD83C[\uDC00-\uDFFF]|\uD83D[\uDC00-\uDE4F]/g; const emojiRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]|\uD83C[\uDC00-\uDFFF]|\uD83D[\uDC00-\uDE4F]/g;
@ -533,20 +535,24 @@ export const removeEmojis = (str) => {
return str.replace(emojiRegex, ''); return str.replace(emojiRegex, '');
}; };
export const removeFormattings = (str) => { export const removeFormattings = (str: string) => {
return str.replace(/(\*)(.*?)\1/g, '').replace(/(```)(.*?)\1/gs, ''); return str.replace(/(\*)(.*?)\1/g, '').replace(/(```)(.*?)\1/gs, '');
}; };
export const extractSentences = (text) => { export const cleanText = (content: string) => {
// This regular expression matches code blocks marked by triple backticks return removeFormattings(removeEmojis(content.trim()));
const codeBlockRegex = /```[\s\S]*?```/g; };
let codeBlocks = []; // This regular expression matches code blocks marked by triple backticks
const codeBlockRegex = /```[\s\S]*?```/g;
export const extractSentences = (text: string) => {
const codeBlocks: string[] = [];
let index = 0; let index = 0;
// Temporarily replace code blocks with placeholders and store the blocks separately // Temporarily replace code blocks with placeholders and store the blocks separately
text = text.replace(codeBlockRegex, (match) => { text = text.replace(codeBlockRegex, (match) => {
let placeholder = `\u0000${index}\u0000`; // Use a unique placeholder const placeholder = `\u0000${index}\u0000`; // Use a unique placeholder
codeBlocks[index++] = match; codeBlocks[index++] = match;
return placeholder; return placeholder;
}); });
@ -560,18 +566,40 @@ export const extractSentences = (text) => {
return sentence.replace(/\u0000(\d+)\u0000/g, (_, idx) => codeBlocks[idx]); return sentence.replace(/\u0000(\d+)\u0000/g, (_, idx) => codeBlocks[idx]);
}); });
return sentences return sentences.map(cleanText).filter(Boolean);
.map((sentence) => removeFormattings(removeEmojis(sentence.trim())))
.filter((sentence) => sentence);
}; };
export const extractSentencesForAudio = (text) => { export const extractParagraphsForAudio = (text: string) => {
const codeBlocks: string[] = [];
let index = 0;
// Temporarily replace code blocks with placeholders and store the blocks separately
text = text.replace(codeBlockRegex, (match) => {
const placeholder = `\u0000${index}\u0000`; // Use a unique placeholder
codeBlocks[index++] = match;
return placeholder;
});
// Split the modified text into paragraphs based on newlines, avoiding these blocks
let paragraphs = text.split(/\n+/);
// Restore code blocks and process paragraphs
paragraphs = paragraphs.map((paragraph) => {
// Check if the paragraph includes a placeholder for a code block
return paragraph.replace(/\u0000(\d+)\u0000/g, (_, idx) => codeBlocks[idx]);
});
return paragraphs.map(cleanText).filter(Boolean);
};
export const extractSentencesForAudio = (text: string) => {
return extractSentences(text).reduce((mergedTexts, currentText) => { return extractSentences(text).reduce((mergedTexts, currentText) => {
const lastIndex = mergedTexts.length - 1; const lastIndex = mergedTexts.length - 1;
if (lastIndex >= 0) { if (lastIndex >= 0) {
const previousText = mergedTexts[lastIndex]; const previousText = mergedTexts[lastIndex];
const wordCount = previousText.split(/\s+/).length; const wordCount = previousText.split(/\s+/).length;
if (wordCount < 2) { const charCount = previousText.length;
if (wordCount < 4 || charCount < 50) {
mergedTexts[lastIndex] = previousText + ' ' + currentText; mergedTexts[lastIndex] = previousText + ' ' + currentText;
} else { } else {
mergedTexts.push(currentText); mergedTexts.push(currentText);
@ -580,7 +608,26 @@ export const extractSentencesForAudio = (text) => {
mergedTexts.push(currentText); mergedTexts.push(currentText);
} }
return mergedTexts; return mergedTexts;
}, []); }, [] as string[]);
};
export const getMessageContentParts = (content: string, split_on: string = 'punctuation') => {
const messageContentParts: string[] = [];
switch (split_on) {
default:
case TTS_RESPONSE_SPLIT.PUNCTUATION:
messageContentParts.push(...extractSentencesForAudio(content));
break;
case TTS_RESPONSE_SPLIT.PARAGRAPHS:
messageContentParts.push(...extractParagraphsForAudio(content));
break;
case TTS_RESPONSE_SPLIT.NONE:
messageContentParts.push(cleanText(content));
break;
}
return messageContentParts;
}; };
export const blobToFile = (blob, fileName) => { export const blobToFile = (blob, fileName) => {

View File

@ -81,9 +81,17 @@
}); });
if (userSettings) { if (userSettings) {
await settings.set(userSettings.ui); settings.set(userSettings.ui);
} else { } else {
await settings.set(JSON.parse(localStorage.getItem('settings') ?? '{}')); let localStorageSettings = {} as Parameters<(typeof settings)['set']>[0];
try {
localStorageSettings = JSON.parse(localStorage.getItem('settings') ?? '{}');
} catch (e: unknown) {
console.error('Failed to parse settings from localStorage', e);
}
settings.set(localStorageSettings);
} }
await Promise.all([ await Promise.all([