feat: Add control for how message content is split for TTS generation requests
This commit is contained in:
parent f30428754f
commit 3967c34261
@@ -37,6 +37,7 @@ from config import (
     AUDIO_TTS_ENGINE,
     AUDIO_TTS_MODEL,
     AUDIO_TTS_VOICE,
+    AUDIO_TTS_SPLIT_ON,
     AppConfig,
     CORS_ALLOW_ORIGIN,
 )
@@ -72,6 +73,7 @@ app.state.config.TTS_ENGINE = AUDIO_TTS_ENGINE
 app.state.config.TTS_MODEL = AUDIO_TTS_MODEL
 app.state.config.TTS_VOICE = AUDIO_TTS_VOICE
 app.state.config.TTS_API_KEY = AUDIO_TTS_API_KEY
+app.state.config.TTS_SPLIT_ON = AUDIO_TTS_SPLIT_ON

 # setting device type for whisper model
 whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu"
@@ -88,6 +90,7 @@ class TTSConfigForm(BaseModel):
     ENGINE: str
     MODEL: str
     VOICE: str
+    SPLIT_ON: str


 class STTConfigForm(BaseModel):
@@ -139,6 +142,7 @@ async def get_audio_config(user=Depends(get_admin_user)):
             "ENGINE": app.state.config.TTS_ENGINE,
             "MODEL": app.state.config.TTS_MODEL,
             "VOICE": app.state.config.TTS_VOICE,
+            "SPLIT_ON": app.state.config.TTS_SPLIT_ON,
         },
         "stt": {
             "OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL,
@@ -159,6 +163,7 @@ async def update_audio_config(
     app.state.config.TTS_ENGINE = form_data.tts.ENGINE
     app.state.config.TTS_MODEL = form_data.tts.MODEL
     app.state.config.TTS_VOICE = form_data.tts.VOICE
+    app.state.config.TTS_SPLIT_ON = form_data.tts.SPLIT_ON

     app.state.config.STT_OPENAI_API_BASE_URL = form_data.stt.OPENAI_API_BASE_URL
     app.state.config.STT_OPENAI_API_KEY = form_data.stt.OPENAI_API_KEY
@@ -173,6 +178,7 @@ async def update_audio_config(
             "ENGINE": app.state.config.TTS_ENGINE,
             "MODEL": app.state.config.TTS_MODEL,
             "VOICE": app.state.config.TTS_VOICE,
+            "SPLIT_ON": app.state.config.TTS_SPLIT_ON,
         },
         "stt": {
             "OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL,
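For context, the `tts` object accepted and returned by these endpoints now carries the new field; a minimal TypeScript mirror of the `TTSConfigForm` fields visible in this diff (illustrative only, not part of the commit):

```typescript
// Shape of form_data.tts as validated by TTSConfigForm (only the fields
// shown in this diff; any other fields the form carries are omitted here).
interface TTSConfig {
	API_KEY: string;
	ENGINE: string; // '' selects the browser speechSynthesis path in the UI code below
	MODEL: string;
	VOICE: string;
	SPLIT_ON: string; // new: 'punctuation' | 'paragraphs' | 'none'
}
```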
@@ -1484,3 +1484,9 @@ AUDIO_TTS_VOICE = PersistentConfig(
     "audio.tts.voice",
     os.getenv("AUDIO_TTS_VOICE", "alloy"),  # OpenAI default voice
 )
+
+AUDIO_TTS_SPLIT_ON = PersistentConfig(
+    "AUDIO_TTS_SPLIT_ON",
+    "audio.tts.split_on",
+    os.getenv("AUDIO_TTS_SPLIT_ON", "punctuation"),
+)
@@ -1924,6 +1924,7 @@ async def get_app_config(request: Request):
         "tts": {
             "engine": audio_app.state.config.TTS_ENGINE,
             "voice": audio_app.state.config.TTS_VOICE,
+            "split_on": audio_app.state.config.TTS_SPLIT_ON,
         },
         "stt": {
             "engine": audio_app.state.config.STT_ENGINE,
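On the client, this surfaces through the app config, so the audio slice of `$config` used by the components below now includes `split_on`. A sketch typing just the fields visible in this diff:

```typescript
// Audio slice of the backend-provided config, as read via $config in the
// Svelte components below (only fields present in this diff are typed).
interface AudioConfig {
	tts: {
		engine: string;
		voice: string;
		split_on: string; // new: 'punctuation' | 'paragraphs' | 'none'
	};
	stt: {
		engine: string;
	};
}
```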
@@ -132,7 +132,11 @@ export const synthesizeOpenAISpeech = async (
 	return res;
 };

-export const getModels = async (token: string = '') => {
+interface AvailableModelsResponse {
+	models: { name: string; id: string }[] | { id: string }[];
+}
+
+export const getModels = async (token: string = ''): Promise<AvailableModelsResponse> => {
 	let error = null;

 	const res = await fetch(`${AUDIO_API_BASE_URL}/models`, {
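Since `AvailableModelsResponse.models` is a union, callers should not assume a `name` field is present. A defensive-consumption sketch (the helper itself is hypothetical):

```typescript
import { getModels } from '$lib/apis/audio';

// Hypothetical helper: normalize both union arms into display labels.
const listModelLabels = async (token: string): Promise<string[]> => {
	const { models } = await getModels(token);
	// Spreading first gives a common element type with an optional name.
	const entries: { name?: string; id: string }[] = [...models];
	return entries.map((m) => m.name ?? m.id);
};
```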
@@ -10,31 +10,36 @@
 		getModels as _getModels,
 		getVoices as _getVoices
 	} from '$lib/apis/audio';
-	import { user, settings, config } from '$lib/stores';
+	import { config } from '$lib/stores';

 	import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
+	import { TTS_RESPONSE_SPLIT } from '$lib/types';

-	const i18n = getContext('i18n');
+	import type { Writable } from 'svelte/store';
+	import type { i18n as i18nType } from 'i18next';

-	export let saveHandler: Function;
+	const i18n = getContext<Writable<i18nType>>('i18n');
+
+	export let saveHandler: () => void;

 	// Audio

 	let TTS_OPENAI_API_BASE_URL = '';
 	let TTS_OPENAI_API_KEY = '';
 	let TTS_API_KEY = '';
 	let TTS_ENGINE = '';
 	let TTS_MODEL = '';
 	let TTS_VOICE = '';
+	let TTS_SPLIT_ON: TTS_RESPONSE_SPLIT = TTS_RESPONSE_SPLIT.PUNCTUATION;

 	let STT_OPENAI_API_BASE_URL = '';
 	let STT_OPENAI_API_KEY = '';
 	let STT_ENGINE = '';
 	let STT_MODEL = '';

-	let voices = [];
-	let models = [];
 	let nonLocalVoices = false;
+	// eslint-disable-next-line no-undef
+	let voices: SpeechSynthesisVoice[] = [];
+	let models: Awaited<ReturnType<typeof _getModels>>['models'] = [];

 	const getModels = async () => {
 		if (TTS_ENGINE === '') {
@@ -53,8 +58,8 @@

 	const getVoices = async () => {
 		if (TTS_ENGINE === '') {
-			const getVoicesLoop = setInterval(async () => {
-				voices = await speechSynthesis.getVoices();
+			const getVoicesLoop = setInterval(() => {
+				voices = speechSynthesis.getVoices();

 				// do your loop
 				if (voices.length > 0) {
@@ -81,7 +86,8 @@
 				API_KEY: TTS_API_KEY,
 				ENGINE: TTS_ENGINE,
 				MODEL: TTS_MODEL,
-				VOICE: TTS_VOICE
+				VOICE: TTS_VOICE,
+				SPLIT_ON: TTS_SPLIT_ON
 			},
 			stt: {
 				OPENAI_API_BASE_URL: STT_OPENAI_API_BASE_URL,
@@ -92,9 +98,8 @@
 		});

 		if (res) {
 			toast.success($i18n.t('Audio settings updated successfully'));
-
-			config.set(await getBackendConfig());
 			saveHandler();
+			getBackendConfig().then(config.set).catch(() => {});
 		}
 	};

@@ -111,6 +116,8 @@
 		TTS_MODEL = res.tts.MODEL;
 		TTS_VOICE = res.tts.VOICE;
+
+		TTS_SPLIT_ON = res.tts.SPLIT_ON || TTS_RESPONSE_SPLIT.PUNCTUATION;

 		STT_OPENAI_API_BASE_URL = res.stt.OPENAI_API_BASE_URL;
 		STT_OPENAI_API_KEY = res.stt.OPENAI_API_KEY;
@@ -139,7 +146,7 @@
 			<div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div>
 			<div class="flex items-center relative">
 				<select
-					class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
+					class="dark:bg-gray-900 cursor-pointer w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
 					bind:value={STT_ENGINE}
 					placeholder="Select an engine"
 				>
@@ -195,7 +202,7 @@
 			<div class=" self-center text-xs font-medium">{$i18n.t('Text-to-Speech Engine')}</div>
 			<div class="flex items-center relative">
 				<select
-					class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
+					class=" dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
 					bind:value={TTS_ENGINE}
 					placeholder="Select a mode"
 					on:change={async (e) => {
@@ -203,7 +210,7 @@
 						await getVoices();
 						await getModels();

-						if (e.target.value === 'openai') {
+						if (e.target?.value === 'openai') {
 							TTS_VOICE = 'alloy';
 							TTS_MODEL = 'tts-1';
 						} else {
@@ -351,6 +358,28 @@
 					</div>
 				</div>
 			{/if}
+
+			<hr class="dark:border-gray-850 my-2" />
+
+			<div class="pt-0.5 flex w-full justify-between">
+				<div class="self-center text-xs font-medium">{$i18n.t('Response splitting')}</div>
+				<div class="flex items-center relative">
+					<select
+						class="dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
+						placeholder="Select how to split response text"
+						bind:value={TTS_SPLIT_ON}
+					>
+						{#each Object.values(TTS_RESPONSE_SPLIT) as split}
+							<option value={split}>{$i18n.t(split.charAt(0).toUpperCase() + split.slice(1))}</option>
+						{/each}
+					</select>
+				</div>
+			</div>
+			<div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
+				{$i18n.t(
+					"Choose how to split response text for speech synthesis. 'Punctuation' splits by sentences, 'paragraphs' splits by paragraphs, and 'none' sends the response as a single string."
+				)}
+			</div>
 		</div>
 	</div>
 </div>
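Note how the option labels are derived from the enum values themselves: each value is capitalized and then used as an i18n key, so the keys looked up are 'Punctuation', 'Paragraphs' and 'None'. As a standalone check:

```typescript
// Mirrors the inline expression in the <option> above.
const toLabel = (split: string) => split.charAt(0).toUpperCase() + split.slice(1);
console.assert(toLabel('punctuation') === 'Punctuation');
console.assert(toLabel('none') === 'None');
```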
@@ -2,11 +2,10 @@
 	import { toast } from 'svelte-sonner';
 	import dayjs from 'dayjs';

 	import { fade } from 'svelte/transition';
 	import { createEventDispatcher } from 'svelte';
 	import { onMount, tick, getContext } from 'svelte';

-	const i18n = getContext('i18n');
+	const i18n = getContext<Writable<i18nType>>('i18n');

 	const dispatch = createEventDispatcher();

@@ -15,20 +14,18 @@
 	import { imageGenerations } from '$lib/apis/images';
 	import {
 		approximateToHumanReadable,
-		extractSentences,
 		replaceTokens,
-		processResponseContent
+		processResponseContent,
+		extractParagraphsForAudio,
+		extractSentencesForAudio,
+		prepareTextForTTS
 	} from '$lib/utils';
 	import { WEBUI_BASE_URL } from '$lib/constants';

 	import Name from './Name.svelte';
 	import ProfileImage from './ProfileImage.svelte';
 	import Skeleton from './Skeleton.svelte';
 	import CodeBlock from './CodeBlock.svelte';
 	import Image from '$lib/components/common/Image.svelte';
 	import Tooltip from '$lib/components/common/Tooltip.svelte';
 	import RateComment from './RateComment.svelte';
 	import CitationsModal from '$lib/components/chat/Messages/CitationsModal.svelte';
 	import Spinner from '$lib/components/common/Spinner.svelte';
 	import WebSearchResults from './ResponseMessage/WebSearchResults.svelte';
 	import Sparkles from '$lib/components/icons/Sparkles.svelte';
@@ -36,7 +33,38 @@
 	import Error from './Error.svelte';
 	import Citations from './Citations.svelte';

-	export let message;
+	import type { Writable } from 'svelte/store';
+	import type { i18n as i18nType } from 'i18next';
+	import { TTS_RESPONSE_SPLIT } from '$lib/types';
+
+	interface MessageType {
+		id: string;
+		model: string;
+		content: string;
+		files?: { type: string; url: string }[];
+		timestamp: number;
+		role: string;
+		statusHistory?: { done: boolean; action: string; description: string; urls?: string[]; query?: string }[];
+		status?: { done: boolean; action: string; description: string; urls?: string[]; query?: string };
+		done: boolean;
+		error?: boolean | { content: string };
+		citations?: string[];
+		info?: {
+			openai?: boolean;
+			prompt_tokens?: number;
+			completion_tokens?: number;
+			total_tokens?: number;
+			eval_count?: number;
+			eval_duration?: number;
+			prompt_eval_count?: number;
+			prompt_eval_duration?: number;
+			total_duration?: number;
+			load_duration?: number;
+		};
+		annotation?: { type: string; rating: number };
+	}
+
+	export let message: MessageType;
 	export let siblings;

 	export let isLastMessage = true;
@@ -60,28 +88,33 @@
 	let editedContent = '';
 	let editTextAreaElement: HTMLTextAreaElement;

-	let sentencesAudio = {};
-	let speaking = null;
-	let speakingIdx = null;
+	let audioParts: Record<number, HTMLAudioElement | null> = {};
+	let speaking = false;
+	let speakingIdx: number | undefined;

 	let loadingSpeech = false;
 	let generatingImage = false;

 	let showRateComment = false;

-	const playAudio = (idx) => {
-		return new Promise((res) => {
+	const playAudio = (idx: number) => {
+		return new Promise<void>((res) => {
 			speakingIdx = idx;
-			const audio = sentencesAudio[idx];
+			const audio = audioParts[idx];
+
+			if (!audio) {
+				return res();
+			}
+
 			audio.play();
-			audio.onended = async (e) => {
+			audio.onended = async () => {
 				await new Promise((r) => setTimeout(r, 300));

-				if (Object.keys(sentencesAudio).length - 1 === idx) {
-					speaking = null;
+				if (Object.keys(audioParts).length - 1 === idx) {
+					speaking = false;
 				}

-				res(e);
+				res();
 			};
 		});
 	};
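The refactor keeps the original playback-queue trick: parts are synthesized eagerly in a loop, while playback stays strictly in order because each part is chained onto the previous part's promise. A condensed, self-contained sketch of the pattern (names match the diff; the synthesis step is elided):

```typescript
// Playback queue: audioParts may be filled out of order as synthesis
// completes, but playAudio calls are serialized via promise chaining.
const audioParts: Record<number, HTMLAudioElement | null> = {};

const playAudio = (idx: number) =>
	new Promise<void>((res) => {
		const audio = audioParts[idx];
		if (!audio) return res(); // part missing (failed or not yet synthesized)
		audio.onended = () => res();
		audio.play();
	});

let lastPlayedAudioPromise = Promise.resolve();
for (const idx of [0, 1, 2]) {
	// ...synthesize part idx and store it in audioParts[idx]...
	lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
}
```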
@@ -91,113 +124,119 @@
 			try {
 				speechSynthesis.cancel();

-				sentencesAudio[speakingIdx].pause();
-				sentencesAudio[speakingIdx].currentTime = 0;
+				if (speakingIdx !== undefined && audioParts[speakingIdx]) {
+					audioParts[speakingIdx]!.pause();
+					audioParts[speakingIdx]!.currentTime = 0;
+				}
 			} catch {}

-			speaking = null;
-			speakingIdx = null;
-		} else {
-			if ((message?.content ?? '').trim() !== '') {
-				speaking = true;
+			speaking = false;
+			speakingIdx = undefined;
+			return;
+		}

-				if ($config.audio.tts.engine !== '') {
-					loadingSpeech = true;
+		if (!(message?.content ?? '').trim().length) {
+			toast.info($i18n.t('No content to speak'));
+			return;
+		}

-					const sentences = extractSentences(message.content).reduce((mergedTexts, currentText) => {
-						const lastIndex = mergedTexts.length - 1;
-						if (lastIndex >= 0) {
-							const previousText = mergedTexts[lastIndex];
-							const wordCount = previousText.split(/\s+/).length;
-							if (wordCount < 2) {
-								mergedTexts[lastIndex] = previousText + ' ' + currentText;
-							} else {
-								mergedTexts.push(currentText);
-							}
-						} else {
-							mergedTexts.push(currentText);
-						}
-						return mergedTexts;
-					}, []);
+		speaking = true;

-					console.log(sentences);
+		if ($config.audio.tts.engine !== '') {
+			loadingSpeech = true;

-					if (sentences.length > 0) {
-						sentencesAudio = sentences.reduce((a, e, i, arr) => {
-							a[i] = null;
-							return a;
-						}, {});
+			const preparedMessageContent: string[] = [];

-						let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
+			switch ($config.audio.tts.split_on) {
+				default:
+				case TTS_RESPONSE_SPLIT.PUNCTUATION:
+					preparedMessageContent.push(...extractSentencesForAudio(message.content));
+					break;
+				case TTS_RESPONSE_SPLIT.PARAGRAPHS:
+					preparedMessageContent.push(...extractParagraphsForAudio(message.content));
+					break;
+				case TTS_RESPONSE_SPLIT.NONE:
+					preparedMessageContent.push(prepareTextForTTS(message.content));
+					break;
+			}

-						for (const [idx, sentence] of sentences.entries()) {
-							const res = await synthesizeOpenAISpeech(
-								localStorage.token,
-								$settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice
-									? ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
-									: $config?.audio?.tts?.voice,
-								sentence
-							).catch((error) => {
-								toast.error(error);
+			if (!preparedMessageContent.length) {
+				console.log('No content to speak');
+				toast.info($i18n.t('No content to speak'));

-								speaking = null;
-								loadingSpeech = false;
+				speaking = false;
+				loadingSpeech = false;
+				return;
+			}

-								return null;
-							});
+			console.debug('Prepared message content for TTS', preparedMessageContent);

-							if (res) {
-								const blob = await res.blob();
-								const blobUrl = URL.createObjectURL(blob);
-								const audio = new Audio(blobUrl);
-								sentencesAudio[idx] = audio;
-								loadingSpeech = false;
-								lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
-							}
-						}
-					} else {
-						speaking = null;
-						loadingSpeech = false;
-					}
-				} else {
-					let voices = [];
-					const getVoicesLoop = setInterval(async () => {
-						voices = await speechSynthesis.getVoices();
-						if (voices.length > 0) {
-							clearInterval(getVoicesLoop);
+			audioParts = preparedMessageContent.reduce((acc, _sentence, idx) => {
+				acc[idx] = null;
+				return acc;
+			}, {} as typeof audioParts);

-							const voice =
-								voices
-									?.filter(
-										(v) =>
-											v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
-									)
-									?.at(0) ?? undefined;
+			let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately

-							console.log(voice);
+			for (const [idx, sentence] of preparedMessageContent.entries()) {
+				const res = await synthesizeOpenAISpeech(
+					localStorage.token,
+					$settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice
+						? ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
+						: $config?.audio?.tts?.voice,
+					sentence
+				).catch((error) => {
+					console.error(error);
+					toast.error(error);

-							const speak = new SpeechSynthesisUtterance(message.content);
+					speaking = false;
+					loadingSpeech = false;
+				});

-							console.log(speak);
+				if (res) {
+					const blob = await res.blob();
+					const blobUrl = URL.createObjectURL(blob);
+					const audio = new Audio(blobUrl);
+					audioParts[idx] = audio;
+					loadingSpeech = false;
+					lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
+				}
+			}
+		} else {
+			let voices = [];
+			const getVoicesLoop = setInterval(() => {
+				voices = speechSynthesis.getVoices();
+				if (voices.length > 0) {
+					clearInterval(getVoicesLoop);

-							speak.onend = () => {
-								speaking = null;
-								if ($settings.conversationMode) {
-									document.getElementById('voice-input-button')?.click();
-								}
-							};
+					const voice =
+						voices
+							?.filter(
+								(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
+							)
+							?.at(0) ?? undefined;

-							if (voice) {
-								speak.voice = voice;
-							}
+					console.log(voice);

-							speechSynthesis.speak(speak);
-						}
-					}, 100);
-				}
-			} else {
-				toast.error($i18n.t('No content to speak'));
+					const speak = new SpeechSynthesisUtterance(message.content);
+
+					console.log(speak);
+
+					speak.onend = () => {
+						speaking = false;
+						if ($settings.conversationMode) {
+							document.getElementById('voice-input-button')?.click();
+						}
+					};
+
+					if (voice) {
+						speak.voice = voice;
+					}
+
+					speechSynthesis.speak(speak);
+				}
+			}, 100);
 		}
 	};
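The core of the feature is the `switch` on `$config.audio.tts.split_on`; factored out of the component, the dispatch looks like this (hypothetical helper built from the utilities this commit adds):

```typescript
import { TTS_RESPONSE_SPLIT } from '$lib/types';
import {
	extractParagraphsForAudio,
	extractSentencesForAudio,
	prepareTextForTTS
} from '$lib/utils';

// Hypothetical helper mirroring the switch in toggleSpeakMessage:
// unknown values fall back to sentence-level splitting.
const splitForTTS = (content: string, splitOn: string): string[] => {
	switch (splitOn) {
		case TTS_RESPONSE_SPLIT.PARAGRAPHS:
			return extractParagraphsForAudio(content);
		case TTS_RESPONSE_SPLIT.NONE:
			return [prepareTextForTTS(content)];
		case TTS_RESPONSE_SPLIT.PUNCTUATION:
		default:
			return extractSentencesForAudio(content);
	}
};
```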
@@ -230,7 +269,7 @@
 		await tick();
 	};

-	const generateImage = async (message) => {
+	const generateImage = async (message: MessageType) => {
 		generatingImage = true;
 		const res = await imageGenerations(localStorage.token, message.content).catch((error) => {
 			toast.error(error);
@@ -285,7 +324,7 @@
 				</Name>

 				<div>
-					{#if (message?.files ?? []).filter((f) => f.type === 'image').length > 0}
+					{#if message?.files && message.files?.filter((f) => f.type === 'image').length > 0}
 						<div class="my-2.5 w-full flex overflow-x-auto gap-2 flex-wrap">
 							{#each message.files as file}
 								<div>
@@ -304,7 +343,7 @@
 							message?.statusHistory ?? [...(message?.status ? [message?.status] : [])]
 						).at(-1)}
 						<div class="flex items-center gap-2 pt-0.5 pb-1">
-							{#if status.done === false}
+							{#if status?.done === false}
 								<div class="">
 									<Spinner className="size-4" />
 								</div>
@@ -521,7 +560,7 @@
 										: 'invisible group-hover:visible'} p-1.5 hover:bg-black/5 dark:hover:bg-white/5 rounded-lg dark:hover:text-white hover:text-black transition"
 									on:click={() => {
 										if (!loadingSpeech) {
-											toggleSpeakMessage(message);
+											toggleSpeakMessage();
 										}
 									}}
 								>
@@ -661,7 +700,7 @@
 											`${
 												Math.round(
 													((message.info.eval_count ?? 0) /
-														(message.info.eval_duration / 1000000000)) *
+														((message.info.eval_duration ?? 0) / 1000000000)) *
 														100
 												) / 100
 											} tokens` ?? 'N/A'
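The tooltip math here divides a token count by a duration reported in nanoseconds, then rounds to two decimals with the `Math.round(x * 100) / 100` idiom. A worked example with made-up numbers:

```typescript
// 90 tokens generated over 3 seconds (3e9 ns) -> 30 token/s.
const eval_count = 90;
const eval_duration = 3_000_000_000; // nanoseconds
const tokensPerSecond =
	Math.round((eval_count / (eval_duration / 1_000_000_000)) * 100) / 100;
console.assert(tokensPerSecond === 30);
```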
@@ -669,7 +708,7 @@
 									prompt_token/s: ${
 										Math.round(
 											((message.info.prompt_eval_count ?? 0) /
-												(message.info.prompt_eval_duration / 1000000000)) *
+												((message.info.prompt_eval_duration ?? 0) / 1000000000)) *
 												100
 										) / 100 ?? 'N/A'
 									} tokens<br/>
@@ -688,7 +727,7 @@
 									eval_duration: ${
 										Math.round(((message.info.eval_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
 									}ms<br/>
-									approximate_total: ${approximateToHumanReadable(message.info.total_duration)}`}
+									approximate_total: ${approximateToHumanReadable(message.info.total_duration ?? 0)}`}
 								placement="top"
 							>
 								<Tooltip content={$i18n.t('Generation Info')} placement="bottom">
@@ -7,3 +7,9 @@ export type Banner = {
 	dismissible?: boolean;
 	timestamp: number;
 };
+
+export enum TTS_RESPONSE_SPLIT {
+	PUNCTUATION = 'punctuation',
+	PARAGRAPHS = 'paragraphs',
+	NONE = 'none'
+}
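A quick usage sketch of the new enum (illustrative): string enums like this double as the wire format, so `Object.values` yields exactly the strings stored in the backend config:

```typescript
import { TTS_RESPONSE_SPLIT } from '$lib/types';

const modes = Object.values(TTS_RESPONSE_SPLIT); // ['punctuation', 'paragraphs', 'none']
let splitOn: TTS_RESPONSE_SPLIT = TTS_RESPONSE_SPLIT.PUNCTUATION;
splitOn = TTS_RESPONSE_SPLIT.PARAGRAPHS; // reassignment stays within the enum
```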
@@ -408,7 +408,7 @@ const convertOpenAIMessages = (convo) => {
 	let currentId = '';
 	let lastId = null;

-	for (let message_id in mapping) {
+	for (const message_id in mapping) {
 		const message = mapping[message_id];
 		currentId = message_id;
 		try {
@@ -442,7 +442,7 @@ const convertOpenAIMessages = (convo) => {
 		}
 	}

-	let history = {};
+	const history: Record<PropertyKey, (typeof messages)[number]> = {};
 	messages.forEach((obj) => (history[obj.id] = obj));

 	const chat = {
@@ -481,7 +481,7 @@ const validateChat = (chat) => {
 	}

 	// Every message's content should be a string
-	for (let message of messages) {
+	for (const message of messages) {
 		if (typeof message.content !== 'string') {
 			return false;
 		}
@@ -494,7 +494,7 @@ export const convertOpenAIChats = (_chats) => {
 	// Create a list of dictionaries with each conversation from import
 	const chats = [];
 	let failed = 0;
-	for (let convo of _chats) {
+	for (const convo of _chats) {
 		const chat = convertOpenAIMessages(convo);

 		if (validateChat(chat)) {
@@ -513,7 +513,7 @@ export const convertOpenAIChats = (_chats) => {
 	return chats;
 };

-export const isValidHttpUrl = (string) => {
+export const isValidHttpUrl = (string: string) => {
 	let url;

 	try {
@@ -525,7 +525,7 @@ export const isValidHttpUrl = (string) => {
 	return url.protocol === 'http:' || url.protocol === 'https:';
 };

-export const removeEmojis = (str) => {
+export const removeEmojis = (str: string) => {
 	// Regular expression to match emojis
 	const emojiRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]|\uD83C[\uDC00-\uDFFF]|\uD83D[\uDC00-\uDE4F]/g;

@@ -533,20 +533,24 @@ export const removeEmojis = (str) => {
 	return str.replace(emojiRegex, '');
 };

-export const removeFormattings = (str) => {
+export const removeFormattings = (str: string) => {
 	return str.replace(/(\*)(.*?)\1/g, '').replace(/(```)(.*?)\1/gs, '');
 };

-export const extractSentences = (text) => {
-	// This regular expression matches code blocks marked by triple backticks
-	const codeBlockRegex = /```[\s\S]*?```/g;
+export const prepareTextForTTS = (content: string) => {
+	return removeFormattings(removeEmojis(content.trim()));
+};

-	let codeBlocks = [];
+// This regular expression matches code blocks marked by triple backticks
+const codeBlockRegex = /```[\s\S]*?```/g;
+
+export const extractSentences = (text: string) => {
+	const codeBlocks: string[] = [];
 	let index = 0;

 	// Temporarily replace code blocks with placeholders and store the blocks separately
 	text = text.replace(codeBlockRegex, (match) => {
-		let placeholder = `\u0000${index}\u0000`; // Use a unique placeholder
+		const placeholder = `\u0000${index}\u0000`; // Use a unique placeholder
 		codeBlocks[index++] = match;
 		return placeholder;
 	});
@@ -561,11 +565,36 @@ export const extractSentences = (text) => {
 	});

 	return sentences
-		.map((sentence) => removeFormattings(removeEmojis(sentence.trim())))
-		.filter((sentence) => sentence);
+		.map(prepareTextForTTS)
+		.filter(Boolean);
 };

-export const extractSentencesForAudio = (text) => {
+export const extractParagraphsForAudio = (text: string) => {
+	const codeBlocks: string[] = [];
+	let index = 0;
+
+	// Temporarily replace code blocks with placeholders and store the blocks separately
+	text = text.replace(codeBlockRegex, (match) => {
+		const placeholder = `\u0000${index}\u0000`; // Use a unique placeholder
+		codeBlocks[index++] = match;
+		return placeholder;
+	});
+
+	// Split the modified text into paragraphs based on newlines, avoiding these blocks
+	let paragraphs = text.split(/\n+/);
+
+	// Restore code blocks and process paragraphs
+	paragraphs = paragraphs.map((paragraph) => {
+		// Check if the paragraph includes a placeholder for a code block
+		return paragraph.replace(/\u0000(\d+)\u0000/g, (_, idx) => codeBlocks[idx]);
+	});
+
+	return paragraphs
+		.map(prepareTextForTTS)
+		.filter(Boolean);
+};
+
+export const extractSentencesForAudio = (text: string) => {
 	return extractSentences(text).reduce((mergedTexts, currentText) => {
 		const lastIndex = mergedTexts.length - 1;
 		if (lastIndex >= 0) {
@@ -580,7 +609,7 @@ export const extractSentencesForAudio = (text) => {
 			mergedTexts.push(currentText);
 		}
 		return mergedTexts;
-	}, []);
+	}, [] as string[]);
 };

 export const blobToFile = (blob, fileName) => {
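Taken together, the three exported helpers behave roughly as follows on a mixed message (expected behaviors are illustrative of the splitting rules, not fixtures from the repo):

```typescript
import {
	extractParagraphsForAudio,
	extractSentencesForAudio,
	prepareTextForTTS
} from '$lib/utils';

const msg = 'First sentence. Second sentence.\n\nA new paragraph.';

// 'punctuation': sentence-level parts, with very short fragments merged
console.log(extractSentencesForAudio(msg));
// 'paragraphs': split on runs of newlines, code blocks kept intact
console.log(extractParagraphsForAudio(msg));
// 'none': the whole message as a single cleaned string
console.log(prepareTextForTTS(msg));
```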