feat: per model tts voice

This commit is contained in:
Timothy Jaeryang Baek
2026-01-09 19:05:09 +04:00
parent 401c1949a0
commit bb6188abf0
3 changed files with 66 additions and 15 deletions

View File

@@ -363,6 +363,19 @@
let currentMessageId = null;
let currentUtterance = null;
/**
 * Resolve the TTS voice identifier to use for playback.
 *
 * Precedence:
 *   1. `model.info.meta.tts.voice`, when it holds a truthy value.
 *   2. `$settings.audio.tts.voice`, but only while the stored
 *      `$settings.audio.tts.defaultVoice` equals the current
 *      `$config.audio.tts.voice`.
 *   3. `$config.audio.tts.voice`.
 *
 * @returns the selected voice id, or `undefined` when none of the sources provides one.
 */
const getVoiceId = () => {
	// A voice configured on the model overrides every other source
	const modelVoice = model?.info?.meta?.tts?.voice;
	if (modelVoice) {
		return modelVoice;
	}

	// Read through optional chaining like the other lookups in this function, so a
	// missing `audio`/`tts` section yields `undefined` instead of throwing a TypeError
	const configVoice = $config?.audio?.tts?.voice;

	// NOTE(review): `defaultVoice` presumably records the config default that was active
	// when the user picked a voice, so a changed config default wins again — confirm
	if ($settings?.audio?.tts?.defaultVoice === configVoice) {
		return $settings?.audio?.tts?.voice ?? configVoice;
	}

	return configVoice;
};
const speakSpeechSynthesisHandler = (content) => {
if ($showCallOverlay) {
return new Promise((resolve) => {
@@ -372,11 +385,10 @@
if (voices.length > 0) {
clearInterval(getVoicesLoop);
const voiceId = getVoiceId();
const voice =
voices
?.filter(
(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
)
?.filter((v) => v.voiceURI === voiceId)
?.at(0) ?? undefined;
currentUtterance = new SpeechSynthesisUtterance(content);
@@ -471,7 +483,7 @@
const url = await $TTSWorker
.generate({
text: content,
voice: $settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice
voice: getVoiceId()
})
.catch((error) => {
console.error(error);
@@ -484,9 +496,7 @@
} else if ($config.audio.tts.engine !== '') {
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice
? ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
: $config?.audio?.tts?.voice,
getVoiceId(),
content
).catch((error) => {
console.error(error);

View File

@@ -207,6 +207,19 @@
speaking = true;
const content = removeAllDetails(message.content);
/**
 * Pick the voice id for reading this message aloud.
 *
 * Sources are consulted in order: the model's own `info.meta.tts.voice`,
 * then the user's `$settings.audio.tts.voice` (honoured only while
 * `$settings.audio.tts.defaultVoice` matches `$config.audio.tts.voice`),
 * and finally `$config.audio.tts.voice`.
 *
 * @returns the resolved voice id, or `undefined` if no source supplies one.
 */
const getVoiceId = () => {
	// Model-level voice takes priority when it is set to a truthy value
	const perModelVoice = model?.info?.meta?.tts?.voice;
	if (perModelVoice) {
		return perModelVoice;
	}

	// Optional chaining here matches the other config reads in this function and avoids
	// a TypeError on the comparison below when `audio` or `tts` is absent from the config
	const fallbackVoice = $config?.audio?.tts?.voice;

	// NOTE(review): assumes `defaultVoice` is a snapshot of the config default taken when
	// the user saved their voice choice — verify against the settings component
	const userChoiceIsCurrent = $settings?.audio?.tts?.defaultVoice === fallbackVoice;
	if (userChoiceIsCurrent) {
		return $settings?.audio?.tts?.voice ?? fallbackVoice;
	}

	return fallbackVoice;
};
if ($config.audio.tts.engine === '') {
let voices = [];
const getVoicesLoop = setInterval(() => {
@@ -214,11 +227,10 @@
if (voices.length > 0) {
clearInterval(getVoicesLoop);
const voiceId = getVoiceId();
const voice =
voices
?.filter(
(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
)
?.filter((v) => v.voiceURI === voiceId)
?.at(0) ?? undefined;
console.log(voice);
@@ -265,7 +277,9 @@
return;
}
console.debug('Prepared message content for TTS', messageContentParts);
const voiceId = getVoiceId();
console.debug('Prepared message content for TTS', messageContentParts, 'voice:', voiceId);
if ($settings.audio?.tts?.engine === 'browser-kokoro') {
if (!$TTSWorker) {
await TTSWorker.set(
@@ -281,7 +295,7 @@
const url = await $TTSWorker
.generate({
text: sentence,
voice: $settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice
voice: voiceId
})
.catch((error) => {
console.error(error);
@@ -300,9 +314,7 @@
for (const [idx, sentence] of messageContentParts.entries()) {
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice
? ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
: $config?.audio?.tts?.voice,
voiceId,
sentence
).catch((error) => {
console.error(error);
@@ -324,6 +336,7 @@
}
};
let preprocessedDetailsCache = [];
function preprocessForEditing(content: string): string {

View File

@@ -107,6 +107,7 @@
let actionIds = [];
let accessControl = {};
let tts = { voice: '' };
const submitHandler = async () => {
loading = true;
@@ -194,6 +195,18 @@
}
}
if (tts.voice !== '') {
if (!info.meta.tts) info.meta.tts = {};
info.meta.tts.voice = tts.voice;
} else {
if (info.meta.tts?.voice) {
delete info.meta.tts.voice;
if (Object.keys(info.meta.tts).length === 0) {
delete info.meta.tts;
}
}
}
info.params.system = system.trim() === '' ? null : system;
info.params.stop = params.stop ? params.stop.split(',').filter((s) => s.trim()) : null;
Object.keys(info.params).forEach((key) => {
@@ -275,6 +288,7 @@
capabilities = { ...capabilities, ...(model?.meta?.capabilities ?? {}) };
defaultFeatureIds = model?.meta?.defaultFeatureIds ?? [];
tts = { voice: model?.meta?.tts?.voice ?? '' };
if ('access_control' in model) {
accessControl = model.access_control;
@@ -766,6 +780,20 @@
{/if}
{/if}
<div class="my-2">
<div class="flex w-full justify-between mb-1">
<div class="self-center text-xs font-medium text-gray-500">
{$i18n.t('TTS Voice')}
</div>
</div>
<input
class="w-full text-sm bg-transparent outline-hidden"
type="text"
bind:value={tts.voice}
placeholder={$i18n.t('e.g. alloy, echo, shimmer')}
/>
</div>
<hr class=" border-gray-100/30 dark:border-gray-850/30 my-2" />
<div class="my-2 flex justify-end">