mirror of
https://github.com/open-webui/open-webui
synced 2025-05-22 05:53:20 +00:00
feat: Kokoro-js TTS support
This commit is contained in:
parent
a22d1d5410
commit
205ce635f6
@ -4,12 +4,18 @@
|
|||||||
|
|
||||||
import { createEventDispatcher } from 'svelte';
|
import { createEventDispatcher } from 'svelte';
|
||||||
import { onMount, tick, getContext } from 'svelte';
|
import { onMount, tick, getContext } from 'svelte';
|
||||||
|
import type { Writable } from 'svelte/store';
|
||||||
|
import type { i18n as i18nType } from 'i18next';
|
||||||
|
|
||||||
const i18n = getContext<Writable<i18nType>>('i18n');
|
const i18n = getContext<Writable<i18nType>>('i18n');
|
||||||
|
|
||||||
const dispatch = createEventDispatcher();
|
const dispatch = createEventDispatcher();
|
||||||
|
|
||||||
import { config, models, settings, user } from '$lib/stores';
|
import { createNewFeedback, getFeedbackById, updateFeedbackById } from '$lib/apis/evaluations';
|
||||||
|
import { getChatById } from '$lib/apis/chats';
|
||||||
|
import { generateTags } from '$lib/apis';
|
||||||
|
|
||||||
|
import { config, models, settings, TTSWorker, user } from '$lib/stores';
|
||||||
import { synthesizeOpenAISpeech } from '$lib/apis/audio';
|
import { synthesizeOpenAISpeech } from '$lib/apis/audio';
|
||||||
import { imageGenerations } from '$lib/apis/images';
|
import { imageGenerations } from '$lib/apis/images';
|
||||||
import {
|
import {
|
||||||
@ -34,13 +40,8 @@
|
|||||||
import Error from './Error.svelte';
|
import Error from './Error.svelte';
|
||||||
import Citations from './Citations.svelte';
|
import Citations from './Citations.svelte';
|
||||||
import CodeExecutions from './CodeExecutions.svelte';
|
import CodeExecutions from './CodeExecutions.svelte';
|
||||||
|
|
||||||
import type { Writable } from 'svelte/store';
|
|
||||||
import type { i18n as i18nType } from 'i18next';
|
|
||||||
import ContentRenderer from './ContentRenderer.svelte';
|
import ContentRenderer from './ContentRenderer.svelte';
|
||||||
import { createNewFeedback, getFeedbackById, updateFeedbackById } from '$lib/apis/evaluations';
|
import { KokoroWorker } from '$lib/workers/KokoroWorker';
|
||||||
import { getChatById } from '$lib/apis/chats';
|
|
||||||
import { generateTags } from '$lib/apis';
|
|
||||||
|
|
||||||
interface MessageType {
|
interface MessageType {
|
||||||
id: string;
|
id: string;
|
||||||
@ -193,62 +194,7 @@
|
|||||||
|
|
||||||
speaking = true;
|
speaking = true;
|
||||||
|
|
||||||
if ($config.audio.tts.engine !== '') {
|
if ($config.audio.tts.engine === '') {
|
||||||
loadingSpeech = true;
|
|
||||||
|
|
||||||
const messageContentParts: string[] = getMessageContentParts(
|
|
||||||
message.content,
|
|
||||||
$config?.audio?.tts?.split_on ?? 'punctuation'
|
|
||||||
);
|
|
||||||
|
|
||||||
if (!messageContentParts.length) {
|
|
||||||
console.log('No content to speak');
|
|
||||||
toast.info($i18n.t('No content to speak'));
|
|
||||||
|
|
||||||
speaking = false;
|
|
||||||
loadingSpeech = false;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
console.debug('Prepared message content for TTS', messageContentParts);
|
|
||||||
|
|
||||||
audioParts = messageContentParts.reduce(
|
|
||||||
(acc, _sentence, idx) => {
|
|
||||||
acc[idx] = null;
|
|
||||||
return acc;
|
|
||||||
},
|
|
||||||
{} as typeof audioParts
|
|
||||||
);
|
|
||||||
|
|
||||||
let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
|
|
||||||
|
|
||||||
for (const [idx, sentence] of messageContentParts.entries()) {
|
|
||||||
const res = await synthesizeOpenAISpeech(
|
|
||||||
localStorage.token,
|
|
||||||
$settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice
|
|
||||||
? ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
|
|
||||||
: $config?.audio?.tts?.voice,
|
|
||||||
sentence
|
|
||||||
).catch((error) => {
|
|
||||||
console.error(error);
|
|
||||||
toast.error(`${error}`);
|
|
||||||
|
|
||||||
speaking = false;
|
|
||||||
loadingSpeech = false;
|
|
||||||
});
|
|
||||||
|
|
||||||
if (res) {
|
|
||||||
const blob = await res.blob();
|
|
||||||
const blobUrl = URL.createObjectURL(blob);
|
|
||||||
const audio = new Audio(blobUrl);
|
|
||||||
audio.playbackRate = $settings.audio?.tts?.playbackRate ?? 1;
|
|
||||||
|
|
||||||
audioParts[idx] = audio;
|
|
||||||
loadingSpeech = false;
|
|
||||||
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let voices = [];
|
let voices = [];
|
||||||
const getVoicesLoop = setInterval(() => {
|
const getVoicesLoop = setInterval(() => {
|
||||||
voices = speechSynthesis.getVoices();
|
voices = speechSynthesis.getVoices();
|
||||||
@ -283,6 +229,99 @@
|
|||||||
speechSynthesis.speak(speak);
|
speechSynthesis.speak(speak);
|
||||||
}
|
}
|
||||||
}, 100);
|
}, 100);
|
||||||
|
} else {
|
||||||
|
loadingSpeech = true;
|
||||||
|
|
||||||
|
const messageContentParts: string[] = getMessageContentParts(
|
||||||
|
message.content,
|
||||||
|
$config?.audio?.tts?.split_on ?? 'punctuation'
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!messageContentParts.length) {
|
||||||
|
console.log('No content to speak');
|
||||||
|
toast.info($i18n.t('No content to speak'));
|
||||||
|
|
||||||
|
speaking = false;
|
||||||
|
loadingSpeech = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.debug('Prepared message content for TTS', messageContentParts);
|
||||||
|
|
||||||
|
audioParts = messageContentParts.reduce(
|
||||||
|
(acc, _sentence, idx) => {
|
||||||
|
acc[idx] = null;
|
||||||
|
return acc;
|
||||||
|
},
|
||||||
|
{} as typeof audioParts
|
||||||
|
);
|
||||||
|
|
||||||
|
let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
|
||||||
|
|
||||||
|
if ($settings.audio?.tts?.engine === 'browser-kokoro') {
|
||||||
|
if (!$TTSWorker) {
|
||||||
|
await TTSWorker.set(
|
||||||
|
new KokoroWorker({
|
||||||
|
dtype: $settings.audio?.tts?.engineConfig?.dtype ?? 'fp32'
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
await $TTSWorker.init();
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log($TTSWorker);
|
||||||
|
|
||||||
|
for (const [idx, sentence] of messageContentParts.entries()) {
|
||||||
|
const blob = await $TTSWorker
|
||||||
|
.generate({
|
||||||
|
text: sentence,
|
||||||
|
voice: $settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice
|
||||||
|
})
|
||||||
|
.catch((error) => {
|
||||||
|
console.error(error);
|
||||||
|
toast.error(`${error}`);
|
||||||
|
|
||||||
|
speaking = false;
|
||||||
|
loadingSpeech = false;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (blob) {
|
||||||
|
const audio = new Audio(blob);
|
||||||
|
audio.playbackRate = $settings.audio?.tts?.playbackRate ?? 1;
|
||||||
|
|
||||||
|
audioParts[idx] = audio;
|
||||||
|
loadingSpeech = false;
|
||||||
|
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (const [idx, sentence] of messageContentParts.entries()) {
|
||||||
|
const res = await synthesizeOpenAISpeech(
|
||||||
|
localStorage.token,
|
||||||
|
$settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice
|
||||||
|
? ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
|
||||||
|
: $config?.audio?.tts?.voice,
|
||||||
|
sentence
|
||||||
|
).catch((error) => {
|
||||||
|
console.error(error);
|
||||||
|
toast.error(`${error}`);
|
||||||
|
|
||||||
|
speaking = false;
|
||||||
|
loadingSpeech = false;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (res) {
|
||||||
|
const blob = await res.blob();
|
||||||
|
const blobUrl = URL.createObjectURL(blob);
|
||||||
|
const audio = new Audio(blobUrl);
|
||||||
|
audio.playbackRate = $settings.audio?.tts?.playbackRate ?? 1;
|
||||||
|
|
||||||
|
audioParts[idx] = audio;
|
||||||
|
loadingSpeech = false;
|
||||||
|
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,11 +1,14 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { toast } from 'svelte-sonner';
|
import { toast } from 'svelte-sonner';
|
||||||
import { createEventDispatcher, onMount, getContext } from 'svelte';
|
import { createEventDispatcher, onMount, getContext } from 'svelte';
|
||||||
|
import { KokoroTTS } from 'kokoro-js';
|
||||||
|
|
||||||
import { user, settings, config } from '$lib/stores';
|
import { user, settings, config } from '$lib/stores';
|
||||||
import { getVoices as _getVoices } from '$lib/apis/audio';
|
import { getVoices as _getVoices } from '$lib/apis/audio';
|
||||||
|
|
||||||
import Switch from '$lib/components/common/Switch.svelte';
|
import Switch from '$lib/components/common/Switch.svelte';
|
||||||
|
import { round } from '@huggingface/transformers';
|
||||||
|
import Spinner from '$lib/components/common/Spinner.svelte';
|
||||||
const dispatch = createEventDispatcher();
|
const dispatch = createEventDispatcher();
|
||||||
|
|
||||||
const i18n = getContext('i18n');
|
const i18n = getContext('i18n');
|
||||||
@ -20,6 +23,13 @@
|
|||||||
|
|
||||||
let STTEngine = '';
|
let STTEngine = '';
|
||||||
|
|
||||||
|
let TTSEngine = '';
|
||||||
|
let TTSEngineConfig = {};
|
||||||
|
|
||||||
|
let TTSModel = null;
|
||||||
|
let TTSModelProgress = null;
|
||||||
|
let TTSModelLoading = false;
|
||||||
|
|
||||||
let voices = [];
|
let voices = [];
|
||||||
let voice = '';
|
let voice = '';
|
||||||
|
|
||||||
@ -28,6 +38,19 @@
|
|||||||
const speedOptions = [2, 1.75, 1.5, 1.25, 1, 0.75, 0.5];
|
const speedOptions = [2, 1.75, 1.5, 1.25, 1, 0.75, 0.5];
|
||||||
|
|
||||||
const getVoices = async () => {
|
const getVoices = async () => {
|
||||||
|
if (TTSEngine === 'browser-kokoro') {
|
||||||
|
if (!TTSModel) {
|
||||||
|
await loadKokoro();
|
||||||
|
}
|
||||||
|
|
||||||
|
voices = Object.entries(TTSModel.voices).map(([key, value]) => {
|
||||||
|
return {
|
||||||
|
id: key,
|
||||||
|
name: value.name,
|
||||||
|
localService: false
|
||||||
|
};
|
||||||
|
});
|
||||||
|
} else {
|
||||||
if ($config.audio.tts.engine === '') {
|
if ($config.audio.tts.engine === '') {
|
||||||
const getVoicesLoop = setInterval(async () => {
|
const getVoicesLoop = setInterval(async () => {
|
||||||
voices = await speechSynthesis.getVoices();
|
voices = await speechSynthesis.getVoices();
|
||||||
@ -47,6 +70,7 @@
|
|||||||
voices = res.voices;
|
voices = res.voices;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const toggleResponseAutoPlayback = async () => {
|
const toggleResponseAutoPlayback = async () => {
|
||||||
@ -67,6 +91,9 @@
|
|||||||
|
|
||||||
STTEngine = $settings?.audio?.stt?.engine ?? '';
|
STTEngine = $settings?.audio?.stt?.engine ?? '';
|
||||||
|
|
||||||
|
TTSEngine = $settings?.audio?.tts?.engine ?? '';
|
||||||
|
TTSEngineConfig = $settings?.audio?.tts?.engineConfig ?? {};
|
||||||
|
|
||||||
if ($settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice) {
|
if ($settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice) {
|
||||||
voice = $settings?.audio?.tts?.voice ?? $config.audio.tts.voice ?? '';
|
voice = $settings?.audio?.tts?.voice ?? $config.audio.tts.voice ?? '';
|
||||||
} else {
|
} else {
|
||||||
@ -77,6 +104,51 @@
|
|||||||
|
|
||||||
await getVoices();
|
await getVoices();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Svelte reactive block: re-evaluated whenever `TTSEngine` or `TTSEngineConfig`
// is reassigned. `TTSEngineConfig` is initialised to `{}` (always truthy), so in
// practice this fires for every change while a non-default engine is selected.
// The returned promise is intentionally not awaited (fire-and-forget).
$: {
	const engineSelected = Boolean(TTSEngine);
	const configPresent = Boolean(TTSEngineConfig);

	if (engineSelected && configPresent) {
		onTTSEngineChange();
	}
}
|
||||||
|
|
||||||
|
/**
 * Reacts to a change of the selected TTS engine or its configuration.
 *
 * Only the in-browser Kokoro engine needs work here: its model is (re)loaded
 * through `loadKokoro()`. Every other engine value is a no-op.
 */
const onTTSEngineChange = async () => {
	if (TTSEngine !== 'browser-kokoro') {
		return;
	}

	await loadKokoro();
};
|
||||||
|
|
||||||
|
/**
 * Loads the Kokoro.js model in the browser and refreshes the voice list.
 *
 * Does nothing unless the selected engine is `browser-kokoro`. The voice list
 * is cleared first; the model itself is only loaded once a dtype has been
 * chosen in `TTSEngineConfig`.
 *
 * While the model downloads, `TTSModelProgress` receives every progress event
 * and `TTSModelLoading` is true. The loading flag is always reset when the
 * attempt ends, whether it succeeded or failed.
 *
 * A failed load is logged and surfaced as a toast instead of escaping as an
 * unhandled rejection (the reactive caller does not await this function);
 * `TTSModel` then stays `null`.
 */
const loadKokoro = async () => {
	if (TTSEngine !== 'browser-kokoro') {
		return;
	}

	voices = [];

	if (!TTSEngineConfig?.dtype) {
		// No precision selected yet, so there is nothing to load.
		return;
	}

	TTSModel = null;
	TTSModelProgress = null;
	TTSModelLoading = true;

	const model_id = 'onnx-community/Kokoro-82M-v1.0-ONNX';

	try {
		TTSModel = await KokoroTTS.from_pretrained(model_id, {
			dtype: TTSEngineConfig.dtype, // Options: "fp32", "fp16", "q8", "q4", "q4f16"
			device: !!navigator?.gpu ? 'webgpu' : 'wasm', // Detect WebGPU
			progress_callback: (e) => {
				TTSModelProgress = e;
			}
		});
	} catch (error) {
		console.error(error);
		toast.error(`${error}`);
		return;
	} finally {
		TTSModelLoading = false;
	}

	// TTSModel is set now, so getVoices() reads its voices instead of calling back into loadKokoro().
	await getVoices();
};
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<form
|
<form
|
||||||
@ -88,6 +160,8 @@
|
|||||||
engine: STTEngine !== '' ? STTEngine : undefined
|
engine: STTEngine !== '' ? STTEngine : undefined
|
||||||
},
|
},
|
||||||
tts: {
|
tts: {
|
||||||
|
engine: TTSEngine !== '' ? TTSEngine : undefined,
|
||||||
|
engineConfig: TTSEngineConfig,
|
||||||
playbackRate: playbackRate,
|
playbackRate: playbackRate,
|
||||||
voice: voice !== '' ? voice : undefined,
|
voice: voice !== '' ? voice : undefined,
|
||||||
defaultVoice: $config?.audio?.tts?.voice ?? '',
|
defaultVoice: $config?.audio?.tts?.voice ?? '',
|
||||||
@ -142,6 +216,39 @@
|
|||||||
<div>
|
<div>
|
||||||
<div class=" mb-1 text-sm font-medium">{$i18n.t('TTS Settings')}</div>
|
<div class=" mb-1 text-sm font-medium">{$i18n.t('TTS Settings')}</div>
|
||||||
|
|
||||||
|
<div class=" py-0.5 flex w-full justify-between">
|
||||||
|
<div class=" self-center text-xs font-medium">{$i18n.t('Text-to-Speech Engine')}</div>
|
||||||
|
<div class="flex items-center relative">
|
||||||
|
<select
|
||||||
|
class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
|
||||||
|
bind:value={TTSEngine}
|
||||||
|
placeholder="Select an engine"
|
||||||
|
>
|
||||||
|
<option value="">{$i18n.t('Default')}</option>
|
||||||
|
<option value="browser-kokoro">{$i18n.t('Kokoro.js (Browser)')}</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{#if TTSEngine === 'browser-kokoro'}
|
||||||
|
<div class=" py-0.5 flex w-full justify-between">
|
||||||
|
<div class=" self-center text-xs font-medium">{$i18n.t('Kokoro.js Dtype')}</div>
|
||||||
|
<div class="flex items-center relative">
|
||||||
|
<select
|
||||||
|
class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
|
||||||
|
bind:value={TTSEngineConfig.dtype}
|
||||||
|
placeholder="Select dtype"
|
||||||
|
>
|
||||||
|
<option value="" disabled selected>Select dtype</option>
|
||||||
|
<option value="fp32">fp32</option>
|
||||||
|
<option value="fp16">fp16</option>
|
||||||
|
<option value="q8">q8</option>
|
||||||
|
<option value="q4">q4</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
|
||||||
<div class=" py-0.5 flex w-full justify-between">
|
<div class=" py-0.5 flex w-full justify-between">
|
||||||
<div class=" self-center text-xs font-medium">{$i18n.t('Auto-playback response')}</div>
|
<div class=" self-center text-xs font-medium">{$i18n.t('Auto-playback response')}</div>
|
||||||
|
|
||||||
@ -178,7 +285,46 @@
|
|||||||
|
|
||||||
<hr class=" dark:border-gray-850" />
|
<hr class=" dark:border-gray-850" />
|
||||||
|
|
||||||
{#if $config.audio.tts.engine === ''}
|
{#if TTSEngine === 'browser-kokoro'}
|
||||||
|
{#if TTSModel}
|
||||||
|
<div>
|
||||||
|
<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
|
||||||
|
<div class="flex w-full">
|
||||||
|
<div class="flex-1">
|
||||||
|
<input
|
||||||
|
list="voice-list"
|
||||||
|
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
|
||||||
|
bind:value={voice}
|
||||||
|
placeholder="Select a voice"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<datalist id="voice-list">
|
||||||
|
{#each voices as voice}
|
||||||
|
<option value={voice.id}>{voice.name}</option>
|
||||||
|
{/each}
|
||||||
|
</datalist>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{:else}
|
||||||
|
<div>
|
||||||
|
<div class=" mb-2.5 text-sm font-medium flex gap-2 items-center">
|
||||||
|
<Spinner className="size-4" />
|
||||||
|
|
||||||
|
<div class=" text-sm font-medium shimmer">
|
||||||
|
{$i18n.t('Loading Kokoro.js...')}
|
||||||
|
{TTSModelProgress && TTSModelProgress.status === 'progress'
|
||||||
|
? `(${Math.round(TTSModelProgress.progress * 10) / 10}%)`
|
||||||
|
: ''}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="text-xs text-gray-500">
|
||||||
|
{$i18n.t('Please do not close the settings page while loading the model.')}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
{:else if $config.audio.tts.engine === ''}
|
||||||
<div>
|
<div>
|
||||||
<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
|
<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
|
||||||
<div class="flex w-full">
|
<div class="flex w-full">
|
||||||
|
@ -41,6 +41,8 @@ export const shortCodesToEmojis = writable(
|
|||||||
}, {})
|
}, {})
|
||||||
);
|
);
|
||||||
|
|
||||||
|
export const TTSWorker = writable(null);
|
||||||
|
|
||||||
export const chatId = writable('');
|
export const chatId = writable('');
|
||||||
export const chatTitle = writable('');
|
export const chatTitle = writable('');
|
||||||
|
|
||||||
|
70
src/lib/workers/KokoroWorker.ts
Normal file
70
src/lib/workers/KokoroWorker.ts
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
import WorkerInstance from '$lib/workers/kokoro.worker?worker';
|
||||||
|
|
||||||
|
/** Options object accepted by the `KokoroWorker` constructor. */
interface KokoroWorkerOptions {
	/** Model precision forwarded to the worker's `init` message (e.g. "fp32", "fp16", "q8", "q4"). */
	dtype?: string;
}

/**
 * Main-thread wrapper around the Kokoro TTS web worker.
 *
 * Message protocol used with the worker:
 * - `{ type: 'init', payload: { dtype } }` is answered by `init:complete` or `init:error`
 * - `{ type: 'generate', payload: { text, voice } }` is answered by
 *   `generate:complete` (carrying `audioUrl`) or `generate:error`
 *
 * Other status messages (such as `init:start` / `generate:start`) are ignored.
 */
export class KokoroWorker {
	private worker: Worker | null = null;
	private initialized = false;
	private readonly dtype: string;

	/** In-flight `init()` promise, shared by concurrent callers. */
	private pendingInit: Promise<void> | null = null;
	/** Rejects the in-flight `init()` and tears its worker down. */
	private abortInit: ((reason: Error) => void) | null = null;
	/** Rejects the `generate()` request currently waiting for the worker. */
	private abortRequest: ((reason: Error) => void) | null = null;
	/** Tail of the request chain; keeps at most one `generate` message in flight. */
	private queue: Promise<unknown> = Promise.resolve();

	/**
	 * @param dtype Model precision, either as a plain string or as an options
	 *   object (`{ dtype }`). Both call styles are accepted because existing
	 *   callers construct the worker with an object. Defaults to `'fp32'`.
	 */
	constructor(dtype: string | KokoroWorkerOptions = 'fp32') {
		this.dtype = typeof dtype === 'string' ? dtype : (dtype?.dtype ?? 'fp32');
	}

	/**
	 * Spawns the worker and asks it to load the model.
	 *
	 * Resolves once the worker reports `init:complete`. Rejects on `init:error`
	 * or on a worker `error` event; in both cases the worker is terminated and
	 * forgotten so that `init()` can be called again. Calling `init()` while an
	 * initialization is running returns that same promise; calling it after a
	 * successful initialization only logs a warning.
	 */
	public async init(): Promise<void> {
		if (this.pendingInit) {
			return this.pendingInit;
		}

		if (this.worker) {
			console.warn('KokoroWorker is already initialized.');
			return;
		}

		const worker: Worker = new WorkerInstance();
		this.worker = worker;

		const pending = new Promise<void>((resolve, reject) => {
			const settle = () => {
				worker.removeEventListener('message', onMessage);
				worker.removeEventListener('error', onError);
				this.abortInit = null;
			};

			const fail = (reason: Error) => {
				settle();
				// Drop the broken worker; otherwise a retry would be treated as "already initialized".
				worker.terminate();
				if (this.worker === worker) {
					this.worker = null;
				}
				this.initialized = false;
				reject(reason);
			};

			const onMessage = (event: MessageEvent) => {
				const { status, error } = event.data ?? {};

				if (status === 'init:complete') {
					settle();
					this.initialized = true;
					resolve();
				} else if (status === 'init:error') {
					console.error(error);
					fail(new Error(error));
				}
			};

			const onError = (event: Event & { message?: string }) => {
				fail(new Error(event.message || 'KokoroTTS Worker failed during initialization.'));
			};

			this.abortInit = fail;
			worker.addEventListener('message', onMessage);
			worker.addEventListener('error', onError);

			worker.postMessage({
				type: 'init',
				payload: { dtype: this.dtype }
			});
		});

		const tracked = pending.finally(() => {
			this.pendingInit = null;
		});
		this.pendingInit = tracked;

		return tracked;
	}

	/**
	 * Synthesizes `text` with the given `voice`.
	 *
	 * Requests are processed one at a time: worker replies carry no request id,
	 * so overlapping requests could otherwise be resolved with each other's audio.
	 *
	 * @returns The `audioUrl` reported by the worker.
	 * @throws Error if the worker has not been initialized, if the worker reports
	 *   `generate:error`, or if the worker is terminated while the request is pending.
	 */
	public async generate({ text, voice }: { text: string; voice: string }): Promise<string> {
		if (!this.initialized || !this.worker) {
			throw new Error('KokoroTTS Worker is not initialized yet.');
		}

		const result = this.queue.then(() => this.request(text, voice));
		// The chain itself must never reject, or every later request would fail too.
		this.queue = result.catch(() => undefined);

		return result;
	}

	/** Sends a single `generate` message and waits for the matching reply. */
	private request(text: string, voice: string): Promise<string> {
		const worker = this.worker;

		// Re-check: the worker may have been terminated while this request was queued.
		if (!this.initialized || !worker) {
			return Promise.reject(new Error('KokoroTTS Worker is not initialized yet.'));
		}

		return new Promise<string>((resolve, reject) => {
			const cleanup = () => {
				worker.removeEventListener('message', handleMessage);
				worker.removeEventListener('error', handleError);
				this.abortRequest = null;
			};

			const handleMessage = (event: MessageEvent) => {
				const status = event.data?.status;

				if (status === 'generate:complete') {
					cleanup();
					resolve(event.data.audioUrl);
				} else if (status === 'generate:error') {
					cleanup();
					reject(new Error(event.data.error));
				}
			};

			const handleError = (event: Event & { message?: string }) => {
				cleanup();
				reject(new Error(event.message || 'KokoroTTS Worker failed while generating audio.'));
			};

			this.abortRequest = (reason: Error) => {
				cleanup();
				reject(reason);
			};

			// Listen before posting so a reply can never be missed.
			worker.addEventListener('message', handleMessage);
			worker.addEventListener('error', handleError);
			worker.postMessage({ type: 'generate', payload: { text, voice } });
		});
	}

	/**
	 * Stops the worker and resets this wrapper to its un-initialized state.
	 * Any pending `init()` or `generate()` promise is rejected instead of being
	 * left unsettled. Safe to call when no worker is running.
	 */
	public terminate(): void {
		const reason = new Error('KokoroTTS Worker was terminated.');

		this.abortInit?.(reason);
		this.abortRequest?.(reason);

		if (this.worker) {
			this.worker.terminate();
			this.worker = null;
		}
		this.initialized = false;
	}
}
|
53
src/lib/workers/kokoro.worker.ts
Normal file
53
src/lib/workers/kokoro.worker.ts
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
import { KokoroTTS } from 'kokoro-js';
|
||||||
|
|
||||||
|
/** Loaded Kokoro model; stays undefined until an `init` message has succeeded. */
let tts: KokoroTTS | undefined;
let isInitialized = false; // Flag to track initialization status
const DEFAULT_MODEL_ID = 'onnx-community/Kokoro-82M-v1.0-ONNX'; // Default model

/** Turns anything that was thrown into a message string that can be posted back. */
const describeError = (error: unknown): string =>
	error instanceof Error ? error.message : String(error);

/**
 * Worker entry point. Handles three message types, each `{ type, payload }`:
 *
 * - `init` (`payload: { model_id?, dtype }`): loads the model and posts
 *   `init:start`, then `init:complete` or `init:error`.
 * - `generate` (`payload: { text, voice }`): synthesizes audio and posts
 *   `generate:start`, then `generate:complete` (with `audioUrl`) or `generate:error`.
 * - `status`: posts `status:check` with the current initialization flag.
 *
 * Unknown message types are ignored.
 */
self.onmessage = async (event) => {
	const type = event.data?.type;
	// Tolerate messages without a payload instead of throwing before any reply is posted.
	const payload = event.data?.payload ?? {};

	switch (type) {
		case 'init': {
			const { dtype } = payload;
			const model_id = payload.model_id || DEFAULT_MODEL_ID; // Use default model if none provided

			self.postMessage({ status: 'init:start' });

			try {
				tts = await KokoroTTS.from_pretrained(model_id, {
					dtype,
					device: !!navigator?.gpu ? 'webgpu' : 'wasm' // Detect WebGPU
				});
				isInitialized = true; // Mark as initialized after successful loading
				self.postMessage({ status: 'init:complete' });
			} catch (error: unknown) {
				isInitialized = false; // Ensure it's marked as false on failure
				self.postMessage({ status: 'init:error', error: describeError(error) });
			}
			break;
		}

		case 'generate': {
			if (!isInitialized || !tts) {
				// Ensure model is initialized
				self.postMessage({ status: 'generate:error', error: 'TTS model not initialized' });
				return;
			}

			const { text, voice } = payload;
			self.postMessage({ status: 'generate:start' });

			try {
				const rawAudio = await tts.generate(text, { voice });
				const blob = await rawAudio.toBlob();
				// NOTE(review): this object URL is never revoked in this file - confirm the
				// consumer releases it, otherwise every generated clip stays in memory.
				const blobUrl = URL.createObjectURL(blob);
				self.postMessage({ status: 'generate:complete', audioUrl: blobUrl });
			} catch (error: unknown) {
				self.postMessage({ status: 'generate:error', error: describeError(error) });
			}
			break;
		}

		case 'status': {
			// Respond with the current initialization status
			self.postMessage({ status: 'status:check', initialized: isInitialized });
			break;
		}
	}
};
|
Loading…
Reference in New Issue
Block a user