mirror of https://github.com/open-webui/open-webui (synced 2025-02-20 20:07:28 +00:00)

refac: optimisation

parent 4e640daf83
commit fc3a31e3d5
@@ -2,8 +2,8 @@
 	import { settings, showCallOverlay } from '$lib/stores';
 	import { onMount, tick, getContext } from 'svelte';
 
-	import { blobToFile, calculateSHA256, findWordIndices } from '$lib/utils';
-	import { transcribeAudio } from '$lib/apis/audio';
+	import { blobToFile, calculateSHA256, extractSentences, findWordIndices } from '$lib/utils';
+	import { synthesizeOpenAISpeech, transcribeAudio } from '$lib/apis/audio';
 	import { toast } from 'svelte-sonner';
 
 	const i18n = getContext('i18n');
@@ -14,7 +14,8 @@
 	let confirmed = false;
 
 	let assistantSpeaking = false;
-	let assistantAudio = null;
+	let assistantAudio = {};
+	let assistantAudioIdx = null;
 
 	let rmsLevel = 0;
 	let hasStartedSpeaking = false;
@@ -26,6 +27,7 @@
 	let animationFrameId;
 
 	let speechRecognition;
+	let currentUtterance = null;
 
 	let mediaRecorder;
 	let audioChunks = [];
@@ -108,14 +110,7 @@
 			// Check if initial speech/noise has started
 			const hasSound = domainData.some((value) => value > 0);
 			if (hasSound) {
-				if (assistantSpeaking) {
-					speechSynthesis.cancel();
-
-					if (assistantAudio) {
-						assistantAudio.pause();
-						assistantAudio.currentTime = 0;
-					}
-				}
+				stopAllAudio();
 				hasStartedSpeaking = true;
 				lastSoundTime = Date.now();
 			}
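
For orientation: `hasSound` in the hunk above comes from analyser data the component captures from the microphone, and any non-zero value is treated as the user starting to speak, which now simply triggers `stopAllAudio()`. A rough, self-contained sketch of such a barge-in detector with the Web Audio API follows; the stream/analyser wiring and the `onSound` callback are illustrative assumptions, not code from this commit.

```ts
// Hypothetical barge-in detector: invoke a callback as soon as the microphone picks up sound.
const startSoundDetection = async (onSound: () => void) => {
	const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
	const audioContext = new AudioContext();
	const analyser = audioContext.createAnalyser();
	audioContext.createMediaStreamSource(stream).connect(analyser);

	const domainData = new Uint8Array(analyser.frequencyBinCount);

	const detect = () => {
		analyser.getByteFrequencyData(domainData);
		// Any non-zero frequency bin counts as incoming speech/noise.
		if (domainData.some((value) => value > 0)) {
			onSound();
		}
		requestAnimationFrame(detect);
	};
	detect();
};

// Usage sketch: stop whatever the assistant is saying the moment the user makes noise.
// startSoundDetection(() => stopAllAudio());
```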
@@ -140,6 +135,55 @@
 		detectSound();
 	};
 
+	const stopAllAudio = () => {
+		if (currentUtterance) {
+			speechSynthesis.cancel();
+			currentUtterance = null;
+		}
+		if (assistantAudio[assistantAudioIdx]) {
+			assistantAudio[assistantAudioIdx].pause();
+			assistantAudio[assistantAudioIdx].currentTime = 0;
+		}
+		assistantSpeaking = false;
+	};
+
+	const playAudio = (idx) => {
+		return new Promise((res) => {
+			assistantAudioIdx = idx;
+			const audio = assistantAudio[idx];
+			audio.play();
+			audio.onended = async (e) => {
+				await new Promise((r) => setTimeout(r, 300));
+
+				if (Object.keys(assistantAudio).length - 1 === idx) {
+					assistantSpeaking = false;
+				}
+
+				res(e);
+			};
+		});
+	};
+
+	const getOpenAISpeech = async (text) => {
+		const res = await synthesizeOpenAISpeech(
+			localStorage.token,
+			$settings?.audio?.speaker ?? 'alloy',
+			text,
+			$settings?.audio?.model ?? 'tts-1'
+		).catch((error) => {
+			toast.error(error);
+			assistantSpeaking = false;
+			return null;
+		});
+
+		if (res) {
+			const blob = await res.blob();
+			const blobUrl = URL.createObjectURL(blob);
+			const audio = new Audio(blobUrl);
+			assistantAudio = audio;
+		}
+	};
+
 	const transcribeHandler = async (audioBlob) => {
 		// Create a blob from the audio chunks
 
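
The new `playAudio` above resolves its promise only inside the `onended` handler, which is what later lets the component chain clips so they play strictly one after another. A minimal standalone sketch of that pattern, assuming a plain array of audio URLs (`audioUrls`, `audioStore`, and `enqueueClips` are illustrative names, not part of the commit):

```ts
// Hypothetical illustration: play several clips strictly in sequence by chaining promises.
const audioStore: Record<number, HTMLAudioElement> = {};

const playClip = (idx: number): Promise<Event> =>
	new Promise((resolve) => {
		const audio = audioStore[idx];
		audio.play();
		// Resolving in `onended` (not right after `play()`) is what keeps clips sequential.
		audio.onended = (e) => resolve(e);
	});

const enqueueClips = (audioUrls: string[]) => {
	let queue: Promise<unknown> = Promise.resolve();
	audioUrls.forEach((url, idx) => {
		audioStore[idx] = new Audio(url);
		// Each clip starts only after the previous one has finished playing.
		queue = queue.then(() => playClip(idx));
	});
	return queue;
};
```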
@@ -152,21 +196,68 @@
 		});
 
 		if (res) {
 			toast.success(res.text);
 			console.log(res.text);
 
 			const _responses = await submitPrompt(res.text);
 			console.log(_responses);
 
 			if (_responses.at(0)) {
-				const response = _responses[0];
-				if (response) {
-					assistantSpeaking = true;
+				const content = _responses[0];
+				if (content) {
+					assistantSpeakingHandler(content);
 				}
 			}
 		}
 	};
 
-	if ($settings?.audio?.TTSEngine ?? '') {
-		speechSynthesis.speak(new SpeechSynthesisUtterance(response));
+	const assistantSpeakingHandler = async (content) => {
+		assistantSpeaking = true;
+
+		if (($settings?.audio?.TTSEngine ?? '') == '') {
+			currentUtterance = new SpeechSynthesisUtterance(content);
+			speechSynthesis.speak(currentUtterance);
+		} else if ($settings?.audio?.TTSEngine === 'openai') {
+			console.log('openai');
+
+			const sentences = extractSentences(content).reduce((mergedTexts, currentText) => {
+				const lastIndex = mergedTexts.length - 1;
+				if (lastIndex >= 0) {
+					const previousText = mergedTexts[lastIndex];
+					const wordCount = previousText.split(/\s+/).length;
+					if (wordCount < 2) {
+						mergedTexts[lastIndex] = previousText + ' ' + currentText;
+					} else {
+						console.log('openai');
+						mergedTexts.push(currentText);
+					}
+				} else {
+					mergedTexts.push(currentText);
+				}
+				return mergedTexts;
+			}, []);
+
+			console.log(sentences);
+
+			let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
+
+			for (const [idx, sentence] of sentences.entries()) {
+				const res = await synthesizeOpenAISpeech(
+					localStorage.token,
+					$settings?.audio?.speaker,
+					sentence,
+					$settings?.audio?.model
+				).catch((error) => {
+					toast.error(error);
+
+					assistantSpeaking = false;
+					return null;
+				});
+
+				if (res) {
+					const blob = await res.blob();
+					const blobUrl = URL.createObjectURL(blob);
+					const audio = new Audio(blobUrl);
+					assistantAudio[idx] = audio;
+					lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
+				}
+			}
+		}
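
The reduce over `extractSentences(content)` in the hunk above merges the current sentence into the previous chunk whenever that chunk is only a single word, so that very short fragments are not sent to the TTS endpoint on their own. A small self-contained sketch of that merging step, assuming the sentences already arrive as a string array (`mergeShortSentences` and the sample input are illustrative; `extractSentences` itself is not reproduced here):

```ts
// Hypothetical illustration of the sentence-merging pass used before per-sentence TTS.
// `rawSentences` stands in for the output of extractSentences(content).
const mergeShortSentences = (rawSentences: string[]): string[] =>
	rawSentences.reduce<string[]>((merged, current) => {
		const lastIndex = merged.length - 1;
		if (lastIndex >= 0 && merged[lastIndex].split(/\s+/).length < 2) {
			// The previous chunk is a single word: glue the current sentence onto it.
			merged[lastIndex] = merged[lastIndex] + ' ' + current;
		} else {
			merged.push(current);
		}
		return merged;
	}, []);

console.log(mergeShortSentences(['Hi.', 'How can I help you today?', 'Ask me anything.']));
// -> ['Hi. How can I help you today?', 'Ask me anything.']
```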
@@ -311,7 +402,7 @@
 					{#if loading}
 						Thinking...
 					{:else}
-						Listening... {Math.round(rmsLevel * 100)}
+						Listening...
 					{/if}
 				</div>
 			</button>
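
Finally, when no TTS engine is configured the handler falls back to the browser's Web Speech API and keeps a reference in `currentUtterance` so the next barge-in can cancel it mid-sentence. A tiny sketch of that fallback and its cancellation (the `speakWithBrowser` and `stopSpeaking` names are illustrative, not from the commit):

```ts
// Hypothetical sketch of the browser-TTS fallback plus cancellation on barge-in.
let currentUtterance: SpeechSynthesisUtterance | null = null;

const speakWithBrowser = (content: string) => {
	currentUtterance = new SpeechSynthesisUtterance(content);
	speechSynthesis.speak(currentUtterance);
};

const stopSpeaking = () => {
	if (currentUtterance) {
		// cancel() drops the entire utterance queue, so the reference is cleared as well.
		speechSynthesis.cancel();
		currentUtterance = null;
	}
};
```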