Mirror of https://github.com/open-webui/open-webui (synced 2025-06-14 02:11:03 +00:00)

refac: optimisation

This commit is contained in:
parent 4e640daf83
commit fc3a31e3d5
@@ -2,8 +2,8 @@
 	import { settings, showCallOverlay } from '$lib/stores';
 	import { onMount, tick, getContext } from 'svelte';
 
-	import { blobToFile, calculateSHA256, findWordIndices } from '$lib/utils';
-	import { transcribeAudio } from '$lib/apis/audio';
+	import { blobToFile, calculateSHA256, extractSentences, findWordIndices } from '$lib/utils';
+	import { synthesizeOpenAISpeech, transcribeAudio } from '$lib/apis/audio';
 	import { toast } from 'svelte-sonner';
 
 	const i18n = getContext('i18n');
@@ -14,7 +14,8 @@
 	let confirmed = false;
 
 	let assistantSpeaking = false;
-	let assistantAudio = null;
+	let assistantAudio = {};
+	let assistantAudioIdx = null;
 
 	let rmsLevel = 0;
 	let hasStartedSpeaking = false;
@@ -26,6 +27,7 @@
 	let animationFrameId;
 
 	let speechRecognition;
+	let currentUtterance = null;
 
 	let mediaRecorder;
 	let audioChunks = [];
@@ -108,14 +110,7 @@
 			// Check if initial speech/noise has started
 			const hasSound = domainData.some((value) => value > 0);
 			if (hasSound) {
-				if (assistantSpeaking) {
-					speechSynthesis.cancel();
-
-					if (assistantAudio) {
-						assistantAudio.pause();
-						assistantAudio.currentTime = 0;
-					}
-				}
+				stopAllAudio();
 				hasStartedSpeaking = true;
 				lastSoundTime = Date.now();
 			}
@@ -140,6 +135,55 @@
 		detectSound();
 	};
 
+	const stopAllAudio = () => {
+		if (currentUtterance) {
+			speechSynthesis.cancel();
+			currentUtterance = null;
+		}
+		if (assistantAudio[assistantAudioIdx]) {
+			assistantAudio[assistantAudioIdx].pause();
+			assistantAudio[assistantAudioIdx].currentTime = 0;
+		}
+		assistantSpeaking = false;
+	};
+
+	const playAudio = (idx) => {
+		return new Promise((res) => {
+			assistantAudioIdx = idx;
+			const audio = assistantAudio[idx];
+			audio.play();
+			audio.onended = async (e) => {
+				await new Promise((r) => setTimeout(r, 300));
+
+				if (Object.keys(assistantAudio).length - 1 === idx) {
+					assistantSpeaking = false;
+				}
+
+				res(e);
+			};
+		});
+	};
+
+	const getOpenAISpeech = async (text) => {
+		const res = await synthesizeOpenAISpeech(
+			localStorage.token,
+			$settings?.audio?.speaker ?? 'alloy',
+			text,
+			$settings?.audio?.model ?? 'tts-1'
+		).catch((error) => {
+			toast.error(error);
+			assistantSpeaking = false;
+			return null;
+		});
+
+		if (res) {
+			const blob = await res.blob();
+			const blobUrl = URL.createObjectURL(blob);
+			const audio = new Audio(blobUrl);
+			assistantAudio = audio;
+		}
+	};
+
 	const transcribeHandler = async (audioBlob) => {
 		// Create a blob from the audio chunks
 
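Note on the new playAudio helper above: it resolves its promise only after the clip's ended event fires (plus a short 300 ms pause), so callers can chain clips strictly back to back. A minimal usage sketch, assuming assistantAudio has already been populated with Audio objects keyed by sentence index, as the loop further below does:

// Illustrative only: play every prepared clip in order by chaining the promises.
let queue = Promise.resolve();
for (const idx of Object.keys(assistantAudio)) {
	queue = queue.then(() => playAudio(Number(idx)));
}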
@@ -152,21 +196,68 @@
 		});
 
 		if (res) {
-			toast.success(res.text);
+			console.log(res.text);
 
 			const _responses = await submitPrompt(res.text);
 			console.log(_responses);
 
 			if (_responses.at(0)) {
-				const response = _responses[0];
-				if (response) {
+				const content = _responses[0];
+				if (content) {
+					assistantSpeakingHandler(content);
+				}
+			}
+		}
+	};
+
+	const assistantSpeakingHandler = async (content) => {
 		assistantSpeaking = true;
 
-		if ($settings?.audio?.TTSEngine ?? '') {
-			speechSynthesis.speak(new SpeechSynthesisUtterance(response));
-		} else {
+		if (($settings?.audio?.TTSEngine ?? '') == '') {
+			currentUtterance = new SpeechSynthesisUtterance(content);
+			speechSynthesis.speak(currentUtterance);
+		} else if ($settings?.audio?.TTSEngine === 'openai') {
 			console.log('openai');
+
+			const sentences = extractSentences(content).reduce((mergedTexts, currentText) => {
+				const lastIndex = mergedTexts.length - 1;
+				if (lastIndex >= 0) {
+					const previousText = mergedTexts[lastIndex];
+					const wordCount = previousText.split(/\s+/).length;
+					if (wordCount < 2) {
+						mergedTexts[lastIndex] = previousText + ' ' + currentText;
+					} else {
+						mergedTexts.push(currentText);
 					}
+				} else {
+					mergedTexts.push(currentText);
+				}
+				return mergedTexts;
+			}, []);
+
+			console.log(sentences);
+
+			let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
+
+			for (const [idx, sentence] of sentences.entries()) {
+				const res = await synthesizeOpenAISpeech(
+					localStorage.token,
+					$settings?.audio?.speaker,
+					sentence,
+					$settings?.audio?.model
+				).catch((error) => {
+					toast.error(error);
+
+					assistantSpeaking = false;
+					return null;
+				});
+
+				if (res) {
+					const blob = await res.blob();
+					const blobUrl = URL.createObjectURL(blob);
+					const audio = new Audio(blobUrl);
+					assistantAudio[idx] = audio;
+					lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
 				}
 			}
 		}
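The reduce over extractSentences(content) above merges very short fragments into the preceding sentence, so each synthesizeOpenAISpeech call gets a reasonable chunk of text instead of a one-word clip. A standalone sketch of the same logic (illustrative; extractSentences from $lib/utils is assumed to return an array of sentence strings):

// Append the current sentence to the previous entry whenever that entry has fewer than 2 words.
const mergeShortSentences = (sentences) =>
	sentences.reduce((mergedTexts, currentText) => {
		const lastIndex = mergedTexts.length - 1;
		if (lastIndex >= 0 && mergedTexts[lastIndex].split(/\s+/).length < 2) {
			mergedTexts[lastIndex] = mergedTexts[lastIndex] + ' ' + currentText;
		} else {
			mergedTexts.push(currentText);
		}
		return mergedTexts;
	}, []);

// e.g. mergeShortSentences(['Hello!', 'Sure.', 'Here is a longer sentence.'])
//      -> ['Hello! Sure.', 'Here is a longer sentence.']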
@@ -311,7 +402,7 @@
 					{#if loading}
 						Thinking...
 					{:else}
-						Listening... {Math.round(rmsLevel * 100)}
+						Listening...
 					{/if}
 				</div>
 			</button>
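Taken as a whole, the speaking path now branches on $settings?.audio?.TTSEngine: an empty value uses the browser's Web Speech API, keeping the utterance in currentUtterance so stopAllAudio() can cancel it when the user interrupts, while 'openai' synthesizes each sentence server-side and queues playback through playAudio. A condensed, illustrative restatement of that branch (not a drop-in replacement for the component code):

const engine = $settings?.audio?.TTSEngine ?? '';
if (engine === '') {
	// Browser TTS: keep a handle so it can be cancelled on interruption.
	currentUtterance = new SpeechSynthesisUtterance(content);
	speechSynthesis.speak(currentUtterance);
} else if (engine === 'openai') {
	// Server-side TTS: synthesize per sentence and chain playback, as in the loop above.
}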