mirror of
https://github.com/open-webui/open-webui
synced 2025-01-11 05:18:13 +00:00
459 lines
11 KiB
Svelte
459 lines
11 KiB
Svelte
<script lang="ts">
|
|
import { toast } from 'svelte-sonner';
|
|
import { createEventDispatcher, tick, getContext } from 'svelte';
|
|
import { config, settings } from '$lib/stores';
|
|
import { blobToFile, calculateSHA256, findWordIndices } from '$lib/utils';
|
|
|
|
import { transcribeAudio } from '$lib/apis/audio';
|
|
|
|
// i18n store taken from Svelte context; read as `$i18n` to translate user-facing messages.
const i18n = getContext('i18n');

// Emits `confirm` (carrying the transcribed text) and `cancel` to the parent component.
const dispatch = createEventDispatcher();

// Prop: the reactive statement below starts a recording when this is true and stops it when false.
export let recording = false;

// True while a confirmed recording is being finalized; the markup shows a spinner and grey styling.
let loading = false;
// Set by confirmRecording so the recorder's stop handler transcribes the audio instead of discarding it.
let confirmed = false;

// Elapsed recording time in seconds, rendered through formatSeconds.
let durationSeconds = 0;
// Handle returned by setInterval for the once-per-second duration timer.
let durationCounter = null;

// Text accumulated from browser speech-recognition results.
let transcription = '';
|
|
|
|
/**
 * Starts the timer that advances `durationSeconds` by one every second.
 * The interval handle is kept in `durationCounter` so it can be cleared later.
 */
const startDurationCounter = () => {
	const addOneSecond = () => {
		durationSeconds++;
	};

	durationCounter = setInterval(addOneSecond, 1000);
};
|
|
|
|
/**
 * Stops the duration timer and resets the displayed elapsed time to zero.
 */
const stopDurationCounter = () => {
	// Cancel the pending ticks first, then put the label back to 0:00.
	clearInterval(durationCounter);

	durationSeconds = 0;
};
|
|
|
|
// Keep the recorder in step with the `recording` prop: whenever the prop
// changes, start a session if it is set and tear the session down otherwise.
$: {
	if (!recording) {
		stopRecording();
	} else {
		startRecording();
	}
}
|
|
|
|
/**
 * Formats a duration given in seconds as `m:ss` (minutes are not padded,
 * seconds are left-padded with a single zero when below ten).
 *
 * @param seconds Duration in seconds.
 * @returns The formatted label, e.g. `1:05` for 65.
 */
const formatSeconds = (seconds) => {
	const wholeMinutes = Math.floor(seconds / 60);
	const leftover = seconds % 60;
	const padding = leftover < 10 ? '0' : '';

	return `${wholeMinutes}:${padding}${leftover}`;
};
|
|
|
|
// Browser speech-recognition instance; only created when the `web` speech-to-text engine is selected.
let speechRecognition;

// MediaRecorder for the current session, created in startRecording.
let mediaRecorder;
// Data chunks delivered by the recorder's `dataavailable` events.
let audioChunks = [];

// Assigned to the analyser node's `minDecibels` in analyseAudio.
const MIN_DECIBELS = -45;
// Number of level samples kept for the waveform; one bar is rendered per sample.
const VISUALIZER_BUFFER_LENGTH = 300;

// Rolling buffer of normalized levels, pre-filled with zeros so the waveform starts flat.
let visualizerData = Array(VISUALIZER_BUFFER_LENGTH).fill(0);
|
|
|
|
/**
 * Calculates the root-mean-square level of unsigned 8-bit time-domain samples.
 *
 * Each byte is re-centred around 128 and scaled to the range [-1, 1) before
 * squaring, so silence (all samples at 128) yields 0.
 *
 * @param data Time-domain samples as unsigned bytes.
 * @returns The RMS level; 0 for an empty buffer (instead of NaN from 0 / 0).
 */
const calculateRMS = (data: Uint8Array) => {
	if (data.length === 0) {
		return 0;
	}

	let sumSquares = 0;
	for (let i = 0; i < data.length; i++) {
		const normalizedValue = (data[i] - 128) / 128; // Normalize the data
		sumSquares += normalizedValue * normalizedValue;
	}
	return Math.sqrt(sumSquares / data.length);
};
|
|
|
|
/**
 * Maps a raw RMS level onto the bar height used by the visualizer.
 *
 * The level is multiplied by ten and raised to the power 1.5 (an exponent
 * above 1 expands larger values and compresses smaller ones), then clamped
 * to the range 0.01 (1%) to 1.0 (100%).
 *
 * @param rms Raw RMS level.
 * @returns The clamped, scaled level.
 */
const normalizeRMS = (rms) => {
	const EXPONENT = 1.5;
	const FLOOR = 0.01;
	const CEILING = 1.0;

	const boosted = Math.pow(rms * 10, EXPONENT);

	if (boosted > CEILING) {
		return CEILING;
	}
	if (boosted < FLOOR) {
		return FLOOR;
	}
	return boosted;
};
|
|
|
|
/**
 * Samples the level of `stream` once per animation frame and appends it to
 * `visualizerData`, which drives the waveform in the markup.
 *
 * Sampling runs for as long as `recording` is true and `loading` is false.
 * Once either condition fails the loop ends and the audio graph is released;
 * previously the AudioContext was never closed, so one leaked per recording.
 *
 * @param stream The MediaStream being recorded.
 */
const analyseAudio = (stream) => {
	const audioContext = new AudioContext();
	const audioStreamSource = audioContext.createMediaStreamSource(stream);

	const analyser = audioContext.createAnalyser();
	// Only affects byte frequency data; kept for the disabled silence detection noted below.
	analyser.minDecibels = MIN_DECIBELS;
	audioStreamSource.connect(analyser);

	const timeDomainData = new Uint8Array(analyser.fftSize);

	const releaseAudioGraph = () => {
		audioStreamSource.disconnect();
		audioContext.close().catch((error) => {
			console.error('Failed to close the analyser audio context', error);
		});
	};

	const processFrame = () => {
		if (!recording || loading) {
			releaseAudioGraph();
			return;
		}

		analyser.getByteTimeDomainData(timeDomainData);

		// Push the normalized RMS level of this frame to the waveform buffer.
		visualizerData.push(normalizeRMS(calculateRMS(timeDomainData)));

		// Ensure visualizerData stays within the buffer length.
		if (visualizerData.length >= VISUALIZER_BUFFER_LENGTH) {
			visualizerData.shift();
		}

		// Self-assignment so Svelte notices the in-place mutation and re-renders.
		visualizerData = visualizerData;

		// NOTE: silence-based auto-send (calling confirmRecording after 3 s without
		// sound when `$settings.speechAutoSend` is enabled) is currently disabled.

		window.requestAnimationFrame(processFrame);
	};

	window.requestAnimationFrame(processFrame);
};
|
|
|
|
/**
 * Uploads a finished recording for transcription and hands the resulting text
 * to the parent through the `confirm` event.
 *
 * A failed request is reported with an error toast and no event is dispatched.
 *
 * @param audioBlob The recorded audio.
 */
const transcribeHandler = async (audioBlob) => {
	await tick();

	const recordingFile = blobToFile(audioBlob, 'recording.wav');
	const pendingTranscription = transcribeAudio(localStorage.token, recordingFile);

	let result = null;
	try {
		result = await pendingTranscription;
	} catch (error) {
		toast.error(error);
	}

	if (!result) {
		return;
	}

	console.log(result.text);
	dispatch('confirm', result.text);
};
|
|
|
|
/**
 * Triggers a browser download of `blob` as `recording.wav`.
 *
 * A hidden anchor pointing at a temporary object URL is clicked
 * programmatically. The anchor is removed and the URL revoked afterwards,
 * even if the click throws, so repeated calls do not pile up DOM nodes or
 * object URLs.
 *
 * @param blob The audio data to save.
 */
const saveRecording = (blob) => {
	const url = URL.createObjectURL(blob);
	const a = document.createElement('a');

	a.style.display = 'none';
	a.href = url;
	a.download = 'recording.wav';
	document.body.appendChild(a);

	try {
		a.click();
	} finally {
		a.remove();
		URL.revokeObjectURL(url);
	}
};
|
|
|
|
/**
 * Starts a recording session.
 *
 * Steps:
 *  1. Start the duration timer and request microphone access. If access is
 *     refused or unavailable, show a toast, dispatch `cancel` and reset state
 *     instead of leaving an unhandled rejection and a running timer.
 *  2. Record the stream with a MediaRecorder. When the recorder stops, the
 *     microphone tracks are released; if the recording was confirmed and the
 *     browser is not doing the transcription, the audio is sent to
 *     transcribeHandler.
 *  3. If the `web` speech-to-text engine is selected (in `$config` or
 *     `$settings`) and the browser supports it, run browser speech recognition
 *     alongside. Its transcript is dispatched through `confirm` when
 *     recognition ends, and the recorded audio is discarded.
 *
 * The same "is the browser transcribing?" decision is used at start and in the
 * stop handler. Previously the stop handler looked at `$settings` only, so
 * audio could also be uploaded while the browser was transcribing.
 */
const startRecording = async () => {
	startDurationCounter();

	let stream;
	try {
		stream = await navigator.mediaDevices.getUserMedia({ audio: true });
	} catch (error) {
		console.error('Error accessing media devices.', error);
		toast.error($i18n.t('Error accessing media devices.'));
		stopDurationCounter();
		dispatch('cancel');
		recording = false;
		return;
	}

	// The session may have been cancelled while the permission prompt was open.
	if (!recording) {
		stream.getTracks().forEach((track) => track.stop());
		return;
	}

	const webEngineSelected =
		$config?.audio?.stt?.engine === 'web' || ($settings?.audio?.stt?.engine ?? '') === 'web';
	const useWebSpeech =
		webEngineSelected && ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window);

	const recorder = new MediaRecorder(stream);
	mediaRecorder = recorder;

	recorder.onstart = () => {
		console.log('Recording started');
		audioChunks = [];
		analyseAudio(stream);
	};

	recorder.ondataavailable = (event) => audioChunks.push(event.data);

	recorder.onstop = async () => {
		console.log('Recording stopped');

		// Release the microphone so the browser's capture indicator turns off.
		stream.getTracks().forEach((track) => track.stop());

		if (useWebSpeech) {
			// The browser produced the transcript; the recorded audio is not needed.
			audioChunks = [];
		} else {
			if (confirmed) {
				// NOTE(review): the blob is labelled audio/wav regardless of the
				// container MediaRecorder actually produced — confirm the server copes.
				const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });

				await transcribeHandler(audioBlob);

				confirmed = false;
				loading = false;
			}
			audioChunks = [];
			recording = false;
		}
	};

	recorder.start();

	if (!useWebSpeech) {
		return;
	}

	// Start from an empty transcript so text from an earlier session is not re-sent.
	transcription = '';

	const recognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)();
	speechRecognition = recognition;

	// Keep recognizing across pauses instead of stopping after the first phrase.
	recognition.continuous = true;

	// Recognition is stopped after this much time without a new result.
	const inactivityTimeout = 2000; // 2 seconds

	let timeoutId;

	// Event triggered when speech is recognized
	recognition.onresult = async (event) => {
		// Clear the inactivity timeout
		clearTimeout(timeoutId);

		// Handle recognized speech: append the newest result to the transcript.
		console.log(event);
		const transcript = event.results[event.results.length - 1][0].transcript;

		transcription = `${transcription}${transcript}`;

		await tick();
		document.getElementById('chat-textarea')?.focus();

		// Restart the inactivity timeout
		timeoutId = setTimeout(() => {
			console.log('Speech recognition turned off due to inactivity.');
			recognition.stop();
		}, inactivityTimeout);
	};

	// Event triggered when recognition has ended: finalize and emit the transcript.
	recognition.onend = function () {
		clearTimeout(timeoutId);
		console.log('recognition ended');

		confirmRecording();
		dispatch('confirm', transcription);

		confirmed = false;
		loading = false;
	};

	// Event triggered when an error occurs
	recognition.onerror = function (event) {
		clearTimeout(timeoutId);
		console.log(event);
		toast.error($i18n.t(`Speech recognition error: {{error}}`, { error: event.error }));
		dispatch('cancel');

		stopRecording();
	};

	// Start recognition only after the handlers are in place.
	recognition.start();
};
|
|
|
|
/**
 * Stops the current recording session without confirming it.
 *
 * The recorder is only stopped while it is still running, because
 * `MediaRecorder.stop()` throws an InvalidStateError on an inactive recorder.
 * The duration timer is always stopped and reset, and buffered audio dropped.
 */
const stopRecording = async () => {
	if (recording && mediaRecorder && mediaRecorder.state !== 'inactive') {
		mediaRecorder.stop();
	}

	stopDurationCounter();
	audioChunks = [];
};
|
|
|
|
/**
 * Confirms the current recording.
 *
 * Sets `loading` and `confirmed` so the recorder's stop handler transcribes
 * the audio, stops the recorder, and freezes the duration timer at its
 * current value.
 *
 * The recorder is only stopped while it is still running. Calling
 * `MediaRecorder.stop()` on an inactive recorder throws an InvalidStateError,
 * which previously rejected this function before the timer was cleared (for
 * example when speech recognition ended after the user had already confirmed).
 */
const confirmRecording = async () => {
	loading = true;
	confirmed = true;

	if (recording && mediaRecorder && mediaRecorder.state !== 'inactive') {
		mediaRecorder.stop();
	}

	clearInterval(durationCounter);
};
|
|
</script>
|
|
|
|
<div
|
|
class="{loading
|
|
? ' bg-gray-100/50 dark:bg-gray-850/50'
|
|
: 'bg-indigo-300/10 dark:bg-indigo-500/10 '} rounded-full flex p-2.5"
|
|
>
|
|
<div class="flex items-center mr-1">
|
|
<button
|
|
type="button"
|
|
class="p-1.5
|
|
|
|
{loading
|
|
? ' bg-gray-200 dark:bg-gray-700/50'
|
|
: 'bg-indigo-400/20 text-indigo-600 dark:text-indigo-300 '}
|
|
|
|
|
|
rounded-full"
|
|
on:click={async () => {
|
|
dispatch('cancel');
|
|
stopRecording();
|
|
}}
|
|
>
|
|
<svg
|
|
xmlns="http://www.w3.org/2000/svg"
|
|
fill="none"
|
|
viewBox="0 0 24 24"
|
|
stroke-width="3"
|
|
stroke="currentColor"
|
|
class="size-4"
|
|
>
|
|
<path stroke-linecap="round" stroke-linejoin="round" d="M6 18 18 6M6 6l12 12" />
|
|
</svg>
|
|
</button>
|
|
</div>
|
|
|
|
<div
|
|
class="flex flex-1 self-center items-center justify-between ml-2 mx-1 overflow-hidden h-6"
|
|
dir="rtl"
|
|
>
|
|
<div class="flex-1 flex items-center gap-0.5 h-6">
|
|
{#each visualizerData.slice().reverse() as rms}
|
|
<div
|
|
class="w-[2px]
|
|
|
|
{loading
|
|
? ' bg-gray-500 dark:bg-gray-400 '
|
|
: 'bg-indigo-500 dark:bg-indigo-400 '}
|
|
|
|
inline-block h-full"
|
|
style="height: {Math.min(100, Math.max(14, rms * 100))}%;"
|
|
/>
|
|
{/each}
|
|
</div>
|
|
</div>
|
|
|
|
<div class=" mx-1.5 pr-1 flex justify-center items-center">
|
|
<div
|
|
class="text-sm
|
|
|
|
|
|
{loading ? ' text-gray-500 dark:text-gray-400 ' : ' text-indigo-400 '}
|
|
font-medium flex-1 mx-auto text-center"
|
|
>
|
|
{formatSeconds(durationSeconds)}
|
|
</div>
|
|
</div>
|
|
|
|
<div class="flex items-center mr-1">
|
|
{#if loading}
|
|
<div class=" text-gray-500 rounded-full cursor-not-allowed">
|
|
<svg
|
|
width="24"
|
|
height="24"
|
|
viewBox="0 0 24 24"
|
|
xmlns="http://www.w3.org/2000/svg"
|
|
fill="currentColor"
|
|
><style>
|
|
.spinner_OSmW {
|
|
transform-origin: center;
|
|
animation: spinner_T6mA 0.75s step-end infinite;
|
|
}
|
|
@keyframes spinner_T6mA {
|
|
8.3% {
|
|
transform: rotate(30deg);
|
|
}
|
|
16.6% {
|
|
transform: rotate(60deg);
|
|
}
|
|
25% {
|
|
transform: rotate(90deg);
|
|
}
|
|
33.3% {
|
|
transform: rotate(120deg);
|
|
}
|
|
41.6% {
|
|
transform: rotate(150deg);
|
|
}
|
|
50% {
|
|
transform: rotate(180deg);
|
|
}
|
|
58.3% {
|
|
transform: rotate(210deg);
|
|
}
|
|
66.6% {
|
|
transform: rotate(240deg);
|
|
}
|
|
75% {
|
|
transform: rotate(270deg);
|
|
}
|
|
83.3% {
|
|
transform: rotate(300deg);
|
|
}
|
|
91.6% {
|
|
transform: rotate(330deg);
|
|
}
|
|
100% {
|
|
transform: rotate(360deg);
|
|
}
|
|
}
|
|
</style><g class="spinner_OSmW"
|
|
><rect x="11" y="1" width="2" height="5" opacity=".14" /><rect
|
|
x="11"
|
|
y="1"
|
|
width="2"
|
|
height="5"
|
|
transform="rotate(30 12 12)"
|
|
opacity=".29"
|
|
/><rect
|
|
x="11"
|
|
y="1"
|
|
width="2"
|
|
height="5"
|
|
transform="rotate(60 12 12)"
|
|
opacity=".43"
|
|
/><rect
|
|
x="11"
|
|
y="1"
|
|
width="2"
|
|
height="5"
|
|
transform="rotate(90 12 12)"
|
|
opacity=".57"
|
|
/><rect
|
|
x="11"
|
|
y="1"
|
|
width="2"
|
|
height="5"
|
|
transform="rotate(120 12 12)"
|
|
opacity=".71"
|
|
/><rect
|
|
x="11"
|
|
y="1"
|
|
width="2"
|
|
height="5"
|
|
transform="rotate(150 12 12)"
|
|
opacity=".86"
|
|
/><rect x="11" y="1" width="2" height="5" transform="rotate(180 12 12)" /></g
|
|
></svg
|
|
>
|
|
</div>
|
|
{:else}
|
|
<button
|
|
type="button"
|
|
class="p-1.5 bg-indigo-500 text-white dark:bg-indigo-500 dark:text-blue-950 rounded-full"
|
|
on:click={async () => {
|
|
await confirmRecording();
|
|
}}
|
|
>
|
|
<svg
|
|
xmlns="http://www.w3.org/2000/svg"
|
|
fill="none"
|
|
viewBox="0 0 24 24"
|
|
stroke-width="2.5"
|
|
stroke="currentColor"
|
|
class="size-4"
|
|
>
|
|
<path stroke-linecap="round" stroke-linejoin="round" d="m4.5 12.75 6 6 9-13.5" />
|
|
</svg>
|
|
</button>
|
|
{/if}
|
|
</div>
|
|
</div>
|
|
|
|
<style>
|
|
.visualizer {
|
|
display: flex;
|
|
height: 100%;
|
|
}
|
|
|
|
.visualizer-bar {
|
|
width: 2px;
|
|
background-color: #4a5aba; /* or whatever color you need */
|
|
}
|
|
</style>
|