mirror of
https://github.com/open-webui/open-webui
synced 2024-11-06 08:56:39 +00:00
commit
ff4cf16742
@ -17,7 +17,7 @@ from fastapi.middleware.cors import CORSMiddleware
|
||||
from faster_whisper import WhisperModel
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
import uuid
|
||||
import requests
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
@ -181,8 +181,15 @@ def transcribe(
|
||||
)
|
||||
|
||||
try:
|
||||
filename = file.filename
|
||||
file_path = f"{UPLOAD_DIR}/{filename}"
|
||||
ext = file.filename.split(".")[-1]
|
||||
|
||||
id = uuid.uuid4()
|
||||
filename = f"{id}.{ext}"
|
||||
|
||||
file_dir = f"{CACHE_DIR}/audio/transcriptions"
|
||||
os.makedirs(file_dir, exist_ok=True)
|
||||
file_path = f"{file_dir}/{filename}"
|
||||
|
||||
contents = file.file.read()
|
||||
with open(file_path, "wb") as f:
|
||||
f.write(contents)
|
||||
@ -215,6 +222,11 @@ def transcribe(
|
||||
|
||||
transcript = "".join([segment.text for segment in list(segments)])
|
||||
|
||||
# save the transcript to a json file
|
||||
transcript_file = f"{file_dir}/{id}.json"
|
||||
with open(transcript_file, "w") as f:
|
||||
json.dump({"transcript": transcript}, f)
|
||||
|
||||
return {"text": transcript.strip()}
|
||||
|
||||
except Exception as e:
|
||||
|
File diff suppressed because it is too large
Load Diff
394
src/lib/components/chat/MessageInput/VoiceRecording.svelte
Normal file
394
src/lib/components/chat/MessageInput/VoiceRecording.svelte
Normal file
@ -0,0 +1,394 @@
|
||||
<script lang="ts">
|
||||
import { toast } from 'svelte-sonner';
|
||||
import { createEventDispatcher, tick, getContext } from 'svelte';
|
||||
import { settings } from '$lib/stores';
|
||||
import { blobToFile, calculateSHA256, findWordIndices } from '$lib/utils';
|
||||
|
||||
import { transcribeAudio } from '$lib/apis/audio';
|
||||
|
||||
const i18n = getContext('i18n');
|
||||
|
||||
const dispatch = createEventDispatcher();
|
||||
|
||||
export let recording = false;
|
||||
|
||||
let loading = false;
|
||||
let confirmed = false;
|
||||
|
||||
let durationSeconds = 0;
|
||||
let durationCounter = null;
|
||||
|
||||
/**
 * Starts the once-per-second timer that advances `durationSeconds`.
 * The interval handle is stored in `durationCounter` so it can be cleared later.
 */
const startDurationCounter = () => {
	const advanceOneSecond = () => {
		durationSeconds += 1;
	};

	durationCounter = setInterval(advanceOneSecond, 1000);
};
|
||||
|
||||
/**
 * Cancels the timer held in `durationCounter` and resets the displayed
 * duration back to zero.
 */
const stopDurationCounter = () => {
	durationSeconds = 0;
	clearInterval(durationCounter);
};
|
||||
|
||||
// Reactive statement: whenever the `recording` prop changes, start or stop
// capturing to match it.
$: if (!recording) {
	stopRecording();
} else {
	startRecording();
}
|
||||
|
||||
/**
 * Formats a duration in seconds as `m:ss` (e.g. `65` -> `"1:05"`).
 *
 * Fractional input is floored, and negative or non-finite input is treated as
 * zero, so the result is always a well-formed clock string.
 *
 * @param seconds - Elapsed time in seconds.
 * @returns The duration as minutes and zero-padded seconds.
 */
const formatSeconds = (seconds: number): string => {
	const totalSeconds = Number.isFinite(seconds) ? Math.max(0, Math.floor(seconds)) : 0;
	const minutes = Math.floor(totalSeconds / 60);
	const remainingSeconds = totalSeconds % 60;

	return `${minutes}:${String(remainingSeconds).padStart(2, '0')}`;
};
|
||||
|
||||
let speechRecognition;
|
||||
|
||||
let mediaRecorder;
|
||||
let audioChunks = [];
|
||||
|
||||
const MIN_DECIBELS = -45;
|
||||
const VISUALIZER_BUFFER_LENGTH = 300;
|
||||
|
||||
let visualizerData = Array(VISUALIZER_BUFFER_LENGTH).fill(0);
|
||||
|
||||
/**
 * Calculates the RMS (root mean square) level of unsigned 8-bit time-domain
 * samples.
 *
 * Each byte is re-centred around 128 and scaled to the range [-1, 1) before
 * squaring, so a buffer of all-128 values yields 0.
 *
 * @param data - Time-domain samples, one unsigned byte per sample.
 * @returns The RMS level in [0, 1]; 0 for an empty buffer.
 */
const calculateRMS = (data: Uint8Array): number => {
	// An empty buffer would otherwise divide by zero and return NaN.
	if (data.length === 0) {
		return 0;
	}

	let sumSquares = 0;
	for (const sample of data) {
		const normalizedValue = (sample - 128) / 128; // Normalize the data
		sumSquares += normalizedValue * normalizedValue;
	}

	return Math.sqrt(sumSquares / data.length);
};
|
||||
|
||||
/**
 * Maps a raw RMS level onto the range used for the visualizer bars.
 *
 * The level is multiplied by 10, raised to the power 1.5 and then clamped to
 * [0.01, 1.0].
 *
 * @param rms - Raw RMS level.
 * @returns A value between 0.01 (1%) and 1.0 (100%).
 */
const normalizeRMS = (rms: number): number => {
	// NaN or negative input would turn into NaN through Math.pow and leak into
	// the rendered bar height; treat it as silence instead.
	if (Number.isNaN(rms) || rms <= 0) {
		return 0.01;
	}

	// Exponent > 1 expands larger values more and compresses smaller values more.
	const exp = 1.5;
	const scaledRMS = Math.pow(rms * 10, exp);

	// Scale between 0.01 (1%) and 1.0 (100%)
	return Math.min(1.0, Math.max(0.01, scaledRMS));
};
|
||||
|
||||
/**
 * Feeds the microphone stream through an analyser node and, once per animation
 * frame, updates the visualizer buffer and tracks when sound was last heard.
 *
 * While `recording` is true and `loading` is false, each frame:
 *  - pushes the normalized RMS level of the current samples to `visualizerData`;
 *  - refreshes the last-sound timestamp if any frequency bin is non-zero;
 *  - calls `confirmRecording()` after more than 3 seconds without sound when
 *    the `speechAutoSend` setting is enabled.
 *
 * The frame loop ends and the audio context is closed as soon as `recording`
 * becomes false, so repeated recordings do not pile up idle loops or contexts.
 *
 * @param stream - The media stream to analyse.
 */
const analyseAudio = (stream: MediaStream) => {
	const audioContext = new AudioContext();
	const audioStreamSource = audioContext.createMediaStreamSource(stream);

	const analyser = audioContext.createAnalyser();
	analyser.minDecibels = MIN_DECIBELS;
	audioStreamSource.connect(analyser);

	const domainData = new Uint8Array(analyser.frequencyBinCount);
	const timeDomainData = new Uint8Array(analyser.fftSize);

	let lastSoundTime = Date.now();

	const processFrame = () => {
		if (!recording) {
			// Recording has ended: tear down the audio graph and stop rescheduling.
			audioStreamSource.disconnect();
			audioContext.close().catch((error) => console.error(error));
			return;
		}

		if (!loading) {
			analyser.getByteTimeDomainData(timeDomainData);
			analyser.getByteFrequencyData(domainData);

			// Push the normalized RMS level of the current frame to visualizerData
			const rmsLevel = calculateRMS(timeDomainData);
			visualizerData.push(normalizeRMS(rmsLevel));

			// Ensure visualizerData array stays within the buffer length
			if (visualizerData.length >= VISUALIZER_BUFFER_LENGTH) {
				visualizerData.shift();
			}

			// Self-assignment so Svelte notices the in-place array mutation
			visualizerData = visualizerData;

			if (domainData.some((value) => value > 0)) {
				lastSoundTime = Date.now();
			}

			if (Date.now() - lastSoundTime > 3000) {
				if ($settings?.speechAutoSend ?? false) {
					confirmRecording();
				}
			}
		}

		window.requestAnimationFrame(processFrame);
	};

	window.requestAnimationFrame(processFrame);
};
|
||||
|
||||
/**
 * Sends the recorded audio for transcription and, on success, dispatches a
 * `confirm` event carrying the transcribed text.
 *
 * A failed request is reported through a toast and produces no event.
 */
const transcribeHandler = async (audioBlob) => {
	await tick();

	const recordingFile = blobToFile(audioBlob, 'recording.wav');

	const reportFailure = (error) => {
		toast.error(error);
		return null;
	};
	const res = await transcribeAudio(localStorage.token, recordingFile).catch(reportFailure);

	if (!res) {
		return;
	}

	console.log(res.text);
	dispatch('confirm', res.text);
};
|
||||
|
||||
/**
 * Triggers a browser download of the given blob as `recording.wav`.
 *
 * A hidden anchor is added to the document only for the duration of the click;
 * it is removed and the object URL revoked afterwards, even if the click throws.
 *
 * @param blob - The recorded audio to download.
 */
const saveRecording = (blob: Blob) => {
	const url = URL.createObjectURL(blob);

	const a = document.createElement('a');
	a.style.display = 'none';
	a.href = url;
	a.download = 'recording.wav';
	document.body.appendChild(a);

	try {
		a.click();
	} finally {
		// Do not leave one orphaned anchor in the page per download.
		a.remove();
		URL.revokeObjectURL(url);
	}
};
|
||||
|
||||
/**
 * Requests microphone access and starts capturing audio with a MediaRecorder.
 *
 * - If access fails, the error is shown in a toast, the duration timer is
 *   reset and `recording` is set back to false.
 * - If `recording` was turned off while the permission request was pending,
 *   the stream is released and nothing is recorded.
 * - When the recorder stops, the microphone tracks are released. If the
 *   recording was confirmed, the captured audio is sent for transcription.
 *   The `confirmed`, `loading` and `recording` flags are always reset, even
 *   if transcription throws.
 */
const startRecording = async () => {
	startDurationCounter();

	let stream: MediaStream;
	try {
		stream = await navigator.mediaDevices.getUserMedia({ audio: true });
	} catch (error) {
		// Permission denied, no device, or mediaDevices unavailable.
		toast.error(`${error}`);
		stopDurationCounter();
		recording = false;
		return;
	}

	// Stopping every track is what releases the microphone.
	const releaseStream = () => {
		stream.getTracks().forEach((track) => track.stop());
	};

	if (!recording) {
		// Cancelled while the permission prompt was open: do not start capturing.
		releaseStream();
		return;
	}

	mediaRecorder = new MediaRecorder(stream);

	mediaRecorder.onstart = () => {
		console.log('Recording started');
		audioChunks = [];
		analyseAudio(stream);
	};

	mediaRecorder.ondataavailable = (event) => audioChunks.push(event.data);

	mediaRecorder.onstop = async () => {
		console.log('Recording stopped');
		releaseStream();

		try {
			if (confirmed) {
				const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
				await transcribeHandler(audioBlob);
			}
		} finally {
			// Reset unconditionally so a failure cannot leave the spinner showing.
			confirmed = false;
			loading = false;
			audioChunks = [];
			recording = false;
		}
	};

	mediaRecorder.start();
};
|
||||
|
||||
/**
 * Stops the active recorder (if any), resets the duration timer and discards
 * the buffered audio chunks.
 *
 * The recorder is only stopped while it is not already inactive, because
 * calling `stop()` on an inactive MediaRecorder throws `InvalidStateError`.
 * That would otherwise happen when cancelling during transcription, and would
 * skip the timer reset below.
 */
const stopRecording = async () => {
	if (recording && mediaRecorder && mediaRecorder.state !== 'inactive') {
		mediaRecorder.stop();
	}

	stopDurationCounter();
	audioChunks = [];
};
|
||||
|
||||
/**
 * Confirms the current recording: marks it as confirmed and loading, stops the
 * recorder (whose stop handler performs the transcription) and freezes the
 * duration timer at its current value.
 *
 * Does nothing when no recorder is actively capturing. Setting the flags in
 * that state would show the loading spinner with no stop event ever arriving
 * to clear it.
 */
const confirmRecording = async () => {
	if (!recording || !mediaRecorder || mediaRecorder.state === 'inactive') {
		return;
	}

	loading = true;
	confirmed = true;

	mediaRecorder.stop();

	// Keep the elapsed time on screen while transcribing; only stop the ticking.
	clearInterval(durationCounter);
};
|
||||
</script>
|
||||
|
||||
<div
|
||||
class="{loading
|
||||
? ' bg-gray-100/50 dark:bg-gray-850/50'
|
||||
: 'bg-indigo-300/10 dark:bg-indigo-500/10 '} rounded-full flex p-2.5"
|
||||
>
|
||||
<div class="flex items-center mr-1">
|
||||
<button
|
||||
type="button"
|
||||
class="p-1.5
|
||||
|
||||
{loading
|
||||
? ' bg-gray-200 dark:bg-gray-700/50'
|
||||
: 'bg-indigo-400/20 text-indigo-600 dark:text-indigo-300 '}
|
||||
|
||||
|
||||
rounded-full"
|
||||
on:click={async () => {
|
||||
dispatch('cancel');
|
||||
stopRecording();
|
||||
}}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
fill="none"
|
||||
viewBox="0 0 24 24"
|
||||
stroke-width="3"
|
||||
stroke="currentColor"
|
||||
class="size-4"
|
||||
>
|
||||
<path stroke-linecap="round" stroke-linejoin="round" d="M6 18 18 6M6 6l12 12" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div
|
||||
class="flex flex-1 self-center items-center justify-between ml-2 mx-1 overflow-hidden h-6"
|
||||
dir="rtl"
|
||||
>
|
||||
<div class="flex-1 flex items-center gap-0.5 h-6">
|
||||
{#each visualizerData.slice().reverse() as rms}
|
||||
<div
|
||||
class="w-[2px]
|
||||
|
||||
{loading
|
||||
? ' bg-gray-500 dark:bg-gray-400 '
|
||||
: 'bg-indigo-500 dark:bg-indigo-400 '}
|
||||
|
||||
inline-block h-full"
|
||||
style="height: {Math.min(100, Math.max(14, rms * 100))}%;"
|
||||
/>
|
||||
{/each}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class=" mx-1.5 pr-1 flex justify-center items-center">
|
||||
<div
|
||||
class="text-sm
|
||||
|
||||
|
||||
{loading ? ' text-gray-500 dark:text-gray-400 ' : ' text-indigo-400 '}
|
||||
font-medium flex-1 mx-auto text-center"
|
||||
>
|
||||
{formatSeconds(durationSeconds)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex items-center mr-1">
|
||||
{#if loading}
|
||||
<div class=" text-gray-500 rounded-full cursor-not-allowed">
|
||||
<svg
|
||||
width="24"
|
||||
height="24"
|
||||
viewBox="0 0 24 24"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
fill="currentColor"
|
||||
><style>
|
||||
.spinner_OSmW {
|
||||
transform-origin: center;
|
||||
animation: spinner_T6mA 0.75s step-end infinite;
|
||||
}
|
||||
@keyframes spinner_T6mA {
|
||||
8.3% {
|
||||
transform: rotate(30deg);
|
||||
}
|
||||
16.6% {
|
||||
transform: rotate(60deg);
|
||||
}
|
||||
25% {
|
||||
transform: rotate(90deg);
|
||||
}
|
||||
33.3% {
|
||||
transform: rotate(120deg);
|
||||
}
|
||||
41.6% {
|
||||
transform: rotate(150deg);
|
||||
}
|
||||
50% {
|
||||
transform: rotate(180deg);
|
||||
}
|
||||
58.3% {
|
||||
transform: rotate(210deg);
|
||||
}
|
||||
66.6% {
|
||||
transform: rotate(240deg);
|
||||
}
|
||||
75% {
|
||||
transform: rotate(270deg);
|
||||
}
|
||||
83.3% {
|
||||
transform: rotate(300deg);
|
||||
}
|
||||
91.6% {
|
||||
transform: rotate(330deg);
|
||||
}
|
||||
100% {
|
||||
transform: rotate(360deg);
|
||||
}
|
||||
}
|
||||
</style><g class="spinner_OSmW"
|
||||
><rect x="11" y="1" width="2" height="5" opacity=".14" /><rect
|
||||
x="11"
|
||||
y="1"
|
||||
width="2"
|
||||
height="5"
|
||||
transform="rotate(30 12 12)"
|
||||
opacity=".29"
|
||||
/><rect
|
||||
x="11"
|
||||
y="1"
|
||||
width="2"
|
||||
height="5"
|
||||
transform="rotate(60 12 12)"
|
||||
opacity=".43"
|
||||
/><rect
|
||||
x="11"
|
||||
y="1"
|
||||
width="2"
|
||||
height="5"
|
||||
transform="rotate(90 12 12)"
|
||||
opacity=".57"
|
||||
/><rect
|
||||
x="11"
|
||||
y="1"
|
||||
width="2"
|
||||
height="5"
|
||||
transform="rotate(120 12 12)"
|
||||
opacity=".71"
|
||||
/><rect
|
||||
x="11"
|
||||
y="1"
|
||||
width="2"
|
||||
height="5"
|
||||
transform="rotate(150 12 12)"
|
||||
opacity=".86"
|
||||
/><rect x="11" y="1" width="2" height="5" transform="rotate(180 12 12)" /></g
|
||||
></svg
|
||||
>
|
||||
</div>
|
||||
{:else}
|
||||
<button
|
||||
type="button"
|
||||
class="p-1.5 bg-indigo-500 text-white dark:bg-indigo-500 dark:text-blue-950 rounded-full"
|
||||
on:click={async () => {
|
||||
await confirmRecording();
|
||||
}}
|
||||
>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
fill="none"
|
||||
viewBox="0 0 24 24"
|
||||
stroke-width="2.5"
|
||||
stroke="currentColor"
|
||||
class="size-4"
|
||||
>
|
||||
<path stroke-linecap="round" stroke-linejoin="round" d="m4.5 12.75 6 6 9-13.5" />
|
||||
</svg>
|
||||
</button>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<style>
|
||||
.visualizer {
|
||||
display: flex;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
.visualizer-bar {
|
||||
width: 2px;
|
||||
background-color: #4a5aba; /* or whatever color you need */
|
||||
}
|
||||
</style>
|
@ -168,7 +168,7 @@
|
||||
<select
|
||||
class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
|
||||
bind:value={STTEngine}
|
||||
placeholder="Select a mode"
|
||||
placeholder="Select an engine"
|
||||
on:change={(e) => {
|
||||
if (e.target.value !== '') {
|
||||
navigator.mediaDevices.getUserMedia({ audio: true }).catch(function (err) {
|
||||
@ -182,30 +182,12 @@
|
||||
}
|
||||
}}
|
||||
>
|
||||
<option value="">{$i18n.t('Default (Web API)')}</option>
|
||||
<option value="whisper-local">{$i18n.t('Whisper (Local)')}</option>
|
||||
<option value="">{$i18n.t('Default (Whisper)')}</option>
|
||||
<option value="web">{$i18n.t('Web API')}</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class=" py-0.5 flex w-full justify-between">
|
||||
<div class=" self-center text-xs font-medium">{$i18n.t('Conversation Mode')}</div>
|
||||
|
||||
<button
|
||||
class="p-1 px-3 text-xs flex rounded transition"
|
||||
on:click={() => {
|
||||
toggleConversationMode();
|
||||
}}
|
||||
type="button"
|
||||
>
|
||||
{#if conversationMode === true}
|
||||
<span class="ml-2 self-center">{$i18n.t('On')}</span>
|
||||
{:else}
|
||||
<span class="ml-2 self-center">{$i18n.t('Off')}</span>
|
||||
{/if}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div class=" py-0.5 flex w-full justify-between">
|
||||
<div class=" self-center text-xs font-medium">
|
||||
{$i18n.t('Auto-send input after 3 sec.')}
|
||||
|
20
src/lib/components/icons/Headphone.svelte
Normal file
20
src/lib/components/icons/Headphone.svelte
Normal file
@ -0,0 +1,20 @@
|
||||
<script lang="ts">
|
||||
export let className = 'w-4 h-4';
|
||||
export let strokeWidth = '0';
|
||||
</script>
|
||||
|
||||
<svg
|
||||
aria-hidden="true"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
fill="currentColor"
|
||||
viewBox="0 0 24 24"
|
||||
stroke-width={strokeWidth}
|
||||
stroke="currentColor"
|
||||
class={className}
|
||||
>
|
||||
<path
|
||||
fill-rule="evenodd"
|
||||
d="M12 5a7 7 0 0 0-7 7v1.17c.313-.11.65-.17 1-.17h2a1 1 0 0 1 1 1v6a1 1 0 0 1-1 1H6a3 3 0 0 1-3-3v-6a9 9 0 0 1 18 0v6a3 3 0 0 1-3 3h-2a1 1 0 0 1-1-1v-6a1 1 0 0 1 1-1h2c.35 0 .687.06 1 .17V12a7 7 0 0 0-7-7Z"
|
||||
clip-rule="evenodd"
|
||||
/>
|
||||
</svg>
|
Loading…
Reference in New Issue
Block a user