diff --git a/backend/open_webui/routers/files.py b/backend/open_webui/routers/files.py index bc998dae8..d963cd632 100644 --- a/backend/open_webui/routers/files.py +++ b/backend/open_webui/routers/files.py @@ -125,12 +125,16 @@ def upload_file( ) if process: try: - if file.content_type in [ - "audio/mpeg", - "audio/wav", - "audio/ogg", - "audio/x-m4a", - ]: + + if (file.content_type or "").startswith( + ( + "audio/mpeg", + "audio/wav", + "audio/ogg", + "audio/x-m4a", + "audio/webm", + ) + ): file_path = Storage.get_file(file_path) result = transcribe(request, file_path) diff --git a/src/lib/components/chat/MessageInput/VoiceRecording.svelte b/src/lib/components/chat/MessageInput/VoiceRecording.svelte index 941c814d7..6bbb161ed 100644 --- a/src/lib/components/chat/MessageInput/VoiceRecording.svelte +++ b/src/lib/components/chat/MessageInput/VoiceRecording.svelte @@ -6,6 +6,10 @@ import { transcribeAudio } from '$lib/apis/audio'; + import dayjs from 'dayjs'; + import LocalizedFormat from 'dayjs/plugin/localizedFormat'; + dayjs.extend(LocalizedFormat); + const i18n = getContext('i18n'); export let recording = false; @@ -134,11 +138,11 @@ detectSound(); }; - const onStopHandler = async (audioBlob) => { + const onStopHandler = async (audioBlob, ext: string = 'wav') => { // Create a blob from the audio chunks await tick(); - const file = blobToFile(audioBlob, 'recording.wav'); + const file = blobToFile(audioBlob, `Recording-${dayjs().format('L LT')}.${ext}`); if (transcribe) { if ($config.audio.stt.engine === 'web' || ($settings?.audio?.stt?.engine ?? 
'') === 'web') { @@ -163,32 +167,23 @@ } }; - const saveRecording = (blob) => { - const url = URL.createObjectURL(blob); - const a = document.createElement('a'); - document.body.appendChild(a); - a.style = 'display: none'; - a.href = url; - a.download = 'recording.wav'; - a.click(); - window.URL.revokeObjectURL(url); - }; - const startRecording = async () => { loading = true; try { if (displayMedia) { - stream = await navigator.mediaDevices.getDisplayMedia({ - video: { - mediaSource: 'screen' - }, - audio: { - echoCancellation: true, - noiseSuppression: true, - autoGainControl: true - } + const mediaStream = await navigator.mediaDevices.getDisplayMedia({ + audio: true }); + + stream = new MediaStream(); + for (const track of mediaStream.getAudioTracks()) { + stream.addTrack(track); + } + + for (const track of mediaStream.getVideoTracks()) { + track.stop(); + } } else { stream = await navigator.mediaDevices.getUserMedia({ audio: { @@ -206,7 +201,10 @@ return; } - mediaRecorder = new MediaRecorder(stream); + mediaRecorder = new MediaRecorder(stream, { + mimeType: MediaRecorder.isTypeSupported('audio/webm; codecs=opus') ? 'audio/webm; codecs=opus' : '' + }); + mediaRecorder.onstart = () => { console.log('Recording started'); loading = false; @@ -220,8 +218,19 @@ console.log('Recording stopped'); if (confirmed) { - const audioBlob = new Blob(audioChunks, { type: 'audio/wav' }); - await onStopHandler(audioBlob); + // Use the actual type provided by MediaRecorder + let type = audioChunks[0]?.type || mediaRecorder.mimeType || 'audio/webm'; + + // split `/` and `;` to get the extension + let ext = type.split('/')[1].split(';')[0] || 'webm'; + + // If not audio, default to audio/webm + if (!type.startsWith('audio/')) { + ext = 'webm'; + } + + const audioBlob = new Blob(audioChunks, { type: type }); + await onStopHandler(audioBlob, ext); confirmed = false; loading = false; diff --git a/src/lib/components/common/FileItemModal.svelte b/src/lib/components/common/FileItemModal.svelte index 1ff724638..42b88372d 100644 --- 
a/src/lib/components/common/FileItemModal.svelte +++ b/src/lib/components/common/FileItemModal.svelte @@ -29,7 +29,8 @@ (item?.name && item?.name.toLowerCase().endsWith('.mp3')) || (item?.name && item?.name.toLowerCase().endsWith('.wav')) || (item?.name && item?.name.toLowerCase().endsWith('.ogg')) || - (item?.name && item?.name.toLowerCase().endsWith('.m4a')); + (item?.name && item?.name.toLowerCase().endsWith('.m4a')) || + (item?.name && item?.name.toLowerCase().endsWith('.webm')); onMount(() => { console.log(item);