diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py index f7ce6fecd..2bee38c5a 100644 --- a/backend/apps/audio/main.py +++ b/backend/apps/audio/main.py @@ -101,61 +101,57 @@ async def update_openai_config( @app.post("/speech") async def speech(request: Request, user=Depends(get_verified_user)): - idx = None + body = await request.body() + name = hashlib.sha256(body).hexdigest() + + file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3") + file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json") + + # Check if the file already exists in the cache + if file_path.is_file(): + return FileResponse(file_path) + + headers = {} + headers["Authorization"] = f"Bearer {app.state.OPENAI_API_KEY}" + headers["Content-Type"] = "application/json" + + r = None try: - body = await request.body() - name = hashlib.sha256(body).hexdigest() + r = requests.post( + url=f"{app.state.OPENAI_API_BASE_URL}/audio/speech", + data=body, + headers=headers, + stream=True, + ) - file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3") - file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json") + r.raise_for_status() - # Check if the file already exists in the cache - if file_path.is_file(): - return FileResponse(file_path) + # Save the streaming content to a file + with open(file_path, "wb") as f: + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) - headers = {} - headers["Authorization"] = f"Bearer {app.state.OPENAI_API_KEY}" - headers["Content-Type"] = "application/json" + with open(file_body_path, "w") as f: + json.dump(json.loads(body.decode("utf-8")), f) - r = None - try: - r = requests.post( - url=f"{app.state.OPENAI_API_BASE_URL}/audio/speech", - data=body, - headers=headers, - stream=True, - ) + # Return the saved file + return FileResponse(file_path) - r.raise_for_status() + except Exception as e: + log.exception(e) + error_detail = "Open WebUI: Server Connection Error" + if r is not None: + try: + res = r.json() + if "error" in res: + error_detail = f"External: {res['error']['message']}" + except: + error_detail = f"External: {e}" - # Save the streaming content to a file - with open(file_path, "wb") as f: - for chunk in r.iter_content(chunk_size=8192): - f.write(chunk) - - with open(file_body_path, "w") as f: - json.dump(json.loads(body.decode("utf-8")), f) - - # Return the saved file - return FileResponse(file_path) - - except Exception as e: - log.exception(e) - error_detail = "Open WebUI: Server Connection Error" - if r is not None: - try: - res = r.json() - if "error" in res: - error_detail = f"External: {res['error']}" - except: - error_detail = f"External: {e}" - - raise HTTPException( - status_code=r.status_code if r else 500, detail=error_detail - ) - - except ValueError: - raise HTTPException(status_code=401, detail=ERROR_MESSAGES.OPENAI_NOT_FOUND) + raise HTTPException( + status_code=r.status_code if r != None else 500, + detail=error_detail, + ) @app.post("/transcriptions") diff --git a/src/lib/apis/audio/index.ts b/src/lib/apis/audio/index.ts index 1919d0ee7..6679420d9 100644 --- a/src/lib/apis/audio/index.ts +++ b/src/lib/apis/audio/index.ts @@ -1,5 +1,67 @@ import { AUDIO_API_BASE_URL } from '$lib/constants'; +export const getAudioConfig = async (token: string) => { + let error = null; + + const res = await fetch(`${AUDIO_API_BASE_URL}/config`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + console.log(err); + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +type OpenAIConfigForm = { + url: string; + key: string; +}; + +export const updateAudioConfig = async (token: string, payload: OpenAIConfigForm) => { + let error = null; + + const res = await fetch(`${AUDIO_API_BASE_URL}/config/update`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + ...payload + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + console.log(err); + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + export const transcribeAudio = async (token: string, file: File) => { const data = new FormData(); data.append('file', file); @@ -48,11 +110,17 @@ export const synthesizeOpenAISpeech = async ( input: text, voice: speaker }) - }).catch((err) => { - console.log(err); - error = err; - return null; - }); + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res; + }) + .catch((err) => { + error = err.detail; + console.log(err); + + return null; + }); if (error) { throw error; diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte index fd2de7273..db8eba169 100644 --- a/src/lib/components/chat/Messages/ResponseMessage.svelte +++ b/src/lib/components/chat/Messages/ResponseMessage.svelte @@ -176,10 +176,12 @@ const toggleSpeakMessage = async () => { if (speaking) { - speechSynthesis.cancel(); + try { + speechSynthesis.cancel(); - sentencesAudio[speakingIdx].pause(); - sentencesAudio[speakingIdx].currentTime = 0; + sentencesAudio[speakingIdx].pause(); + sentencesAudio[speakingIdx].currentTime = 0; + } catch {} speaking = null; speakingIdx = null; @@ -221,6 +223,10 @@ sentence ).catch((error) => { toast.error(error); + + speaking = null; + loadingSpeech = false; + return null; }); @@ -230,7 +236,6 @@ const audio = new Audio(blobUrl); sentencesAudio[idx] = audio; loadingSpeech = false; - lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx)); } } diff --git a/src/lib/components/chat/Settings/Audio.svelte b/src/lib/components/chat/Settings/Audio.svelte index b6a8a859a..9a38d7ab0 100644 --- a/src/lib/components/chat/Settings/Audio.svelte +++ b/src/lib/components/chat/Settings/Audio.svelte @@ -1,4 +1,5 @@