From cbd18ec63c3a540ae4d7dfb63d216ad1e26ba12f Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 20 Apr 2024 16:00:24 -0500 Subject: [PATCH] feat: external openai tts support --- backend/apps/audio/main.py | 92 +++++++++---------- src/lib/apis/audio/index.ts | 78 +++++++++++++++- .../chat/Messages/ResponseMessage.svelte | 13 ++- src/lib/components/chat/Settings/Audio.svelte | 60 ++++++++++-- .../documents/Settings/General.svelte | 18 ++-- 5 files changed, 187 insertions(+), 74 deletions(-) diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py index f7ce6fecd..2bee38c5a 100644 --- a/backend/apps/audio/main.py +++ b/backend/apps/audio/main.py @@ -101,61 +101,57 @@ async def update_openai_config( @app.post("/speech") async def speech(request: Request, user=Depends(get_verified_user)): - idx = None + body = await request.body() + name = hashlib.sha256(body).hexdigest() + + file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3") + file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json") + + # Check if the file already exists in the cache + if file_path.is_file(): + return FileResponse(file_path) + + headers = {} + headers["Authorization"] = f"Bearer {app.state.OPENAI_API_KEY}" + headers["Content-Type"] = "application/json" + + r = None try: - body = await request.body() - name = hashlib.sha256(body).hexdigest() + r = requests.post( + url=f"{app.state.OPENAI_API_BASE_URL}/audio/speech", + data=body, + headers=headers, + stream=True, + ) - file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3") - file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json") + r.raise_for_status() - # Check if the file already exists in the cache - if file_path.is_file(): - return FileResponse(file_path) + # Save the streaming content to a file + with open(file_path, "wb") as f: + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) - headers = {} - headers["Authorization"] = f"Bearer {app.state.OPENAI_API_KEY}" - headers["Content-Type"] = "application/json" + with open(file_body_path, "w") as f: + json.dump(json.loads(body.decode("utf-8")), f) - r = None - try: - r = requests.post( - url=f"{app.state.OPENAI_API_BASE_URL}/audio/speech", - data=body, - headers=headers, - stream=True, - ) + # Return the saved file + return FileResponse(file_path) - r.raise_for_status() + except Exception as e: + log.exception(e) + error_detail = "Open WebUI: Server Connection Error" + if r is not None: + try: + res = r.json() + if "error" in res: + error_detail = f"External: {res['error']['message']}" + except: + error_detail = f"External: {e}" - # Save the streaming content to a file - with open(file_path, "wb") as f: - for chunk in r.iter_content(chunk_size=8192): - f.write(chunk) - - with open(file_body_path, "w") as f: - json.dump(json.loads(body.decode("utf-8")), f) - - # Return the saved file - return FileResponse(file_path) - - except Exception as e: - log.exception(e) - error_detail = "Open WebUI: Server Connection Error" - if r is not None: - try: - res = r.json() - if "error" in res: - error_detail = f"External: {res['error']}" - except: - error_detail = f"External: {e}" - - raise HTTPException( - status_code=r.status_code if r else 500, detail=error_detail - ) - - except ValueError: - raise HTTPException(status_code=401, detail=ERROR_MESSAGES.OPENAI_NOT_FOUND) + raise HTTPException( + status_code=r.status_code if r != None else 500, + detail=error_detail, + ) @app.post("/transcriptions") diff --git a/src/lib/apis/audio/index.ts b/src/lib/apis/audio/index.ts index 1919d0ee7..6679420d9 100644 --- a/src/lib/apis/audio/index.ts +++ b/src/lib/apis/audio/index.ts @@ -1,5 +1,67 @@ import { AUDIO_API_BASE_URL } from '$lib/constants'; +export const getAudioConfig = async (token: string) => { + let error = null; + + const res = await fetch(`${AUDIO_API_BASE_URL}/config`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + console.log(err); + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +type OpenAIConfigForm = { + url: string; + key: string; +}; + +export const updateAudioConfig = async (token: string, payload: OpenAIConfigForm) => { + let error = null; + + const res = await fetch(`${AUDIO_API_BASE_URL}/config/update`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + ...payload + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + console.log(err); + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + export const transcribeAudio = async (token: string, file: File) => { const data = new FormData(); data.append('file', file); @@ -48,11 +110,17 @@ export const synthesizeOpenAISpeech = async ( input: text, voice: speaker }) - }).catch((err) => { - console.log(err); - error = err; - return null; - }); + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res; + }) + .catch((err) => { + error = err.detail; + console.log(err); + + return null; + }); if (error) { throw error; diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte index fd2de7273..db8eba169 100644 --- a/src/lib/components/chat/Messages/ResponseMessage.svelte +++ b/src/lib/components/chat/Messages/ResponseMessage.svelte @@ -176,10 +176,12 @@ const toggleSpeakMessage = async () => { if (speaking) { - speechSynthesis.cancel(); + try { + speechSynthesis.cancel(); - sentencesAudio[speakingIdx].pause(); - sentencesAudio[speakingIdx].currentTime = 0; + sentencesAudio[speakingIdx].pause(); + sentencesAudio[speakingIdx].currentTime = 0; + } catch {} speaking = null; speakingIdx = null; @@ -221,6 +223,10 @@ sentence ).catch((error) => { toast.error(error); + + speaking = null; + loadingSpeech = false; + return null; }); @@ -230,7 +236,6 @@ const audio = new Audio(blobUrl); sentencesAudio[idx] = audio; loadingSpeech = false; - lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx)); } } diff --git a/src/lib/components/chat/Settings/Audio.svelte b/src/lib/components/chat/Settings/Audio.svelte index b6a8a859a..9a38d7ab0 100644 --- a/src/lib/components/chat/Settings/Audio.svelte +++ b/src/lib/components/chat/Settings/Audio.svelte @@ -1,4 +1,5 @@
{ + on:submit|preventDefault={async () => { + await updateConfigHandler(); saveSettings({ audio: { STTEngine: STTEngine !== '' ? STTEngine : undefined, @@ -101,7 +125,7 @@ dispatch('save'); }} > -
+
{$i18n.t('STT Settings')}
@@ -196,6 +220,24 @@
+ {#if TTSEngine === 'openai'} +
+ + + +
+ {/if} +
{$i18n.t('Auto-playback response')}
@@ -241,16 +283,18 @@
{$i18n.t('Set Voice')}
- +
diff --git a/src/lib/components/documents/Settings/General.svelte b/src/lib/components/documents/Settings/General.svelte index 18c501340..a2bbec852 100644 --- a/src/lib/components/documents/Settings/General.svelte +++ b/src/lib/components/documents/Settings/General.svelte @@ -29,8 +29,8 @@ let embeddingEngine = ''; let embeddingModel = ''; - let openAIKey = ''; - let openAIUrl = ''; + let OpenAIKey = ''; + let OpenAIUrl = ''; let chunkSize = 0; let chunkOverlap = 0; @@ -79,7 +79,7 @@ return; } - if ((embeddingEngine === 'openai' && openAIKey === '') || openAIUrl === '') { + if ((embeddingEngine === 'openai' && OpenAIKey === '') || OpenAIUrl === '') { toast.error($i18n.t('OpenAI URL/Key required.')); return; } @@ -93,8 +93,8 @@ ...(embeddingEngine === 'openai' ? { openai_config: { - key: openAIKey, - url: openAIUrl + key: OpenAIKey, + url: OpenAIUrl } } : {}) @@ -133,8 +133,8 @@ embeddingEngine = embeddingConfig.embedding_engine; embeddingModel = embeddingConfig.embedding_model; - openAIKey = embeddingConfig.openai_config.key; - openAIUrl = embeddingConfig.openai_config.url; + OpenAIKey = embeddingConfig.openai_config.key; + OpenAIUrl = embeddingConfig.openai_config.url; } }; @@ -192,14 +192,14 @@