From 4a73a01c248b5ff33be8758adfbccbb4a9648d78 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Thu, 6 Jun 2024 20:44:42 -0700
Subject: [PATCH] refac: UUID-named transcription cache files, mic permission check

---
 backend/apps/audio/main.py                    | 18 +++++++++++---
 src/lib/components/chat/MessageInput.svelte   | 17 +++++++++++--
 src/lib/components/chat/Settings/Audio.svelte | 24 +++----------------
 3 files changed, 33 insertions(+), 26 deletions(-)
diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py
index 0f65a551e..216c6d42b 100644
--- a/backend/apps/audio/main.py
+++ b/backend/apps/audio/main.py
@@ -17,7 +17,7 @@ from fastapi.middleware.cors import CORSMiddleware
 from faster_whisper import WhisperModel
 from pydantic import BaseModel
 
-
+import uuid
 import requests
 import hashlib
 from pathlib import Path
@@ -181,8 +181,15 @@ def transcribe(
         )
 
     try:
-        filename = file.filename
-        file_path = f"{UPLOAD_DIR}/{filename}"
+        ext = file.filename.split(".")[-1]
+
+        id = uuid.uuid4()
+        filename = f"{id}.{ext}"
+
+        file_dir = f"{CACHE_DIR}/audio/transcriptions"
+        os.makedirs(file_dir, exist_ok=True)
+        file_path = f"{file_dir}/{filename}"
+
         contents = file.file.read()
         with open(file_path, "wb") as f:
             f.write(contents)
@@ -215,6 +222,11 @@ def transcribe(
 
         transcript = "".join([segment.text for segment in list(segments)])
 
+        # save the transcript to a json file
+        transcript_file = f"{file_dir}/{id}.json"
+        with open(transcript_file, "w") as f:
+            json.dump({"transcript": transcript}, f)
+
         return {"text": transcript.strip()}
 
     except Exception as e:
diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte
index ade0f5bf8..736c32f98 100644
--- a/src/lib/components/chat/MessageInput.svelte
+++ b/src/lib/components/chat/MessageInput.svelte
@@ -842,8 +842,21 @@
 												id="voice-input-button"
 												class=" text-gray-600 dark:text-gray-300 hover:bg-gray-50 dark:hover:bg-gray-850 transition rounded-full p-1.5 mr-0.5 self-center"
 												type="button"
-												on:click={() => {
-													recording = true;
+												on:click={async () => {
+													const res = await navigator.mediaDevices
+														.getUserMedia({ audio: true })
+														.catch(function (err) {
+															toast.error(
+																$i18n.t(`Permission denied when accessing microphone: {{error}}`, {
+																	error: err
+																})
+															);
+															return null;
+														});
+
+													if (res) {
+														recording = true;
+													}
 												}}
 											>
 												<svg
diff --git a/src/lib/components/chat/Settings/Audio.svelte b/src/lib/components/chat/Settings/Audio.svelte
index 039b710af..8c65ff39a 100644
--- a/src/lib/components/chat/Settings/Audio.svelte
+++ b/src/lib/components/chat/Settings/Audio.svelte
@@ -168,7 +168,7 @@
 					<select
 						class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
 						bind:value={STTEngine}
-						placeholder="Select a mode"
+						placeholder="Select an engine"
 						on:change={(e) => {
 							if (e.target.value !== '') {
 								navigator.mediaDevices.getUserMedia({ audio: true }).catch(function (err) {
@@ -182,30 +182,12 @@
 							}
 						}}
 					>
-						<option value="">{$i18n.t('Default (Web API)')}</option>
-						<option value="whisper-local">{$i18n.t('Whisper (Local)')}</option>
+						<option value="">{$i18n.t('Default (Whisper)')}</option>
+						<option value="web">{$i18n.t('Web API')}</option>
 					</select>
 				</div>
 			</div>
 
-			<div class=" py-0.5 flex w-full justify-between">
-				<div class=" self-center text-xs font-medium">{$i18n.t('Conversation Mode')}</div>
-
-				<button
-					class="p-1 px-3 text-xs flex rounded transition"
-					on:click={() => {
-						toggleConversationMode();
-					}}
-					type="button"
-				>
-					{#if conversationMode === true}
-						<span class="ml-2 self-center">{$i18n.t('On')}</span>
-					{:else}
-						<span class="ml-2 self-center">{$i18n.t('Off')}</span>
-					{/if}
-				</button>
-			</div>
-
 			<div class=" py-0.5 flex w-full justify-between">
 				<div class=" self-center text-xs font-medium">
 					{$i18n.t('Auto-send input after 3 sec.')}