mirror of
https://github.com/open-webui/open-webui
synced 2025-05-17 03:54:02 +00:00
Merge pull request #9242 from NachoNoCheese/dev
feat: Add Deepgram STT Support
This commit is contained in:
commit
8c2164928a
@ -2012,6 +2012,12 @@ WHISPER_MODEL_AUTO_UPDATE = (
|
|||||||
and os.environ.get("WHISPER_MODEL_AUTO_UPDATE", "").lower() == "true"
|
and os.environ.get("WHISPER_MODEL_AUTO_UPDATE", "").lower() == "true"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Add Deepgram configuration
|
||||||
|
DEEPGRAM_API_KEY = PersistentConfig(
|
||||||
|
"DEEPGRAM_API_KEY",
|
||||||
|
"audio.stt.deepgram.api_key",
|
||||||
|
os.getenv("DEEPGRAM_API_KEY", ""),
|
||||||
|
)
|
||||||
|
|
||||||
AUDIO_STT_OPENAI_API_BASE_URL = PersistentConfig(
|
AUDIO_STT_OPENAI_API_BASE_URL = PersistentConfig(
|
||||||
"AUDIO_STT_OPENAI_API_BASE_URL",
|
"AUDIO_STT_OPENAI_API_BASE_URL",
|
||||||
|
@ -130,6 +130,7 @@ from open_webui.config import (
|
|||||||
AUDIO_TTS_AZURE_SPEECH_REGION,
|
AUDIO_TTS_AZURE_SPEECH_REGION,
|
||||||
AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT,
|
||||||
WHISPER_MODEL,
|
WHISPER_MODEL,
|
||||||
|
DEEPGRAM_API_KEY,
|
||||||
WHISPER_MODEL_AUTO_UPDATE,
|
WHISPER_MODEL_AUTO_UPDATE,
|
||||||
WHISPER_MODEL_DIR,
|
WHISPER_MODEL_DIR,
|
||||||
# Retrieval
|
# Retrieval
|
||||||
@ -611,6 +612,7 @@ app.state.config.STT_ENGINE = AUDIO_STT_ENGINE
|
|||||||
app.state.config.STT_MODEL = AUDIO_STT_MODEL
|
app.state.config.STT_MODEL = AUDIO_STT_MODEL
|
||||||
|
|
||||||
app.state.config.WHISPER_MODEL = WHISPER_MODEL
|
app.state.config.WHISPER_MODEL = WHISPER_MODEL
|
||||||
|
app.state.config.DEEPGRAM_API_KEY = DEEPGRAM_API_KEY
|
||||||
|
|
||||||
app.state.config.TTS_OPENAI_API_BASE_URL = AUDIO_TTS_OPENAI_API_BASE_URL
|
app.state.config.TTS_OPENAI_API_BASE_URL = AUDIO_TTS_OPENAI_API_BASE_URL
|
||||||
app.state.config.TTS_OPENAI_API_KEY = AUDIO_TTS_OPENAI_API_KEY
|
app.state.config.TTS_OPENAI_API_KEY = AUDIO_TTS_OPENAI_API_KEY
|
||||||
|
@ -11,6 +11,7 @@ from pydub.silence import split_on_silence
|
|||||||
import aiohttp
|
import aiohttp
|
||||||
import aiofiles
|
import aiofiles
|
||||||
import requests
|
import requests
|
||||||
|
import mimetypes
|
||||||
|
|
||||||
from fastapi import (
|
from fastapi import (
|
||||||
Depends,
|
Depends,
|
||||||
@ -138,6 +139,7 @@ class STTConfigForm(BaseModel):
|
|||||||
ENGINE: str
|
ENGINE: str
|
||||||
MODEL: str
|
MODEL: str
|
||||||
WHISPER_MODEL: str
|
WHISPER_MODEL: str
|
||||||
|
DEEPGRAM_API_KEY: str
|
||||||
|
|
||||||
|
|
||||||
class AudioConfigUpdateForm(BaseModel):
|
class AudioConfigUpdateForm(BaseModel):
|
||||||
@ -165,6 +167,7 @@ async def get_audio_config(request: Request, user=Depends(get_admin_user)):
|
|||||||
"ENGINE": request.app.state.config.STT_ENGINE,
|
"ENGINE": request.app.state.config.STT_ENGINE,
|
||||||
"MODEL": request.app.state.config.STT_MODEL,
|
"MODEL": request.app.state.config.STT_MODEL,
|
||||||
"WHISPER_MODEL": request.app.state.config.WHISPER_MODEL,
|
"WHISPER_MODEL": request.app.state.config.WHISPER_MODEL,
|
||||||
|
"DEEPGRAM_API_KEY": request.app.state.config.DEEPGRAM_API_KEY,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -190,6 +193,7 @@ async def update_audio_config(
|
|||||||
request.app.state.config.STT_ENGINE = form_data.stt.ENGINE
|
request.app.state.config.STT_ENGINE = form_data.stt.ENGINE
|
||||||
request.app.state.config.STT_MODEL = form_data.stt.MODEL
|
request.app.state.config.STT_MODEL = form_data.stt.MODEL
|
||||||
request.app.state.config.WHISPER_MODEL = form_data.stt.WHISPER_MODEL
|
request.app.state.config.WHISPER_MODEL = form_data.stt.WHISPER_MODEL
|
||||||
|
request.app.state.config.DEEPGRAM_API_KEY = form_data.stt.DEEPGRAM_API_KEY
|
||||||
|
|
||||||
if request.app.state.config.STT_ENGINE == "":
|
if request.app.state.config.STT_ENGINE == "":
|
||||||
request.app.state.faster_whisper_model = set_faster_whisper_model(
|
request.app.state.faster_whisper_model = set_faster_whisper_model(
|
||||||
@ -214,6 +218,7 @@ async def update_audio_config(
|
|||||||
"ENGINE": request.app.state.config.STT_ENGINE,
|
"ENGINE": request.app.state.config.STT_ENGINE,
|
||||||
"MODEL": request.app.state.config.STT_MODEL,
|
"MODEL": request.app.state.config.STT_MODEL,
|
||||||
"WHISPER_MODEL": request.app.state.config.WHISPER_MODEL,
|
"WHISPER_MODEL": request.app.state.config.WHISPER_MODEL,
|
||||||
|
"DEEPGRAM_API_KEY": request.app.state.config.DEEPGRAM_API_KEY,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -521,6 +526,65 @@ def transcribe(request: Request, file_path):
|
|||||||
|
|
||||||
raise Exception(detail if detail else "Open WebUI: Server Connection Error")
|
raise Exception(detail if detail else "Open WebUI: Server Connection Error")
|
||||||
|
|
||||||
|
elif request.app.state.config.STT_ENGINE == "deepgram":
|
||||||
|
try:
|
||||||
|
# Determine the MIME type of the file
|
||||||
|
mime, _ = mimetypes.guess_type(file_path)
|
||||||
|
if not mime:
|
||||||
|
mime = "audio/wav" # fallback to wav if undetectable
|
||||||
|
|
||||||
|
# Read the audio file
|
||||||
|
with open(file_path, "rb") as f:
|
||||||
|
file_data = f.read()
|
||||||
|
|
||||||
|
# Build headers and parameters
|
||||||
|
headers = {
|
||||||
|
"Authorization": f"Token {request.app.state.config.DEEPGRAM_API_KEY}",
|
||||||
|
"Content-Type": mime,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add model if specified
|
||||||
|
params = {}
|
||||||
|
if request.app.state.config.STT_MODEL:
|
||||||
|
params["model"] = request.app.state.config.STT_MODEL
|
||||||
|
|
||||||
|
# Make request to Deepgram API
|
||||||
|
r = requests.post(
|
||||||
|
"https://api.deepgram.com/v1/listen",
|
||||||
|
headers=headers,
|
||||||
|
params=params,
|
||||||
|
data=file_data,
|
||||||
|
)
|
||||||
|
r.raise_for_status()
|
||||||
|
response_data = r.json()
|
||||||
|
|
||||||
|
# Extract transcript from Deepgram response
|
||||||
|
try:
|
||||||
|
transcript = response_data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
|
||||||
|
except (KeyError, IndexError) as e:
|
||||||
|
log.error(f"Malformed response from Deepgram: {str(e)}")
|
||||||
|
raise Exception("Failed to parse Deepgram response - unexpected response format")
|
||||||
|
data = {"text": transcript.strip()}
|
||||||
|
|
||||||
|
# Save transcript
|
||||||
|
transcript_file = f"{file_dir}/{id}.json"
|
||||||
|
with open(transcript_file, "w") as f:
|
||||||
|
json.dump(data, f)
|
||||||
|
|
||||||
|
return data
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
log.exception(e)
|
||||||
|
detail = None
|
||||||
|
if r is not None:
|
||||||
|
try:
|
||||||
|
res = r.json()
|
||||||
|
if "error" in res:
|
||||||
|
detail = f"External: {res['error'].get('message', '')}"
|
||||||
|
except Exception:
|
||||||
|
detail = f"External: {e}"
|
||||||
|
raise Exception(detail if detail else "Open WebUI: Server Connection Error")
|
||||||
|
|
||||||
|
|
||||||
def compress_audio(file_path):
|
def compress_audio(file_path):
|
||||||
if os.path.getsize(file_path) > MAX_FILE_SIZE:
|
if os.path.getsize(file_path) > MAX_FILE_SIZE:
|
||||||
|
@ -39,6 +39,7 @@
|
|||||||
let STT_ENGINE = '';
|
let STT_ENGINE = '';
|
||||||
let STT_MODEL = '';
|
let STT_MODEL = '';
|
||||||
let STT_WHISPER_MODEL = '';
|
let STT_WHISPER_MODEL = '';
|
||||||
|
let STT_DEEPGRAM_API_KEY = '';
|
||||||
|
|
||||||
let STT_WHISPER_MODEL_LOADING = false;
|
let STT_WHISPER_MODEL_LOADING = false;
|
||||||
|
|
||||||
@ -103,7 +104,8 @@
|
|||||||
OPENAI_API_KEY: STT_OPENAI_API_KEY,
|
OPENAI_API_KEY: STT_OPENAI_API_KEY,
|
||||||
ENGINE: STT_ENGINE,
|
ENGINE: STT_ENGINE,
|
||||||
MODEL: STT_MODEL,
|
MODEL: STT_MODEL,
|
||||||
WHISPER_MODEL: STT_WHISPER_MODEL
|
WHISPER_MODEL: STT_WHISPER_MODEL,
|
||||||
|
DEEPGRAM_API_KEY: STT_DEEPGRAM_API_KEY
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -143,6 +145,7 @@
|
|||||||
STT_ENGINE = res.stt.ENGINE;
|
STT_ENGINE = res.stt.ENGINE;
|
||||||
STT_MODEL = res.stt.MODEL;
|
STT_MODEL = res.stt.MODEL;
|
||||||
STT_WHISPER_MODEL = res.stt.WHISPER_MODEL;
|
STT_WHISPER_MODEL = res.stt.WHISPER_MODEL;
|
||||||
|
STT_DEEPGRAM_API_KEY = res.stt.DEEPGRAM_API_KEY;
|
||||||
}
|
}
|
||||||
|
|
||||||
await getVoices();
|
await getVoices();
|
||||||
@ -173,6 +176,7 @@
|
|||||||
<option value="">{$i18n.t('Whisper (Local)')}</option>
|
<option value="">{$i18n.t('Whisper (Local)')}</option>
|
||||||
<option value="openai">OpenAI</option>
|
<option value="openai">OpenAI</option>
|
||||||
<option value="web">{$i18n.t('Web API')}</option>
|
<option value="web">{$i18n.t('Web API')}</option>
|
||||||
|
<option value="deepgram">Deepgram</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -210,6 +214,37 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
{:else if STT_ENGINE === 'deepgram'}
|
||||||
|
<div>
|
||||||
|
<div class="mt-1 flex gap-2 mb-1">
|
||||||
|
<SensitiveInput placeholder={$i18n.t('API Key')} bind:value={STT_DEEPGRAM_API_KEY} />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<hr class=" dark:border-gray-850 my-2" />
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('STT Model')}</div>
|
||||||
|
<div class="flex w-full">
|
||||||
|
<div class="flex-1">
|
||||||
|
<input
|
||||||
|
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
|
||||||
|
bind:value={STT_MODEL}
|
||||||
|
placeholder="Select a model (optional)"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
|
||||||
|
{$i18n.t('Leave model field empty to use the default model.')}
|
||||||
|
<a
|
||||||
|
class=" hover:underline dark:text-gray-200 text-gray-800"
|
||||||
|
href="https://developers.deepgram.com/docs/models"
|
||||||
|
target="_blank"
|
||||||
|
>
|
||||||
|
{$i18n.t('Click here to see available models.')}
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
{:else if STT_ENGINE === ''}
|
{:else if STT_ENGINE === ''}
|
||||||
<div>
|
<div>
|
||||||
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('STT Model')}</div>
|
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('STT Model')}</div>
|
||||||
|
Loading…
Reference in New Issue
Block a user