mirror of
				https://github.com/open-webui/open-webui
				synced 2025-06-26 18:26:48 +00:00 
			
		
		
		
	Add support for Deepgram STT
This commit is contained in:
		
							parent
							
								
									b72150c881
								
							
						
					
					
						commit
						5df474abb9
					
				@ -1954,6 +1954,12 @@ WHISPER_MODEL_AUTO_UPDATE = (
 | 
			
		||||
    and os.environ.get("WHISPER_MODEL_AUTO_UPDATE", "").lower() == "true"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
# Add Deepgram configuration
 | 
			
		||||
DEEPGRAM_API_KEY = PersistentConfig(
 | 
			
		||||
    "DEEPGRAM_API_KEY",
 | 
			
		||||
    "audio.stt.deepgram.api_key",
 | 
			
		||||
    os.getenv("DEEPGRAM_API_KEY", ""),
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
AUDIO_STT_OPENAI_API_BASE_URL = PersistentConfig(
 | 
			
		||||
    "AUDIO_STT_OPENAI_API_BASE_URL",
 | 
			
		||||
 | 
			
		||||
@ -130,6 +130,7 @@ from open_webui.config import (
 | 
			
		||||
    AUDIO_TTS_AZURE_SPEECH_REGION,
 | 
			
		||||
    AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT,
 | 
			
		||||
    WHISPER_MODEL,
 | 
			
		||||
    DEEPGRAM_API_KEY,
 | 
			
		||||
    WHISPER_MODEL_AUTO_UPDATE,
 | 
			
		||||
    WHISPER_MODEL_DIR,
 | 
			
		||||
    # Retrieval
 | 
			
		||||
@ -609,6 +610,7 @@ app.state.config.STT_ENGINE = AUDIO_STT_ENGINE
 | 
			
		||||
app.state.config.STT_MODEL = AUDIO_STT_MODEL
 | 
			
		||||
 | 
			
		||||
app.state.config.WHISPER_MODEL = WHISPER_MODEL
 | 
			
		||||
app.state.config.DEEPGRAM_API_KEY = DEEPGRAM_API_KEY
 | 
			
		||||
 | 
			
		||||
app.state.config.TTS_OPENAI_API_BASE_URL = AUDIO_TTS_OPENAI_API_BASE_URL
 | 
			
		||||
app.state.config.TTS_OPENAI_API_KEY = AUDIO_TTS_OPENAI_API_KEY
 | 
			
		||||
 | 
			
		||||
@ -11,6 +11,7 @@ from pydub.silence import split_on_silence
 | 
			
		||||
import aiohttp
 | 
			
		||||
import aiofiles
 | 
			
		||||
import requests
 | 
			
		||||
import mimetypes
 | 
			
		||||
 | 
			
		||||
from fastapi import (
 | 
			
		||||
    Depends,
 | 
			
		||||
@ -138,6 +139,7 @@ class STTConfigForm(BaseModel):
 | 
			
		||||
    ENGINE: str
 | 
			
		||||
    MODEL: str
 | 
			
		||||
    WHISPER_MODEL: str
 | 
			
		||||
    DEEPGRAM_API_KEY: str
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AudioConfigUpdateForm(BaseModel):
 | 
			
		||||
@ -165,6 +167,7 @@ async def get_audio_config(request: Request, user=Depends(get_admin_user)):
 | 
			
		||||
            "ENGINE": request.app.state.config.STT_ENGINE,
 | 
			
		||||
            "MODEL": request.app.state.config.STT_MODEL,
 | 
			
		||||
            "WHISPER_MODEL": request.app.state.config.WHISPER_MODEL,
 | 
			
		||||
            "DEEPGRAM_API_KEY": request.app.state.config.DEEPGRAM_API_KEY,
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -190,6 +193,7 @@ async def update_audio_config(
 | 
			
		||||
    request.app.state.config.STT_ENGINE = form_data.stt.ENGINE
 | 
			
		||||
    request.app.state.config.STT_MODEL = form_data.stt.MODEL
 | 
			
		||||
    request.app.state.config.WHISPER_MODEL = form_data.stt.WHISPER_MODEL
 | 
			
		||||
    request.app.state.config.DEEPGRAM_API_KEY = form_data.stt.DEEPGRAM_API_KEY
 | 
			
		||||
 | 
			
		||||
    if request.app.state.config.STT_ENGINE == "":
 | 
			
		||||
        request.app.state.faster_whisper_model = set_faster_whisper_model(
 | 
			
		||||
@ -214,6 +218,7 @@ async def update_audio_config(
 | 
			
		||||
            "ENGINE": request.app.state.config.STT_ENGINE,
 | 
			
		||||
            "MODEL": request.app.state.config.STT_MODEL,
 | 
			
		||||
            "WHISPER_MODEL": request.app.state.config.WHISPER_MODEL,
 | 
			
		||||
            "DEEPGRAM_API_KEY": request.app.state.config.DEEPGRAM_API_KEY,
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -521,6 +526,65 @@ def transcribe(request: Request, file_path):
 | 
			
		||||
 | 
			
		||||
            raise Exception(detail if detail else "Open WebUI: Server Connection Error")
 | 
			
		||||
 | 
			
		||||
    elif request.app.state.config.STT_ENGINE == "deepgram":
 | 
			
		||||
        try:
 | 
			
		||||
            # Determine the MIME type of the file
 | 
			
		||||
            mime, _ = mimetypes.guess_type(file_path)
 | 
			
		||||
            if not mime:
 | 
			
		||||
                mime = "audio/wav"  # fallback to wav if undetectable
 | 
			
		||||
 | 
			
		||||
            # Read the audio file
 | 
			
		||||
            with open(file_path, "rb") as f:
 | 
			
		||||
                file_data = f.read()
 | 
			
		||||
 | 
			
		||||
            # Build headers and parameters
 | 
			
		||||
            headers = {
 | 
			
		||||
                "Authorization": f"Token {request.app.state.config.DEEPGRAM_API_KEY}",
 | 
			
		||||
                "Content-Type": mime,
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            # Add model if specified
 | 
			
		||||
            params = {}
 | 
			
		||||
            if request.app.state.config.STT_MODEL:
 | 
			
		||||
                params["model"] = request.app.state.config.STT_MODEL
 | 
			
		||||
 | 
			
		||||
            # Make request to Deepgram API
 | 
			
		||||
            r = requests.post(
 | 
			
		||||
                "https://api.deepgram.com/v1/listen",
 | 
			
		||||
                headers=headers,
 | 
			
		||||
                params=params,
 | 
			
		||||
                data=file_data,
 | 
			
		||||
            )
 | 
			
		||||
            r.raise_for_status()
 | 
			
		||||
            response_data = r.json()
 | 
			
		||||
 | 
			
		||||
            # Extract transcript from Deepgram response
 | 
			
		||||
            try:
 | 
			
		||||
                transcript = response_data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
 | 
			
		||||
            except (KeyError, IndexError) as e:
 | 
			
		||||
                log.error(f"Malformed response from Deepgram: {str(e)}")
 | 
			
		||||
                raise Exception("Failed to parse Deepgram response - unexpected response format")
 | 
			
		||||
            data = {"text": transcript.strip()}
 | 
			
		||||
 | 
			
		||||
            # Save transcript
 | 
			
		||||
            transcript_file = f"{file_dir}/{id}.json"
 | 
			
		||||
            with open(transcript_file, "w") as f:
 | 
			
		||||
                json.dump(data, f)
 | 
			
		||||
 | 
			
		||||
            return data
 | 
			
		||||
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            log.exception(e)
 | 
			
		||||
            detail = None
 | 
			
		||||
            if r is not None:
 | 
			
		||||
                try:
 | 
			
		||||
                    res = r.json()
 | 
			
		||||
                    if "error" in res:
 | 
			
		||||
                        detail = f"External: {res['error'].get('message', '')}"
 | 
			
		||||
                except Exception:
 | 
			
		||||
                    detail = f"External: {e}"
 | 
			
		||||
            raise Exception(detail if detail else "Open WebUI: Server Connection Error")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def compress_audio(file_path):
 | 
			
		||||
    if os.path.getsize(file_path) > MAX_FILE_SIZE:
 | 
			
		||||
 | 
			
		||||
@ -39,6 +39,7 @@
 | 
			
		||||
	let STT_ENGINE = '';
 | 
			
		||||
	let STT_MODEL = '';
 | 
			
		||||
	let STT_WHISPER_MODEL = '';
 | 
			
		||||
	let STT_DEEPGRAM_API_KEY = '';
 | 
			
		||||
 | 
			
		||||
	let STT_WHISPER_MODEL_LOADING = false;
 | 
			
		||||
 | 
			
		||||
@ -103,7 +104,8 @@
 | 
			
		||||
				OPENAI_API_KEY: STT_OPENAI_API_KEY,
 | 
			
		||||
				ENGINE: STT_ENGINE,
 | 
			
		||||
				MODEL: STT_MODEL,
 | 
			
		||||
				WHISPER_MODEL: STT_WHISPER_MODEL
 | 
			
		||||
				WHISPER_MODEL: STT_WHISPER_MODEL,
 | 
			
		||||
				DEEPGRAM_API_KEY: STT_DEEPGRAM_API_KEY
 | 
			
		||||
			}
 | 
			
		||||
		});
 | 
			
		||||
 | 
			
		||||
@ -143,6 +145,7 @@
 | 
			
		||||
			STT_ENGINE = res.stt.ENGINE;
 | 
			
		||||
			STT_MODEL = res.stt.MODEL;
 | 
			
		||||
			STT_WHISPER_MODEL = res.stt.WHISPER_MODEL;
 | 
			
		||||
			STT_DEEPGRAM_API_KEY = res.stt.DEEPGRAM_API_KEY;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		await getVoices();
 | 
			
		||||
@ -173,6 +176,7 @@
 | 
			
		||||
							<option value="">{$i18n.t('Whisper (Local)')}</option>
 | 
			
		||||
							<option value="openai">OpenAI</option>
 | 
			
		||||
							<option value="web">{$i18n.t('Web API')}</option>
 | 
			
		||||
							<option value="deepgram">Deepgram</option>
 | 
			
		||||
						</select>
 | 
			
		||||
					</div>
 | 
			
		||||
				</div>
 | 
			
		||||
@ -210,6 +214,37 @@
 | 
			
		||||
							</div>
 | 
			
		||||
						</div>
 | 
			
		||||
					</div>
 | 
			
		||||
				{:else if STT_ENGINE === 'deepgram'}
 | 
			
		||||
					<div>
 | 
			
		||||
						<div class="mt-1 flex gap-2 mb-1">
 | 
			
		||||
							<SensitiveInput placeholder={$i18n.t('API Key')} bind:value={STT_DEEPGRAM_API_KEY} />
 | 
			
		||||
						</div>
 | 
			
		||||
					</div>
 | 
			
		||||
 | 
			
		||||
					<hr class=" dark:border-gray-850 my-2" />
 | 
			
		||||
 | 
			
		||||
					<div>
 | 
			
		||||
						<div class=" mb-1.5 text-sm font-medium">{$i18n.t('STT Model')}</div>
 | 
			
		||||
						<div class="flex w-full">
 | 
			
		||||
							<div class="flex-1">
 | 
			
		||||
								<input
 | 
			
		||||
									class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
 | 
			
		||||
									bind:value={STT_MODEL}
 | 
			
		||||
									placeholder="Select a model (optional)"
 | 
			
		||||
								/>
 | 
			
		||||
							</div>
 | 
			
		||||
						</div>
 | 
			
		||||
						<div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
 | 
			
		||||
							{$i18n.t('Leave model field empty to use the default model.')}
 | 
			
		||||
							<a
 | 
			
		||||
								class=" hover:underline dark:text-gray-200 text-gray-800"
 | 
			
		||||
								href="https://developers.deepgram.com/docs/models"
 | 
			
		||||
								target="_blank"
 | 
			
		||||
							>
 | 
			
		||||
								{$i18n.t('Click here to see available models.')}
 | 
			
		||||
							</a>
 | 
			
		||||
						</div>
 | 
			
		||||
					</div>
 | 
			
		||||
				{:else if STT_ENGINE === ''}
 | 
			
		||||
					<div>
 | 
			
		||||
						<div class=" mb-1.5 text-sm font-medium">{$i18n.t('STT Model')}</div>
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user