This commit is contained in:
Timothy J. Baek 2024-09-19 02:40:54 +02:00
parent 4d9677e808
commit afa42dd2e4
2 changed files with 100 additions and 97 deletions

View File

@ -161,7 +161,9 @@ async def update_audio_config(
app.state.config.TTS_VOICE = form_data.tts.VOICE app.state.config.TTS_VOICE = form_data.tts.VOICE
app.state.config.TTS_SPLIT_ON = form_data.tts.SPLIT_ON app.state.config.TTS_SPLIT_ON = form_data.tts.SPLIT_ON
app.state.config.TTS_AZURE_SPEECH_REGION = form_data.tts.AZURE_SPEECH_REGION app.state.config.TTS_AZURE_SPEECH_REGION = form_data.tts.AZURE_SPEECH_REGION
app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT = form_data.tts.AZURE_SPEECH_OUTPUT_FORMAT app.state.config.TTS_AZURE_SPEECH_OUTPUT_FORMAT = (
form_data.tts.AZURE_SPEECH_OUTPUT_FORMAT
)
app.state.config.STT_OPENAI_API_BASE_URL = form_data.stt.OPENAI_API_BASE_URL app.state.config.STT_OPENAI_API_BASE_URL = form_data.stt.OPENAI_API_BASE_URL
app.state.config.STT_OPENAI_API_KEY = form_data.stt.OPENAI_API_KEY app.state.config.STT_OPENAI_API_KEY = form_data.stt.OPENAI_API_KEY
@ -314,7 +316,7 @@ async def speech(request: Request, user=Depends(get_verified_user)):
detail=error_detail, detail=error_detail,
) )
elif app.state.config.TTS_ENGINE == "azurespeechservice": elif app.state.config.TTS_ENGINE == "azure":
payload = None payload = None
try: try:
payload = json.loads(body.decode("utf-8")) payload = json.loads(body.decode("utf-8"))
@ -329,9 +331,9 @@ async def speech(request: Request, user=Depends(get_verified_user)):
url = f"https://{region}.tts.speech.microsoft.com/cognitiveservices/v1" url = f"https://{region}.tts.speech.microsoft.com/cognitiveservices/v1"
headers = { headers = {
'Ocp-Apim-Subscription-Key': app.state.config.TTS_API_KEY, "Ocp-Apim-Subscription-Key": app.state.config.TTS_API_KEY,
'Content-Type': 'application/ssml+xml', "Content-Type": "application/ssml+xml",
'X-Microsoft-OutputFormat': output_format "X-Microsoft-OutputFormat": output_format,
} }
data = f"""<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{locale}"> data = f"""<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{locale}">
@ -347,9 +349,8 @@ async def speech(request: Request, user=Depends(get_verified_user)):
else: else:
log.error(f"Error synthesizing speech - {response.reason}") log.error(f"Error synthesizing speech - {response.reason}")
raise HTTPException( raise HTTPException(
status_code=500, status_code=500, detail=f"Error synthesizing speech - {response.reason}"
detail=f"Error synthesizing speech - {response.reason}") )
@app.post("/transcriptions") @app.post("/transcriptions")
@ -528,23 +529,22 @@ def get_available_voices() -> dict:
except Exception: except Exception:
# Avoided @lru_cache with exception # Avoided @lru_cache with exception
pass pass
elif app.state.config.TTS_ENGINE == "azurespeechservice": elif app.state.config.TTS_ENGINE == "azure":
try: try:
region = app.state.config.TTS_AZURE_SPEECH_REGION region = app.state.config.TTS_AZURE_SPEECH_REGION
url = f"https://{region}.tts.speech.microsoft.com/cognitiveservices/voices/list" url = f"https://{region}.tts.speech.microsoft.com/cognitiveservices/voices/list"
headers = { headers = {"Ocp-Apim-Subscription-Key": app.state.config.TTS_API_KEY}
'Ocp-Apim-Subscription-Key': app.state.config.TTS_API_KEY
}
response = requests.get(url, headers=headers) response = requests.get(url, headers=headers)
response.raise_for_status() response.raise_for_status()
voices = response.json() voices = response.json()
for voice in voices: for voice in voices:
ret[voice['ShortName']] = f"{voice['DisplayName']} ({voice['ShortName']})" ret[voice["ShortName"]] = (
f"{voice['DisplayName']} ({voice['ShortName']})"
)
except requests.RequestException as e: except requests.RequestException as e:
log.error(f"Error fetching voices: {str(e)}") log.error(f"Error fetching voices: {str(e)}")
return ret return ret

View File

@ -91,7 +91,7 @@
VOICE: TTS_VOICE, VOICE: TTS_VOICE,
SPLIT_ON: TTS_SPLIT_ON, SPLIT_ON: TTS_SPLIT_ON,
AZURE_SPEECH_REGION: TTS_AZURE_SPEECH_REGION, AZURE_SPEECH_REGION: TTS_AZURE_SPEECH_REGION,
AZURE_SPEECH_OUTPUT_FORMAT: TTS_AZURE_SPEECH_OUTPUT_FORMAT, AZURE_SPEECH_OUTPUT_FORMAT: TTS_AZURE_SPEECH_OUTPUT_FORMAT
}, },
stt: { stt: {
OPENAI_API_BASE_URL: STT_OPENAI_API_BASE_URL, OPENAI_API_BASE_URL: STT_OPENAI_API_BASE_URL,
@ -231,7 +231,7 @@
<option value="">{$i18n.t('Web API')}</option> <option value="">{$i18n.t('Web API')}</option>
<option value="openai">{$i18n.t('OpenAI')}</option> <option value="openai">{$i18n.t('OpenAI')}</option>
<option value="elevenlabs">{$i18n.t('ElevenLabs')}</option> <option value="elevenlabs">{$i18n.t('ElevenLabs')}</option>
<option value="azurespeechservice">{$i18n.t('Azure Speech service')}</option> <option value="azure">{$i18n.t('Azure AI Speech')}</option>
</select> </select>
</div> </div>
</div> </div>
@ -260,7 +260,7 @@
/> />
</div> </div>
</div> </div>
{:else if TTS_ENGINE === 'azurespeechservice'} {:else if TTS_ENGINE === 'azure'}
<div> <div>
<div class="mt-1 flex gap-2 mb-1"> <div class="mt-1 flex gap-2 mb-1">
<input <input
@ -384,7 +384,7 @@
</div> </div>
</div> </div>
</div> </div>
{:else if TTS_ENGINE === 'azurespeechservice'} {:else if TTS_ENGINE === 'azure'}
<div class=" flex gap-2"> <div class=" flex gap-2">
<div class="w-full"> <div class="w-full">
<div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div> <div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
@ -408,7 +408,10 @@
<div class="w-full"> <div class="w-full">
<div class=" mb-1.5 text-sm font-medium"> <div class=" mb-1.5 text-sm font-medium">
{$i18n.t('Output format')} {$i18n.t('Output format')}
<a href="https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-text-to-speech?tabs=streaming#audio-outputs" target="_blank" > <a
href="https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-text-to-speech?tabs=streaming#audio-outputs"
target="_blank"
>
<small>{$i18n.t('Available list')}</small> <small>{$i18n.t('Available list')}</small>
</a> </a>
</div> </div>