Merge pull request #29 from RodolfoCastanheira/main

Detect and use multiple languages automatically (xtts)
This commit is contained in:
matatonic 2024-07-01 13:55:58 -04:00 committed by GitHub
commit 54ad8a127b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 18 additions and 3 deletions

View File

@ -3,6 +3,7 @@ uvicorn
loguru
piper-tts
coqui-tts[languages]
langdetect
# Creating an environment where deepspeed works is complex, so it is disabled by default for now.
#deepspeed
@ -12,4 +13,4 @@ torchaudio; sys_platform != "darwin"
torch; --index-url https://download.pytorch.org/whl/cpu; sys_platform == "darwin"
torchaudio; --index-url https://download.pytorch.org/whl/cpu; sys_platform == "darwin"
# ROCM (Linux only) - use requirements.amd.txt
# ROCM (Linux only) - use requirements.amd.txt

View File

@ -16,7 +16,7 @@ from loguru import logger
from openedai import OpenAIStub, BadRequestError, ServiceUnavailableError
from pydantic import BaseModel
import uvicorn
from langdetect import detect
@contextlib.asynccontextmanager
async def lifespan(app):
@ -270,7 +270,21 @@ async def generate_speech(request: GenerateSpeechRequest):
# Pipe the output from piper/xtts to the input of ffmpeg
ffmpeg_args.extend(["-"])
language = voice_map.pop('language', 'en')
language = voice_map.pop('language', 'auto')
if language == 'auto':
try:
language = detect(input_text)
if language not in [
'en', 'es', 'fr', 'de', 'it', 'pt', 'pl', 'tr',
'ru', 'nl', 'cs', 'ar', 'zh-cn', 'hu', 'ko', 'ja', 'hi'
]:
logger.debug(f"Detected language {language} not supported, defaulting to en")
language = 'en'
else:
logger.debug(f"Detected language: {language}")
except:
language = 'en'
logger.debug(f"Failed to detect language, defaulting to en")
comment = voice_map.pop('comment', None) # ignored.