0.2.0 rc5

This commit is contained in:
matatonic 2023-11-27 21:33:59 -05:00
parent eb1b2f12c7
commit b44992c806
9 changed files with 55 additions and 15 deletions

View File

@ -9,8 +9,8 @@ services:
tty: true
ports:
- "8000:8000"
#volumes:
# - .:/app/
volumes:
- .:/app/
# Below can be removed if not using GPU
runtime: nvidia
deploy:

7
download_samples.sh Executable file
View File

@ -0,0 +1,7 @@
#!/bin/sh
# Download the OpenAI sample voice clips and store them under voices/ as
# 22050 Hz, single-channel WAV files (resampled on the fly by ffmpeg).
# Run from the directory that contains (or should contain) voices/.

# fetch_sample NAME OUTFILE
#   Stream NAME.wav from the OpenAI docs CDN into ffmpeg and write the
#   converted audio to OUTFILE. Nothing is written to disk in between.
fetch_sample() {
    wget -q "https://cdn.openai.com/API/docs/audio/$1.wav" -O - |
        ffmpeg -loglevel error -i - -ar 22050 -ac 1 "$2"
}

# Make sure the output directory exists so ffmpeg can create its files.
mkdir -p voices

for i in echo fable onyx nova shimmer; do
    fetch_sample "$i" "voices/$i.wav"
done

# In testing, alloy sounded REALLY BAD after cloning. Save it anyway (as
# alloy0.wav), but use another sample as the default.
fetch_sample alloy voices/alloy0.wav

View File

@ -2,4 +2,4 @@
export COQUI_TOS_AGREED=1
model="tts_models/multilingual/multi-dataset/xtts_v2"
python -c "from TTS.utils.manage import ModelManager; ModelManager().download_model('$model')"
$(cd voices/ && ./download_samples.sh)
./download_samples.sh

View File

@ -18,6 +18,7 @@ app = FastAPI()
class xtts_wrapper():
def __init__(self, model_name):
global args
self.xtts = TTS(model_name=model_name, progress_bar=False).to(args.xtts_device)
def tts(self, text, speaker_wav, speed):
@ -57,6 +58,7 @@ class GenerateSpeechRequest(BaseModel):
@app.post("/v1/audio/speech")
async def generate_speech(request: GenerateSpeechRequest):
global xtts, args
input_text = preprocess(request.input)
model = request.model
voice = request.voice
@ -132,7 +134,7 @@ if __name__ == "__main__":
parser.add_argument('--piper_cuda', action='store_true', default=False, help="Enable cuda for piper. Note: --cuda/onnxruntime-gpu is not working for me, but cpu is fast enough")
parser.add_argument('--xtts_device', action='store', default="cuda", help="Set the device for the xtts model. The special value of 'none' will use piper for all models.")
parser.add_argument('--preload_xtts', action='store_true', default=False, help="Preload the xtts model. By default it's loaded on first use.")
parser.add_argument('-P', '--port', action='store', default=8000, help="Server tcp port")
parser.add_argument('-P', '--port', action='store', default=8000, type=int, help="Server tcp port")
parser.add_argument('-H', '--host', action='store', default='localhost', help="Host to listen on, Ex. 0.0.0.0")
args = parser.parse_args()

38
test_voices.sh Normal file
View File

@ -0,0 +1,38 @@
#!/bin/sh
# Audible comparison of every voice: for each one, play the original OpenAI
# sample, then the tts-1 rendering, then the tts-1-hd rendering.
# Expects the speech server on localhost:8000 and mpv/curl/wget on PATH.

# speak MODEL TEXT VOICE
#   POST one speech request to the local server and play the reply with mpv.
speak() {
    curl -s http://localhost:8000/v1/audio/speech -H "Content-Type: application/json" -d "{
\"model\": \"$1\",
\"input\": \"$2\",
\"voice\": \"$3\",
\"speed\": 1.0
}" | mpv --really-quiet -
}

# Spoken introduction explaining the playback order.
speak tts-1 "I'm going to play you the original voice, followed by the piper voice and finally the X T T S version 2 voice" echo

for voice in alloy echo fable onyx nova shimmer ; do
    echo "$voice"
    # Announce, then play, the reference recording straight from the CDN.
    speak tts-1 "original" echo
    wget -q https://cdn.openai.com/API/docs/audio/$voice.wav -O - | mpv --really-quiet -
    # Same sentence through the standard model and then the HD model.
    speak tts-1 "The quick brown fox jumped over the lazy dog. This voice is called $voice, how do you like this voice?" "$voice"
    speak tts-1-hd "The quick brown fox jumped over the lazy dog. This HD voice is called $voice, how do you like this voice?" "$voice"
done

View File

@ -32,13 +32,13 @@ tts-1-hd:
speaker: voices/fable.wav
onyx:
model: tts_models/multilingual/multi-dataset/xtts_v2
speaker: voices/voices/onyx.wav
speaker: voices/onyx.wav
nova:
model: tts_models/multilingual/multi-dataset/xtts_v2
speaker: voices/voices/nova.wav
speaker: voices/nova.wav
shimmer:
model: tts_models/multilingual/multi-dataset/xtts_v2
speaker: voices/voices/shimmer.wav
speaker: voices/shimmer.wav
me:
model: tts_models/multilingual/multi-dataset/xtts_v2
speaker: voices/voices/me.wav # this could be you
speaker: voices/me.wav # this could be you

BIN
voices/alloy.wav Normal file

Binary file not shown.

View File

@ -1,7 +0,0 @@
#!/bin/sh
wget -q https://cdn.openai.com/API/docs/audio/alloy.wav
wget -q https://cdn.openai.com/API/docs/audio/echo.wav
wget -q https://cdn.openai.com/API/docs/audio/fable.wav
wget -q https://cdn.openai.com/API/docs/audio/onyx.wav
wget -q https://cdn.openai.com/API/docs/audio/nova.wav
wget -q https://cdn.openai.com/API/docs/audio/shimmer.wav

View File