mirror of
https://github.com/matatonic/openedai-speech
synced 2025-06-26 18:16:32 +00:00
0.2.0 rc5
This commit is contained in:
parent
eb1b2f12c7
commit
b44992c806
@ -9,8 +9,8 @@ services:
|
||||
tty: true
|
||||
ports:
|
||||
- "8000:8000"
|
||||
#volumes:
|
||||
# - .:/app/
|
||||
volumes:
|
||||
- .:/app/
|
||||
# Below can be removed if not using GPU
|
||||
runtime: nvidia
|
||||
deploy:
|
||||
|
||||
7
download_samples.sh
Executable file
7
download_samples.sh
Executable file
@ -0,0 +1,7 @@
|
||||
#!/bin/sh
|
||||
for i in echo fable onyx nova shimmer; do
|
||||
wget -q https://cdn.openai.com/API/docs/audio/$i.wav -O - | ffmpeg -loglevel error -i - -ar 22050 -ac 1 voices/$i.wav
|
||||
done
|
||||
|
||||
# In testing, alloy sounded REALLY BAD after cloning. Save it anyway, but use another as the default.
|
||||
wget -q https://cdn.openai.com/API/docs/audio/alloy.wav -O - | ffmpeg -loglevel error -i - -ar 22050 -ac 1 voices/alloy0.wav
|
||||
@ -2,4 +2,4 @@
|
||||
export COQUI_TOS_AGREED=1
|
||||
model="tts_models/multilingual/multi-dataset/xtts_v2"
|
||||
python -c "from TTS.utils.manage import ModelManager; ModelManager().download_model('$model')"
|
||||
$(cd voices/ && ./download_samples.sh)
|
||||
./download_samples.sh
|
||||
4
main.py
4
main.py
@ -18,6 +18,7 @@ app = FastAPI()
|
||||
|
||||
class xtts_wrapper():
|
||||
def __init__(self, model_name):
|
||||
global args
|
||||
self.xtts = TTS(model_name=model_name, progress_bar=False).to(args.xtts_device)
|
||||
|
||||
def tts(self, text, speaker_wav, speed):
|
||||
@ -57,6 +58,7 @@ class GenerateSpeechRequest(BaseModel):
|
||||
|
||||
@app.post("/v1/audio/speech")
|
||||
async def generate_speech(request: GenerateSpeechRequest):
|
||||
global xtts, args
|
||||
input_text = preprocess(request.input)
|
||||
model = request.model
|
||||
voice = request.voice
|
||||
@ -132,7 +134,7 @@ if __name__ == "__main__":
|
||||
parser.add_argument('--piper_cuda', action='store_true', default=False, help="Enable cuda for piper. Note: --cuda/onnxruntime-gpu is not working for me, but cpu is fast enough")
|
||||
parser.add_argument('--xtts_device', action='store', default="cuda", help="Set the device for the xtts model. The special value of 'none' will use piper for all models.")
|
||||
parser.add_argument('--preload_xtts', action='store_true', default=False, help="Preload the xtts model. By default it's loaded on first use.")
|
||||
parser.add_argument('-P', '--port', action='store', default=8000, help="Server tcp port")
|
||||
parser.add_argument('-P', '--port', action='store', default=8000, type=int, help="Server tcp port")
|
||||
parser.add_argument('-H', '--host', action='store', default='localhost', help="Host to listen on, Ex. 0.0.0.0")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
38
test_voices.sh
Normal file
38
test_voices.sh
Normal file
@ -0,0 +1,38 @@
|
||||
#!/bin/sh
|
||||
|
||||
curl -s http://localhost:8000/v1/audio/speech -H "Content-Type: application/json" -d "{
|
||||
\"model\": \"tts-1\",
|
||||
\"input\": \"I'm going to play you the original voice, followed by the piper voice and finally the X T T S version 2 voice\",
|
||||
\"voice\": \"echo\",
|
||||
\"speed\": 1.0
|
||||
}" | mpv --really-quiet -
|
||||
|
||||
for voice in alloy echo fable onyx nova shimmer ; do
|
||||
|
||||
echo $voice
|
||||
|
||||
curl -s http://localhost:8000/v1/audio/speech -H "Content-Type: application/json" -d "{
|
||||
\"model\": \"tts-1\",
|
||||
\"input\": \"original\",
|
||||
\"voice\": \"echo\",
|
||||
\"speed\": 1.0
|
||||
}" | mpv --really-quiet -
|
||||
|
||||
wget -q https://cdn.openai.com/API/docs/audio/$voice.wav -O - | mpv --really-quiet -
|
||||
|
||||
curl -s http://localhost:8000/v1/audio/speech -H "Content-Type: application/json" -d "{
|
||||
\"model\": \"tts-1\",
|
||||
\"input\": \"The quick brown fox jumped over the lazy dog. This voice is called $voice, how do you like this voice?\",
|
||||
\"voice\": \"$voice\",
|
||||
\"speed\": 1.0
|
||||
}" | mpv --really-quiet -
|
||||
|
||||
curl -s http://localhost:8000/v1/audio/speech -H "Content-Type: application/json" -d "{
|
||||
\"model\": \"tts-1-hd\",
|
||||
\"input\": \"The quick brown fox jumped over the lazy dog. This HD voice is called $voice, how do you like this voice?\",
|
||||
\"voice\": \"$voice\",
|
||||
\"speed\": 1.0
|
||||
}" | mpv --really-quiet -
|
||||
|
||||
done
|
||||
|
||||
@ -32,13 +32,13 @@ tts-1-hd:
|
||||
speaker: voices/fable.wav
|
||||
onyx:
|
||||
model: tts_models/multilingual/multi-dataset/xtts_v2
|
||||
speaker: voices/voices/onyx.wav
|
||||
speaker: voices/onyx.wav
|
||||
nova:
|
||||
model: tts_models/multilingual/multi-dataset/xtts_v2
|
||||
speaker: voices/voices/nova.wav
|
||||
speaker: voices/nova.wav
|
||||
shimmer:
|
||||
model: tts_models/multilingual/multi-dataset/xtts_v2
|
||||
speaker: voices/voices/shimmer.wav
|
||||
speaker: voices/shimmer.wav
|
||||
me:
|
||||
model: tts_models/multilingual/multi-dataset/xtts_v2
|
||||
speaker: voices/voices/me.wav # this could be you
|
||||
speaker: voices/me.wav # this could be you
|
||||
|
||||
BIN
voices/alloy.wav
Normal file
BIN
voices/alloy.wav
Normal file
Binary file not shown.
@ -1,7 +0,0 @@
|
||||
#!/bin/sh
|
||||
wget -q https://cdn.openai.com/API/docs/audio/alloy.wav
|
||||
wget -q https://cdn.openai.com/API/docs/audio/echo.wav
|
||||
wget -q https://cdn.openai.com/API/docs/audio/fable.wav
|
||||
wget -q https://cdn.openai.com/API/docs/audio/onyx.wav
|
||||
wget -q https://cdn.openai.com/API/docs/audio/nova.wav
|
||||
wget -q https://cdn.openai.com/API/docs/audio/shimmer.wav
|
||||
0
voices/put_your_voices_here.txt
Normal file
0
voices/put_your_voices_here.txt
Normal file
Loading…
Reference in New Issue
Block a user