mirror of
https://github.com/matatonic/openedai-speech
synced 2025-06-26 18:16:32 +00:00
0.10.0
This commit is contained in:
parent
a2a3d2b3eb
commit
6864cf03b1
127
.github/workflows/build-docker.yml
vendored
Normal file
127
.github/workflows/build-docker.yml
vendored
Normal file
@ -0,0 +1,127 @@
|
||||
name: Build and Publish Docker Image
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- 'main'
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
jobs:
|
||||
build-and-push-image:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
env:
|
||||
# Set up environment variables for the job
|
||||
DOCKER_REGISTRY: ghcr.io
|
||||
IMAGE_NAME: ${{ github.repository }}
|
||||
TAG: ${{ github.sha }}
|
||||
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
with:
|
||||
install: true
|
||||
|
||||
# Log in to the GitHub Container Registry (this workflow is never triggered by pull request events, so the login always runs)
|
||||
- name: Login to Docker Registry
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
registry: ${{ env.DOCKER_REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Extract metadata (tags, labels) for Docker
|
||||
id: meta
|
||||
uses: docker/metadata-action@v4
|
||||
with:
|
||||
images: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
|
||||
# Build and push the Docker image to GHCR as `latest` when running on the main branch
|
||||
- name: Build and Push Docker Image
|
||||
if: github.ref == 'refs/heads/main'
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
push: true
|
||||
tags: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}:latest
|
||||
labels: version=${{ github.run_id }}
|
||||
|
||||
# For tagged releases, build and push the Docker image with the corresponding tag
|
||||
- name: Build and Push Docker Image (Tagged)
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
push: true
|
||||
tags: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}
|
||||
labels: version=${{ github.run_id }}
|
||||
|
||||
build-and-push-alt-image:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
env:
|
||||
# Set up environment variables for the job
|
||||
DOCKER_REGISTRY: ghcr.io
|
||||
IMAGE_NAME: matatonic/openedai-speech-min
|
||||
TAG: ${{ github.sha }}
|
||||
|
||||
steps:
|
||||
- name: Check out code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
with:
|
||||
install: true
|
||||
|
||||
# Log in to the GitHub Container Registry (this workflow is never triggered by pull request events, so the login always runs)
|
||||
- name: Login to Docker Registry
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
registry: ${{ env.DOCKER_REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Extract metadata (tags, labels) for Docker
|
||||
id: meta
|
||||
uses: docker/metadata-action@v4
|
||||
with:
|
||||
images: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
|
||||
# Build and push the Docker image to GHCR as `latest` when running on the main branch
|
||||
- name: Build and Push Docker Image
|
||||
if: github.ref == 'refs/heads/main'
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile.min
|
||||
push: true
|
||||
tags: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}:latest
|
||||
labels: version=${{ github.run_id }}
|
||||
|
||||
# For tagged releases, build and push the Docker image with the corresponding tag
|
||||
- name: Build and Push Docker Image (Tagged)
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile.min
|
||||
push: true
|
||||
tags: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}
|
||||
labels: version=${{ github.run_id }}
|
||||
|
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,4 +1,8 @@
|
||||
voices/
|
||||
.env
|
||||
speech.env
|
||||
config/pre_process_map.yaml
|
||||
config/voice_to_speaker.yaml
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
|
13
Dockerfile
13
Dockerfile
@ -1,24 +1,17 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
ENV COQUI_TOS_AGREED=1
|
||||
ENV PRELOAD_MODEL=xtts
|
||||
# or PRELOAD_MODEL=parler-tts/parler_tts_mini_v0.1
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends -y curl git ffmpeg
|
||||
|
||||
#RUN git clone https://github.com/matatonic/openedai-speech /app
|
||||
RUN mkdir -p /app/voices
|
||||
# default clone of the default voice is really bad, use a better default
|
||||
COPY voices/alloy-alt.wav /app/voices/
|
||||
WORKDIR /app
|
||||
COPY *.txt /app/
|
||||
RUN pip install --no-cache -r requirements.txt
|
||||
COPY *.sh /app/
|
||||
RUN ./download_voices_tts-1.sh
|
||||
RUN ./download_voices_tts-1-hd.sh
|
||||
COPY *.py *.yaml *.md LICENSE /app/
|
||||
COPY *.sh *.py *.yaml *.md LICENSE config /app/
|
||||
|
||||
RUN apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
CMD python speech.py --host 0.0.0.0 --port 8000 --preload $PRELOAD_MODEL
|
||||
ENV CLI_COMMAND="python speech.py"
|
||||
CMD $CLI_COMMAND
|
||||
|
@ -3,15 +3,13 @@ FROM python:3.11-slim
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends -y ffmpeg curl
|
||||
|
||||
RUN pip install --no-cache piper-tts pyyaml fastapi uvicorn
|
||||
RUN pip install --no-cache piper-tts==1.2.0 pyyaml fastapi uvicorn
|
||||
|
||||
#RUN git clone https://github.com/matatonic/openedai-speech /app
|
||||
RUN mkdir -p /app/voices
|
||||
COPY *.py *.yaml *.txt *.md *.sh LICENSE /app/
|
||||
WORKDIR /app
|
||||
|
||||
RUN ./download_voices_tts-1.sh
|
||||
|
||||
RUN apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
CMD python speech.py --host 0.0.0.0 --port 8000 --xtts_device none
|
||||
ENV CLI_COMMAND="python speech.py --xtts_device none"
|
||||
CMD $CLI_COMMAND
|
||||
|
68
README.md
68
README.md
@ -25,6 +25,12 @@ Details:
|
||||
If you find a better voice match for `tts-1` or `tts-1-hd`, please let me know so I can update the defaults.
|
||||
|
||||
|
||||
Version: 0.10.0, 2024-04-26
|
||||
|
||||
* Better upgrades: Reorganize config files under config, voice models under voices
|
||||
* * **If you customized your `voice_to_speaker.yaml` or `pre_process_map.yaml` you need to move them to the `config/` folder.**
|
||||
* default listen host to 0.0.0.0
|
||||
|
||||
Version: 0.9.0, 2024-04-23
|
||||
|
||||
* Fix bug with yaml and loading UTF-8
|
||||
@ -54,45 +60,47 @@ API Documentation
|
||||
Installation instructions
|
||||
-------------------------
|
||||
|
||||
You can run the server via docker like so (**recommended**):
|
||||
1) Download the models & voices
|
||||
```shell
|
||||
# for tts-1 / piper
|
||||
bash download_voices_tts-1.sh
|
||||
# and for tts-1-hd / xtts
|
||||
bash download_voices_tts-1-hd.sh
|
||||
```
|
||||
|
||||
2a) Docker (**recommended**): You can run the server via docker like so:
|
||||
```shell
|
||||
cp sample.env speech.env # edit to suit your environment as needed, you can preload a model on startup
|
||||
docker compose up
|
||||
```
|
||||
If you want a minimal docker image with piper support only (900MB vs. 13.5GB, see: Dockerfile.min). You can edit the `docker-compose.yml` to easily change this.
|
||||
If you want a minimal docker image with piper support only (~1GB vs. ~10GB, see: Dockerfile.min), you can edit the `docker-compose.yml` to easily change this.
|
||||
|
||||
Manual instructions:
|
||||
2b) Manual instructions:
|
||||
```shell
|
||||
# Install the Python requirements
|
||||
pip install -r requirements.txt
|
||||
# install ffmpeg and curl
|
||||
sudo apt install ffmpeg curl
|
||||
# Download the voice models:
|
||||
# for tts-1
|
||||
bash download_voices_tts-1.sh
|
||||
# and for tts-1-hd
|
||||
bash download_voices_tts-1-hd.sh
|
||||
python speech.py
|
||||
```
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
```
|
||||
usage: speech.py [-h] [--piper_cuda] [--xtts_device XTTS_DEVICE] [--preload PRELOAD] [-P PORT]
|
||||
[-H HOST]
|
||||
usage: speech.py [-h] [--piper_cuda] [--xtts_device XTTS_DEVICE] [--preload PRELOAD] [-P PORT] [-H HOST]
|
||||
|
||||
OpenedAI Speech API Server
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
--piper_cuda Enable cuda for piper. Note: --cuda/onnxruntime-gpu is not working for me,
|
||||
but cpu is fast enough (default: False)
|
||||
--piper_cuda Enable cuda for piper. Note: --cuda/onnxruntime-gpu is not working for me, but cpu is fast enough (default: False)
|
||||
--xtts_device XTTS_DEVICE
|
||||
Set the device for the xtts model. The special value of 'none' will use
|
||||
piper for all models. (default: cuda)
|
||||
--preload PRELOAD Preload a model (Ex. 'xtts' or 'xtts_v2.0.2'). By default it's loaded on
|
||||
first use. (default: None)
|
||||
Set the device for the xtts model. The special value of 'none' will use piper for all models. (default: cuda)
|
||||
--preload PRELOAD Preload a model (Ex. 'xtts' or 'xtts_v2.0.2'). By default it's loaded on first use. (default: None)
|
||||
-P PORT, --port PORT Server tcp port (default: 8000)
|
||||
-H HOST, --host HOST Host to listen on, Ex. 0.0.0.0 (default: localhost)
|
||||
-H HOST, --host HOST Host to listen on, Ex. 0.0.0.0 (default: 0.0.0.0)
|
||||
|
||||
```
|
||||
|
||||
Sample API Usage
|
||||
@ -141,10 +149,32 @@ with client.audio.speech.with_streaming_response.create(
|
||||
Also see the `say.py` sample application for an example of how to use the openai-python API.
|
||||
|
||||
```
|
||||
$ python say.py -i "The quick brown fox jumped over the lazy dog." -p # play the audio, requires 'pip install playsound'
|
||||
$ python say.py -i "The quick brown fox jumped over the lazy dog." -m tts-1-hd -v onyx -f flac -o fox.flac # save to a file.
|
||||
$ python say.py -t "The quick brown fox jumped over the lazy dog." -p # play the audio, requires 'pip install playsound'
|
||||
$ python say.py -t "The quick brown fox jumped over the lazy dog." -m tts-1-hd -v onyx -f flac -o fox.flac # save to a file.
|
||||
```
|
||||
|
||||
```
|
||||
usage: say.py [-h] [-m MODEL] [-v VOICE] [-f {mp3,aac,opus,flac}] [-s SPEED] [-t TEXT] [-i INPUT] [-o OUTPUT] [-p]
|
||||
|
||||
Text to speech using the OpenAI API
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
-m MODEL, --model MODEL
|
||||
The model to use (default: tts-1)
|
||||
-v VOICE, --voice VOICE
|
||||
The voice of the speaker (default: alloy)
|
||||
-f {mp3,aac,opus,flac}, --format {mp3,aac,opus,flac}
|
||||
The output audio format (default: mp3)
|
||||
-s SPEED, --speed SPEED
|
||||
playback speed, 0.25-4.0 (default: 1.0)
|
||||
-t TEXT, --text TEXT Provide text to read on the command line (default: None)
|
||||
-i INPUT, --input INPUT
|
||||
Read text from a file (default is to read from stdin) (default: None)
|
||||
-o OUTPUT, --output OUTPUT
|
||||
The filename to save the output to (default: None)
|
||||
-p, --playsound Play the audio (default: False)
|
||||
```
|
||||
|
||||
Custom Voices Howto
|
||||
-------------------
|
||||
|
@ -2,7 +2,7 @@
|
||||
some_other_voice_name_you_want:
|
||||
model: voices/choose your own model.onnx
|
||||
speaker: set your own speaker
|
||||
alloy:
|
||||
alloy:
|
||||
model: voices/en_US-libritts_r-medium.onnx
|
||||
speaker: 79 # 64, 79, 80, 101, 130
|
||||
echo:
|
||||
@ -24,7 +24,7 @@
|
||||
model: voices/en_US-libritts_r-medium.onnx
|
||||
speaker: 163
|
||||
tts-1-hd:
|
||||
alloy:
|
||||
alloy:
|
||||
model: xtts
|
||||
speaker: voices/alloy-alt.wav
|
||||
alloy-orig:
|
@ -1,16 +1,17 @@
|
||||
services:
|
||||
server:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile # for tts-1-hd support via xtts_v2, ~4GB VRAM required, ~10GB
|
||||
#dockerfile: Dockerfile.min # piper for all models, no gpu/nvidia required, ~1GB
|
||||
#command: ["python", "speech.py", "--host", "0.0.0.0", "--port", "8000", "--preload", "parler-tts/parler_tts_mini_v0.1"]
|
||||
command: ["python", "speech.py", "--host", "0.0.0.0", "--port", "8000", "--preload", "xtts"]
|
||||
#command: ["python", "speech.py", "--host", "0.0.0.0", "--port", "8000", "--xtts_device", "none"] # min
|
||||
image: ghcr.io/matatonic/openedai-speech
|
||||
#image: ghcr.io/matatonic/openedai-speech-min
|
||||
env_file: speech.env
|
||||
ports:
|
||||
- "8000:8000"
|
||||
# volumes:
|
||||
# - .:/app/
|
||||
volumes:
|
||||
- ./voices:/app/voices
|
||||
- ./config:/app/config
|
||||
#restart: unless-stopped # install as a service
|
||||
# Below can be removed if not using GPU
|
||||
runtime: nvidia
|
||||
deploy:
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/bin/sh
|
||||
for i in alloy echo fable onyx nova shimmer; do
|
||||
curl -s https://cdn.openai.com/API/docs/audio/$i.wav | ffmpeg -loglevel error -i - -ar 22050 -ac 1 voices/$i.wav
|
||||
[ ! -e "voices/$i.wav" ] && curl -s https://cdn.openai.com/API/docs/audio/$i.wav | ffmpeg -loglevel error -i - -ar 22050 -ac 1 voices/$i.wav
|
||||
done
|
||||
|
@ -1,4 +1,9 @@
|
||||
#!/bin/sh
|
||||
export COQUI_TOS_AGREED=1
|
||||
python -c "from TTS.utils.manage import ModelManager; ModelManager().download_model('$PRELOAD_MODEL')"
|
||||
export TTS_HOME=voices
|
||||
|
||||
MODELS=${*:-xtts}
|
||||
for model in $MODELS; do
|
||||
python -c "from TTS.utils.manage import ModelManager; ModelManager().download_model('$model')"
|
||||
done
|
||||
./download_samples.sh
|
@ -1,5 +1,5 @@
|
||||
#!/bin/sh
|
||||
models="en_GB-northern_english_male-medium en_US-libritts_r-medium" # en_US-ryan-high
|
||||
models=${*:-"en_GB-northern_english_male-medium en_US-libritts_r-medium"} # en_US-ryan-high
|
||||
piper --update-voices --data-dir voices --download-dir voices --model x 2> /dev/null
|
||||
for i in $models ; do
|
||||
piper --data-dir voices --download-dir voices --model $i < /dev/null > /dev/null
|
||||
|
@ -3,8 +3,8 @@ from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import PlainTextResponse
|
||||
|
||||
class OpenAIStub(FastAPI):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
def __init__(self, **kwargs) -> None:
|
||||
super().__init__(**kwargs)
|
||||
self.models = {}
|
||||
|
||||
self.add_middleware(
|
||||
|
@ -1,7 +1,7 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
# piper-tts
|
||||
piper-tts
|
||||
piper-tts==1.2.0
|
||||
onnxruntime-gpu
|
||||
# xtts
|
||||
TTS
|
||||
|
6
sample.env
Normal file
6
sample.env
Normal file
@ -0,0 +1,6 @@
|
||||
TTS_HOME=voices
|
||||
HF_HOME=voices
|
||||
#PRELOAD_MODEL=xtts
|
||||
#PRELOAD_MODEL=xtts_v2.0.2
|
||||
#CLI_COMMAND="python speech.py --preload $PRELOAD_MODEL"
|
||||
#CLI_COMMAND="python speech.py --xtts_device none" # for piper only
|
52
say.py
52
say.py
@ -2,6 +2,7 @@
|
||||
|
||||
import sys
|
||||
import os
|
||||
import atexit
|
||||
import tempfile
|
||||
import argparse
|
||||
|
||||
@ -20,19 +21,23 @@ import openai
|
||||
|
||||
|
||||
def parse_args(argv):
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("-m", "--model", type=str, default="tts-1")#, choices=["tts-1", "tts-1-hd"])
|
||||
parser.add_argument("-v", "--voice", type=str, default="alloy")#, choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"])
|
||||
parser.add_argument("-f", "--format", type=str, default="mp3", choices=["mp3", "aac", "opus", "flac"])
|
||||
parser.add_argument("-s", "--speed", type=float, default=1.0)
|
||||
parser.add_argument("-i", "--input", type=str)
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Text to speech using the OpenAI API',
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
)
|
||||
parser.add_argument("-m", "--model", type=str, default="tts-1", help="The model to use")#, choices=["tts-1", "tts-1-hd"])
|
||||
parser.add_argument("-v", "--voice", type=str, default="alloy", help="The voice of the speaker")#, choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"])
|
||||
parser.add_argument("-f", "--format", type=str, default="mp3", choices=["mp3", "aac", "opus", "flac"], help="The output audio format")
|
||||
parser.add_argument("-s", "--speed", type=float, default=1.0, help="playback speed, 0.25-4.0")
|
||||
parser.add_argument("-t", "--text", type=str, default=None, help="Provide text to read on the command line")
|
||||
parser.add_argument("-i", "--input", type=str, default=None, help="Read text from a file (default is to read from stdin)")
|
||||
|
||||
if playsound is None:
|
||||
parser.add_argument("-o", "--output", type=str) # required
|
||||
parser.add_argument("-o", "--output", type=str, help="The filename to save the output to") # required
|
||||
parser.add_argument("-p", "--playsound", type=None, default=None, help="python playsound not found. pip install playsound")
|
||||
else:
|
||||
parser.add_argument("-o", "--output", type=str, default=None) # not required
|
||||
parser.add_argument("-p", "--playsound", action="store_true")
|
||||
parser.add_argument("-o", "--output", type=str, default=None, help="The filename to save the output to") # not required
|
||||
parser.add_argument("-p", "--playsound", action="store_true", help="Play the audio")
|
||||
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
@ -50,6 +55,17 @@ if __name__ == "__main__":
|
||||
print("Must select one of playsound (-p) or output file name (-o)")
|
||||
sys.exit(1)
|
||||
|
||||
if args.input is None and args.text is None:
|
||||
text = sys.stdin.read()
|
||||
elif args.text:
|
||||
text = args.text
|
||||
elif args.input:
|
||||
if os.path.exists(args.input):
|
||||
with open(args.input, 'r') as f:
|
||||
text = f.read()
|
||||
else:
|
||||
print(f"Warning! File not found: {args.input}\nFalling back to old behavior for -i")
|
||||
text = args.input
|
||||
|
||||
client = openai.OpenAI(
|
||||
# This part is not needed if you set these environment variables before import openai
|
||||
@ -60,21 +76,21 @@ if __name__ == "__main__":
|
||||
)
|
||||
|
||||
if args.playsound and args.output is None:
|
||||
tf, args.output = file_path = tempfile.mkstemp(suffix='.wav')
|
||||
else:
|
||||
tf = None
|
||||
_, args.output = tempfile.mkstemp(suffix='.wav')
|
||||
|
||||
def cleanup():
|
||||
os.unlink(args.output)
|
||||
|
||||
atexit.register(cleanup)
|
||||
|
||||
with client.audio.speech.with_streaming_response.create(
|
||||
model=args.model,
|
||||
voice=args.voice,
|
||||
speed=args.speed,
|
||||
response_format=args.format,
|
||||
input=args.input,
|
||||
input=text,
|
||||
) as response:
|
||||
response.stream_to_file(args.output)
|
||||
|
||||
if args.playsound:
|
||||
playsound(args.output)
|
||||
|
||||
if tf:
|
||||
os.unlink(args.output)
|
||||
if args.playsound:
|
||||
playsound(args.output)
|
||||
|
18
speech.py
18
speech.py
@ -65,9 +65,18 @@ class parler_tts():
|
||||
return tf
|
||||
|
||||
|
||||
def default_exists(filename: str):
    """Create `filename` from its `.default` template when it does not exist.

    The template path is derived by inserting `.default` before the file
    extension, e.g. `config/pre_process_map.yaml` is seeded from
    `config/pre_process_map.default.yaml`. An existing `filename` is left
    untouched, so user customizations are never overwritten.

    Args:
        filename: Path of the config file that must exist after the call.

    Raises:
        FileNotFoundError: If `filename` is missing and the template (or the
            target directory) cannot be opened.
    """
    if os.path.exists(filename):
        return

    basename, ext = os.path.splitext(filename)
    default = f"{basename}.default{ext}"

    # The config files are read back with encoding='utf8' elsewhere in this
    # file; copy with the same explicit encoding so the result does not depend
    # on the platform's locale default (which may be unable to decode UTF-8).
    with open(default, 'r', encoding='utf8') as from_file:
        with open(filename, 'w', encoding='utf8') as to_file:
            to_file.write(from_file.read())
|
||||
|
||||
# Read pre process map on demand so it can be changed without restarting the server
|
||||
def preprocess(raw_input):
|
||||
with open('pre_process_map.yaml', 'r', encoding='utf8') as file:
|
||||
default_exists('config/pre_process_map.yaml')
|
||||
with open('config/pre_process_map.yaml', 'r', encoding='utf8') as file:
|
||||
pre_process_map = yaml.safe_load(file)
|
||||
for a, b in pre_process_map:
|
||||
raw_input = re.sub(a, b, raw_input)
|
||||
@ -75,9 +84,10 @@ def preprocess(raw_input):
|
||||
|
||||
# Read voice map on demand so it can be changed without restarting the server
|
||||
def map_voice_to_speaker(voice: str, model: str):
|
||||
with open('voice_to_speaker.yaml', 'r', encoding='utf8') as file:
|
||||
default_exists('config/voice_to_speaker.yaml')
|
||||
with open('config/voice_to_speaker.yaml', 'r', encoding='utf8') as file:
|
||||
voice_map = yaml.safe_load(file)
|
||||
return voice_map[model][voice]['model'], voice_map[model][voice]['speaker'],
|
||||
return (voice_map[model][voice]['model'], voice_map[model][voice]['speaker'])
|
||||
|
||||
class GenerateSpeechRequest(BaseModel):
|
||||
model: str = "tts-1" # or "tts-1-hd"
|
||||
@ -197,7 +207,7 @@ if __name__ == "__main__":
|
||||
parser.add_argument('--xtts_device', action='store', default="cuda", help="Set the device for the xtts model. The special value of 'none' will use piper for all models.")
|
||||
parser.add_argument('--preload', action='store', default=None, help="Preload a model (Ex. 'xtts' or 'xtts_v2.0.2'). By default it's loaded on first use.")
|
||||
parser.add_argument('-P', '--port', action='store', default=8000, type=int, help="Server tcp port")
|
||||
parser.add_argument('-H', '--host', action='store', default='localhost', help="Host to listen on, Ex. 0.0.0.0")
|
||||
parser.add_argument('-H', '--host', action='store', default='0.0.0.0', help="Host to listen on, Ex. 0.0.0.0")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user