0.14.1 +deepspeed (not in prebuilt docker)

matatonic 2024-06-27 00:47:56 -04:00
parent ae6a384e75
commit c957ad86fc
5 changed files with 15 additions and 3 deletions

Dockerfile

@@ -5,6 +5,11 @@ RUN apt-get update && apt-get install --no-install-recommends -y curl ffmpeg
 RUN if [ "$TARGETPLATFORM" != "linux/amd64" ]; then apt-get install --no-install-recommends -y build-essential ; fi
 RUN if [ "$TARGETPLATFORM" != "linux/amd64" ]; then curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ; fi
 ENV PATH="/root/.cargo/bin:${PATH}"
+# for deepspeed support - doesn't seem worth it: image +7.5GB, over the 10GB ghcr.io limit, and no noticeable gain in speed or VRAM usage?
+#RUN curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.1-1_all.deb
+#RUN dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb
+#RUN apt-get update && apt-get install --no-install-recommends -y build-essential cuda-toolkit
+#ENV CUDA_HOME=/usr/local/cuda
 RUN apt-get clean && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
@@ -17,7 +22,7 @@ COPY requirements*.txt /app/
 RUN if [ "${USE_ROCM}" = "1" ]; then mv /app/requirements-rocm.txt /app/requirements.txt; fi
 RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements.txt
-COPY speech.py openedai.py say.py *.sh *.default.yaml README.md LICENSE /app/
+COPY *.py *.sh *.default.yaml README.md LICENSE /app/
 ARG PRELOAD_MODEL
 ENV PRELOAD_MODEL=${PRELOAD_MODEL}
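
Note: to build a local image with deepspeed enabled, uncomment the cuda-keyring/cuda-toolkit lines above and build manually. A minimal sketch (the image tag is arbitrary, and passing --use-deepspeed on the run line assumes the container entrypoint forwards its arguments to speech.py):

docker build -t openedai-speech:deepspeed .
docker run --gpus all -p 8000:8000 openedai-speech:deepspeed --use-deepspeed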

README.md

@@ -29,6 +29,10 @@ If you find a better voice match for `tts-1` or `tts-1-hd`, please let me know s
 ## Recent Changes
+Version 0.14.1, 2024-06-26
+* Make deepspeed possible (`--use-deepspeed`), but not enabled in pre-built docker images (too large). Requires the cuda-toolkit to be installed; see the Dockerfile comment for details.
 Version 0.14.0, 2024-06-26
 * Added `response_format`: `wav` and `pcm` support

requirements-rocm.txt

@@ -6,7 +6,8 @@ piper-tts==1.2.0
 # xtts
 TTS==0.22.0
 # https://github.com/huggingface/transformers/issues/31040
-transformers<4.41.0
+transformers<4.41.0
+deepspeed<0.14.0
 # XXX, 3.8+ has some issue for now
 spacy==3.7.4
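
Note: the deepspeed<0.14.0 pin installs like any other requirement, but deepspeed compiles its CUDA ops at runtime (JIT by default), which is why the CUDA toolkit is needed. A quick post-install sanity check (a sketch, not part of this commit):

pip install "deepspeed<0.14.0"
python -c "import deepspeed; print(deepspeed.__version__)"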

requirements.txt

@@ -7,6 +7,7 @@ piper-tts==1.2.0
 TTS==0.22.0
 # https://github.com/huggingface/transformers/issues/31040
 transformers<4.41.0
+deepspeed<0.14.0
 # XXX, 3.8+ has some issue for now
 spacy==3.7.4

speech.py

@@ -64,7 +64,7 @@ class xtts_wrapper():
         config = XttsConfig()
         config.load_json(config_path)
         self.xtts = Xtts.init_from_config(config)
-        self.xtts.load_checkpoint(config, checkpoint_dir=model_path, use_deepspeed=False) # XXX there are no prebuilt deepspeed wheels??
+        self.xtts.load_checkpoint(config, checkpoint_dir=model_path, use_deepspeed=args.use_deepspeed) # XXX there are no prebuilt deepspeed wheels??
         self.xtts = self.xtts.to(device=device)
         self.xtts.eval()
@@ -314,6 +314,7 @@ if __name__ == "__main__":
     parser.add_argument('--xtts_device', action='store', default=auto_torch_device(), help="Set the device for the xtts model. The special value of 'none' will use piper for all models.")
     parser.add_argument('--preload', action='store', default=None, help="Preload a model (Ex. 'xtts' or 'xtts_v2.0.2'). By default it's loaded on first use.")
     parser.add_argument('--unload-timer', action='store', default=None, type=int, help="Idle unload timer for the XTTS model in seconds")
+    parser.add_argument('--use-deepspeed', action='store_true', default=False, help="Use deepspeed for faster generation and lower VRAM usage in xtts")
     parser.add_argument('-P', '--port', action='store', default=8000, type=int, help="Server tcp port")
     parser.add_argument('-H', '--host', action='store', default='0.0.0.0', help="Host to listen on, Ex. 0.0.0.0")
     parser.add_argument('-L', '--log-level', default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], help="Set the log level")
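
Note: combining the new option with the existing ones, a sketch of a local launch (the preload model and port values are illustrative):

python speech.py --use-deepspeed --preload xtts --port 8001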