0.14.1 +deepspeed (not in prebuilt docker)

matatonic 2024-06-27 00:47:56 -04:00
parent ae6a384e75
commit c957ad86fc
5 changed files with 15 additions and 3 deletions

Dockerfile

@@ -5,6 +5,11 @@ RUN apt-get update && apt-get install --no-install-recommends -y curl ffmpeg
 RUN if [ "$TARGETPLATFORM" != "linux/amd64" ]; then apt-get install --no-install-recommends -y build-essential ; fi
 RUN if [ "$TARGETPLATFORM" != "linux/amd64" ]; then curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ; fi
 ENV PATH="/root/.cargo/bin:${PATH}"
+# for deepspeed support - doesn't seem worth it: image +7.5GB, over the 10GB ghcr.io limit, and no noticeable gain in speed or VRAM usage?
+#RUN curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.1-1_all.deb
+#RUN dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb
+#RUN apt-get update && apt-get install --no-install-recommends -y build-essential cuda-toolkit
+#ENV CUDA_HOME=/usr/local/cuda
 RUN apt-get clean && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
@@ -17,7 +22,7 @@ COPY requirements*.txt /app/
 RUN if [ "${USE_ROCM}" = "1" ]; then mv /app/requirements-rocm.txt /app/requirements.txt; fi
 RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements.txt
-COPY speech.py openedai.py say.py *.sh *.default.yaml README.md LICENSE /app/
+COPY *.py *.sh *.default.yaml README.md LICENSE /app/
 ARG PRELOAD_MODEL
 ENV PRELOAD_MODEL=${PRELOAD_MODEL}
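
Note: to build a local image with deepspeed enabled, uncomment the cuda-keyring/cuda-toolkit lines above and build manually. A minimal sketch (the image tag is arbitrary, and passing --use-deepspeed on the run line assumes the container entrypoint forwards its arguments to speech.py):

docker build -t openedai-speech:deepspeed .
docker run --gpus all -p 8000:8000 openedai-speech:deepspeed --use-deepspeed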

README.md

@@ -29,6 +29,10 @@ If you find a better voice match for `tts-1` or `tts-1-hd`, please let me know s
 ## Recent Changes
+Version 0.14.1, 2024-06-26
+* Make deepspeed possible (`--use-deepspeed`), but not enabled in pre-built docker images (too large). Requires the cuda-toolkit to be installed; see the Dockerfile comment for details.
 Version 0.14.0, 2024-06-26
 * Added `response_format`: `wav` and `pcm` support

requirements-rocm.txt

@@ -6,7 +6,8 @@ piper-tts==1.2.0
 # xtts
 TTS==0.22.0
 # https://github.com/huggingface/transformers/issues/31040
-transformers<4.41.0
+transformers<4.41.0
+deepspeed<0.14.0
 # XXX, 3.8+ has some issue for now
 spacy==3.7.4
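
Note: the deepspeed<0.14.0 pin installs like any other requirement, but deepspeed compiles its CUDA ops at runtime (JIT by default), which is why the CUDA toolkit is needed. A quick post-install sanity check (a sketch, not part of this commit):

pip install "deepspeed<0.14.0"
python -c "import deepspeed; print(deepspeed.__version__)"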

requirements.txt

@@ -7,6 +7,7 @@ piper-tts==1.2.0
 TTS==0.22.0
 # https://github.com/huggingface/transformers/issues/31040
 transformers<4.41.0
+deepspeed<0.14.0
 # XXX, 3.8+ has some issue for now
 spacy==3.7.4

speech.py

@@ -64,7 +64,7 @@ class xtts_wrapper():
         config = XttsConfig()
         config.load_json(config_path)
         self.xtts = Xtts.init_from_config(config)
-        self.xtts.load_checkpoint(config, checkpoint_dir=model_path, use_deepspeed=False) # XXX there are no prebuilt deepspeed wheels??
+        self.xtts.load_checkpoint(config, checkpoint_dir=model_path, use_deepspeed=args.use_deepspeed) # XXX there are no prebuilt deepspeed wheels??
         self.xtts = self.xtts.to(device=device)
         self.xtts.eval()
@@ -314,6 +314,7 @@ if __name__ == "__main__":
     parser.add_argument('--xtts_device', action='store', default=auto_torch_device(), help="Set the device for the xtts model. The special value of 'none' will use piper for all models.")
     parser.add_argument('--preload', action='store', default=None, help="Preload a model (Ex. 'xtts' or 'xtts_v2.0.2'). By default it's loaded on first use.")
     parser.add_argument('--unload-timer', action='store', default=None, type=int, help="Idle unload timer for the XTTS model in seconds")
+    parser.add_argument('--use-deepspeed', action='store_true', default=False, help="Use deepspeed for faster generation and lower VRAM usage in xtts")
     parser.add_argument('-P', '--port', action='store', default=8000, type=int, help="Server tcp port")
     parser.add_argument('-H', '--host', action='store', default='0.0.0.0', help="Host to listen on, Ex. 0.0.0.0")
     parser.add_argument('-L', '--log-level', default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], help="Set the log level")
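
Note: combining the new option with the existing ones, a sketch of a local launch (the preload model and port values are illustrative):

python speech.py --use-deepspeed --preload xtts --port 8001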