mirror of
https://github.com/matatonic/openedai-speech
synced 2025-06-26 18:16:32 +00:00
0.14.1 +deepspeed (not in prebuilt docker)
This commit is contained in:
parent
ae6a384e75
commit
c957ad86fc
@ -5,6 +5,11 @@ RUN apt-get update && apt-get install --no-install-recommends -y curl ffmpeg
|
||||
RUN if [ "$TARGETPLATFORM" != "linux/amd64" ]; then apt-get install --no-install-recommends -y build-essential ; fi
|
||||
RUN if [ "$TARGETPLATFORM" != "linux/amd64" ]; then curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ; fi
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
# for deepspeed support - doesn't seem worth it, image +7.5GB, over the 10GB ghcr.io limit, and no noticeable gain in speed or VRAM usage?
|
||||
#RUN curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.1-1_all.deb
|
||||
#RUN dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb
|
||||
#RUN apt-get update && apt-get install --no-install-recommends -y build-essential cuda-toolkit
|
||||
#ENV CUDA_HOME=/usr/local/cuda
|
||||
RUN apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
@ -17,7 +22,7 @@ COPY requirements*.txt /app/
|
||||
RUN if [ "${USE_ROCM}" = "1" ]; then mv /app/requirements-rocm.txt /app/requirements.txt; fi
|
||||
RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements.txt
|
||||
|
||||
COPY speech.py openedai.py say.py *.sh *.default.yaml README.md LICENSE /app/
|
||||
COPY *.py *.sh *.default.yaml README.md LICENSE /app/
|
||||
|
||||
ARG PRELOAD_MODEL
|
||||
ENV PRELOAD_MODEL=${PRELOAD_MODEL}
|
||||
|
||||
@ -29,6 +29,10 @@ If you find a better voice match for `tts-1` or `tts-1-hd`, please let me know s
|
||||
|
||||
## Recent Changes
|
||||
|
||||
Version 0.14.1, 2024-06-26
|
||||
|
||||
* Make deepspeed possible (`--use-deepspeed`), but not enabled in pre-built docker images (too large). Requires the cuda-toolkit installed, see the Dockerfile comment for details
|
||||
|
||||
Version 0.14.0, 2024-06-26
|
||||
|
||||
* Added `response_format`: `wav` and `pcm` support
|
||||
|
||||
@ -6,7 +6,8 @@ piper-tts==1.2.0
|
||||
# xtts
|
||||
TTS==0.22.0
|
||||
# https://github.com/huggingface/transformers/issues/31040
|
||||
transformers<4.41.0
|
||||
transformers<4.41.0
|
||||
deepspeed<0.14.0
|
||||
# XXX, 3.8+ has some issue for now
|
||||
spacy==3.7.4
|
||||
|
||||
|
||||
@ -7,6 +7,7 @@ piper-tts==1.2.0
|
||||
TTS==0.22.0
|
||||
# https://github.com/huggingface/transformers/issues/31040
|
||||
transformers<4.41.0
|
||||
deepspeed<0.14.0
|
||||
# XXX, 3.8+ has some issue for now
|
||||
spacy==3.7.4
|
||||
|
||||
|
||||
@ -64,7 +64,7 @@ class xtts_wrapper():
|
||||
config = XttsConfig()
|
||||
config.load_json(config_path)
|
||||
self.xtts = Xtts.init_from_config(config)
|
||||
self.xtts.load_checkpoint(config, checkpoint_dir=model_path, use_deepspeed=False) # XXX there are no prebuilt deepspeed wheels??
|
||||
self.xtts.load_checkpoint(config, checkpoint_dir=model_path, use_deepspeed=args.use_deepspeed) # XXX there are no prebuilt deepspeed wheels??
|
||||
self.xtts = self.xtts.to(device=device)
|
||||
self.xtts.eval()
|
||||
|
||||
@ -314,6 +314,7 @@ if __name__ == "__main__":
|
||||
parser.add_argument('--xtts_device', action='store', default=auto_torch_device(), help="Set the device for the xtts model. The special value of 'none' will use piper for all models.")
|
||||
parser.add_argument('--preload', action='store', default=None, help="Preload a model (Ex. 'xtts' or 'xtts_v2.0.2'). By default it's loaded on first use.")
|
||||
parser.add_argument('--unload-timer', action='store', default=None, type=int, help="Idle unload timer for the XTTS model in seconds")
|
||||
parser.add_argument('--use-deepspeed', action='store_true', default=False, help="Use deepspeed for faster generation and lower VRAM usage in xtts")
|
||||
parser.add_argument('-P', '--port', action='store', default=8000, type=int, help="Server tcp port")
|
||||
parser.add_argument('-H', '--host', action='store', default='0.0.0.0', help="Host to listen on, Ex. 0.0.0.0")
|
||||
parser.add_argument('-L', '--log-level', default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], help="Set the log level")
|
||||
|
||||
Loading…
Reference in New Issue
Block a user