mirror of
https://github.com/matatonic/openedai-speech
synced 2025-06-26 18:16:32 +00:00
0.15.0
This commit is contained in:
parent
c957ad86fc
commit
be759f3fea
@ -1,7 +1,9 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/pip pip install -U pip
|
||||
|
||||
ARG TARGETPLATFORM
|
||||
RUN apt-get update && apt-get install --no-install-recommends -y curl ffmpeg
|
||||
RUN apt-get update && apt-get install --no-install-recommends -y curl ffmpeg libaio-dev
|
||||
RUN if [ "$TARGETPLATFORM" != "linux/amd64" ]; then apt-get install --no-install-recommends -y build-essential ; fi
|
||||
RUN if [ "$TARGETPLATFORM" != "linux/amd64" ]; then curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ; fi
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
|
||||
@ -12,7 +12,7 @@ RUN mkdir -p voices config
|
||||
|
||||
COPY requirements*.txt /app/
|
||||
RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements-min.txt
|
||||
COPY speech.py openedai.py say.py *.sh *.default.yaml README.md LICENSE /app/
|
||||
COPY *.py *.sh *.default.yaml README.md LICENSE /app/
|
||||
|
||||
ENV TTS_HOME=voices
|
||||
ENV HF_HOME=voices
|
||||
|
||||
@ -29,6 +29,11 @@ If you find a better voice match for `tts-1` or `tts-1-hd`, please let me know s
|
||||
|
||||
## Recent Changes
|
||||
|
||||
Version 0.15.0, 2024-06-26
|
||||
|
||||
* Switch to [coqui-tts](https://github.com/idiap/coqui-ai-TTS) (the updated fork), with simpler, updated dependencies, torch 2.3, etc.
|
||||
* Resolve cuda threading issues
|
||||
|
||||
Version 0.14.1, 2024-06-26
|
||||
|
||||
* Make deepspeed possible (`--use-deepspeed`), but it is not enabled in the pre-built docker images (too large). Requires the cuda-toolkit to be installed; see the Dockerfile comment for details
|
||||
@ -127,7 +132,7 @@ source .venv/bin/activate
|
||||
# Install the Python requirements
|
||||
# - use requirements-rocm.txt for AMD GPU (ROCm support)
|
||||
# - use requirements-min.txt for piper only (CPU only)
|
||||
pip install -r requirements.txt
|
||||
pip install -U -r requirements.txt
|
||||
# run the server
|
||||
bash startup.sh
|
||||
```
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
pyyaml
|
||||
fastapi
|
||||
uvicorn
|
||||
loguru
|
||||
numpy<2
|
||||
piper-tts==1.2.0
|
||||
piper-tts
|
||||
@ -1,17 +1,8 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
loguru
|
||||
# piper-tts
|
||||
piper-tts==1.2.0
|
||||
# xtts
|
||||
TTS==0.22.0
|
||||
# https://github.com/huggingface/transformers/issues/31040
|
||||
transformers<4.41.0
|
||||
deepspeed<0.14.0
|
||||
# XXX, 3.8+ has some issue for now
|
||||
spacy==3.7.4
|
||||
|
||||
# torch==2.2.2 Fixes: https://github.com/matatonic/openedai-speech/issues/9
|
||||
# Re: https://github.com/pytorch/pytorch/issues/121834
|
||||
torch==2.2.2; --index-url https://download.pytorch.org/whl/rocm5.7; sys_platform == "linux"
|
||||
torchaudio==2.2.2; --index-url https://download.pytorch.org/whl/rocm5.7; sys_platform == "linux"
|
||||
piper-tts
|
||||
coqui-tts
|
||||
deepspeed
|
||||
torch; --index-url https://download.pytorch.org/whl/rocm5.7; sys_platform == "linux"
|
||||
torchaudio; --index-url https://download.pytorch.org/whl/rocm5.7; sys_platform == "linux"
|
||||
@ -1,22 +1,14 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
loguru
|
||||
# piper-tts
|
||||
piper-tts==1.2.0
|
||||
# xtts
|
||||
TTS==0.22.0
|
||||
# https://github.com/huggingface/transformers/issues/31040
|
||||
transformers<4.41.0
|
||||
deepspeed<0.14.0
|
||||
# XXX, 3.8+ has some issue for now
|
||||
spacy==3.7.4
|
||||
piper-tts
|
||||
coqui-tts[languages]
|
||||
deepspeed
|
||||
|
||||
# torch==2.2.2 Fixes: https://github.com/matatonic/openedai-speech/issues/9
|
||||
# Re: https://github.com/pytorch/pytorch/issues/121834
|
||||
torch==2.2.2; sys_platform != "darwin"
|
||||
torch; sys_platform != "darwin"
|
||||
torchaudio; sys_platform != "darwin"
|
||||
# for MPS accelerated torch on Mac - doesn't work yet, incomplete support in torch and torchaudio
|
||||
torch==2.2.2; --index-url https://download.pytorch.org/whl/cpu; sys_platform == "darwin"
|
||||
torchaudio==2.2.2; --index-url https://download.pytorch.org/whl/cpu; sys_platform == "darwin"
|
||||
torch; --index-url https://download.pytorch.org/whl/cpu; sys_platform == "darwin"
|
||||
torchaudio; --index-url https://download.pytorch.org/whl/cpu; sys_platform == "darwin"
|
||||
|
||||
# ROCm (Linux only) - use requirements-rocm.txt
|
||||
@ -92,7 +92,8 @@ class xtts_wrapper():
|
||||
self.not_idle()
|
||||
try:
|
||||
with torch.no_grad():
|
||||
gpt_cond_latent, speaker_embedding = self.xtts.get_conditioning_latents(audio_path=[speaker_wav]) # XXX TODO: allow multiple wav
|
||||
with self.lock: # this doesn't seem threadsafe, but it's quick enough
|
||||
gpt_cond_latent, speaker_embedding = self.xtts.get_conditioning_latents(audio_path=[speaker_wav]) # XXX TODO: allow multiple wav
|
||||
|
||||
for wav in self.xtts.inference_stream(text, language, gpt_cond_latent, speaker_embedding, **hf_generate_kwargs):
|
||||
yield wav.cpu().numpy().tobytes() # assumes wav data is f32le
|
||||
|
||||
Loading…
Reference in New Issue
Block a user