mirror of
https://github.com/matatonic/openedai-speech
synced 2025-06-26 18:16:32 +00:00
0.17.1
This commit is contained in:
parent
02a6c7eb3e
commit
fdd443b10f
@ -30,6 +30,11 @@ If you find a better voice match for `tts-1` or `tts-1-hd`, please let me know s
|
|||||||
|
|
||||||
## Recent Changes
|
## Recent Changes
|
||||||
|
|
||||||
|
Version 0.17.1, 2024-07-01
|
||||||
|
|
||||||
|
* Fix ROCm (add langdetect to requirements-rocm.txt)
|
||||||
|
* Fix zh-cn for xtts
|
||||||
|
|
||||||
Version 0.17.0, 2024-07-01
|
Version 0.17.0, 2024-07-01
|
||||||
|
|
||||||
* Automatic language detection, thanks [@RodolfoCastanheira](https://github.com/RodolfoCastanheira)
|
* Automatic language detection, thanks [@RodolfoCastanheira](https://github.com/RodolfoCastanheira)
|
||||||
|
|||||||
@ -11,8 +11,8 @@ parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFo
|
|||||||
|
|
||||||
parser.add_argument('sample', action='store', help="Set the wav sample file")
|
parser.add_argument('sample', action='store', help="Set the wav sample file")
|
||||||
parser.add_argument('-n', '--name', action='store', help="Set the name for the voice (by default will use the WAV file name)")
|
parser.add_argument('-n', '--name', action='store', help="Set the name for the voice (by default will use the WAV file name)")
|
||||||
parser.add_argument('-l', '--language', action='store', default="en", help="Set the language for the voice",
|
parser.add_argument('-l', '--language', action='store', default="auto", help="Set the language for the voice",
|
||||||
choices=['en', 'es', 'fr', 'de', 'it', 'pt', 'pl', 'tr', 'ru', 'nl', 'cs', 'ar', 'zh-cn', 'ja', 'hu', 'ko'])
|
choices=['auto', 'en', 'es', 'fr', 'de', 'it', 'pt', 'pl', 'tr', 'ru', 'nl', 'cs', 'ar', 'zh-cn', 'ja', 'hu', 'ko', 'hi'])
|
||||||
parser.add_argument('--openai-model', action='store', default="tts-1-hd", help="Set the openai model for the voice")
|
parser.add_argument('--openai-model', action='store', default="tts-1-hd", help="Set the openai model for the voice")
|
||||||
parser.add_argument('--xtts-model', action='store', default="xtts", help="Set the xtts model for the voice (if using a custom model, also set model_path)")
|
parser.add_argument('--xtts-model', action='store', default="xtts", help="Set the xtts model for the voice (if using a custom model, also set model_path)")
|
||||||
parser.add_argument('--model-path', action='store', default=None, help="Set the path for a custom xtts model")
|
parser.add_argument('--model-path', action='store', default=None, help="Set the path for a custom xtts model")
|
||||||
|
|||||||
@ -3,6 +3,7 @@ uvicorn
|
|||||||
loguru
|
loguru
|
||||||
piper-tts
|
piper-tts
|
||||||
coqui-tts
|
coqui-tts
|
||||||
|
langdetect
|
||||||
# Creating an environment where deepspeed works is complex, for now it will be disabled by default.
|
# Creating an environment where deepspeed works is complex, for now it will be disabled by default.
|
||||||
#deepspeed
|
#deepspeed
|
||||||
torch; --index-url https://download.pytorch.org/whl/rocm5.7; sys_platform == "linux"
|
torch; --index-url https://download.pytorch.org/whl/rocm5.7; sys_platform == "linux"
|
||||||
|
|||||||
@ -296,7 +296,11 @@ async def generate_speech(request: GenerateSpeechRequest):
|
|||||||
hf_generate_kwargs['enable_text_splitting'] = hf_generate_kwargs.get('enable_text_splitting', True) # change the default to true
|
hf_generate_kwargs['enable_text_splitting'] = hf_generate_kwargs.get('enable_text_splitting', True) # change the default to true
|
||||||
|
|
||||||
if hf_generate_kwargs['enable_text_splitting']:
|
if hf_generate_kwargs['enable_text_splitting']:
|
||||||
all_text = split_sentence(input_text, language, xtts.xtts.tokenizer.char_limits[language])
|
if language == 'zh-cn':
|
||||||
|
split_lang = 'zh'
|
||||||
|
else:
|
||||||
|
split_lang = language
|
||||||
|
all_text = split_sentence(input_text, split_lang, xtts.xtts.tokenizer.char_limits[split_lang])
|
||||||
else:
|
else:
|
||||||
all_text = [input_text]
|
all_text = [input_text]
|
||||||
|
|
||||||
|
|||||||
@ -46,8 +46,9 @@ tts-1-hd:
|
|||||||
model: xtts
|
model: xtts
|
||||||
speaker: voices/shimmer.wav
|
speaker: voices/shimmer.wav
|
||||||
me:
|
me:
|
||||||
model: xtts_v2.0.2 # you can specify different xtts version
|
model: xtts_v2.0.2 # you can specify an older xtts version
|
||||||
speaker: voices/me.wav # this could be you
|
speaker: voices/me.wav # this could be you
|
||||||
|
language: auto
|
||||||
enable_text_splitting: True
|
enable_text_splitting: True
|
||||||
length_penalty: 1.0
|
length_penalty: 1.0
|
||||||
repetition_penalty: 10
|
repetition_penalty: 10
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user