Spaces:
Running
Running
Use multi-speaker model
Browse files
- app.py +24 -13
- language_ids.json +4 -0
- speakers.pth +3 -0
app.py
CHANGED
@@ -6,7 +6,7 @@ import torch
|
|
6 |
|
7 |
CUDA = torch.cuda.is_available()
|
8 |
|
9 |
-
REPO_ID = "ayymen/Coqui-TTS-Vits-
|
10 |
|
11 |
VOICE_CONVERSION_MODELS = {
|
12 |
'freevc24': 'voice_conversion_models/multilingual/vctk/freevc24',
|
@@ -14,26 +14,36 @@ VOICE_CONVERSION_MODELS = {
|
|
14 |
'openvoice_v2': 'voice_conversion_models/multilingual/multi-dataset/openvoice_v2',
|
15 |
}
|
16 |
|
|
|
|
|
|
|
|
|
17 |
my_title = "ⴰⴹⵕⵉⵚ ⵙ ⵉⵎⵙⵍⵉ - Tamazight Text-to-Speech"
|
18 |
my_description = "This model is based on [VITS](https://github.com/jaywalnut310/vits), thanks to 🐸 [Coqui.ai](https://coqui.ai/)."
|
19 |
|
20 |
my_examples = [
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
25 |
]
|
26 |
|
27 |
my_inputs = [
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
32 |
]
|
33 |
|
34 |
my_outputs = gr.Audio(type="filepath", label="Output Audio", autoplay=True)
|
35 |
|
36 |
-
best_model_path = hf_hub_download(repo_id=REPO_ID, filename="
|
37 |
config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
|
38 |
|
39 |
api = TTS(model_path=best_model_path, config_path=config_path).to("cuda" if CUDA else "cpu")
|
@@ -42,21 +52,22 @@ api = TTS(model_path=best_model_path, config_path=config_path).to("cuda" if CUDA
|
|
42 |
for model in VOICE_CONVERSION_MODELS.values():
|
43 |
api.load_vc_model_by_name(model, gpu=CUDA)
|
44 |
|
45 |
-
def tts(text: str, split_sentences: bool = False, speaker_wav: str = None, voice_cv_model: str = 'freevc24'):
|
46 |
# replace oov characters
|
47 |
text = text.replace("\n", ". ")
|
48 |
text = text.replace("(", ",")
|
49 |
text = text.replace(")", ",")
|
50 |
text = text.replace('"', ",")
|
|
|
51 |
text = text.replace(";", ",")
|
52 |
text = text.replace("-", " ")
|
53 |
|
54 |
with tempfile.NamedTemporaryFile(suffix = ".wav", delete = False) as fp:
|
55 |
if speaker_wav:
|
56 |
api.load_vc_model_by_name(VOICE_CONVERSION_MODELS[voice_cv_model], gpu=CUDA)
|
57 |
-
api.tts_with_vc_to_file(text, speaker_wav=speaker_wav, file_path=fp.name, split_sentences=split_sentences)
|
58 |
else:
|
59 |
-
api.tts_to_file(text, file_path=fp.name, split_sentences=split_sentences)
|
60 |
|
61 |
return fp.name
|
62 |
|
|
|
6 |
|
7 |
CUDA = torch.cuda.is_available()
|
8 |
|
9 |
+
REPO_ID = "ayymen/Coqui-TTS-Vits-Multispeaker"
|
10 |
|
11 |
VOICE_CONVERSION_MODELS = {
|
12 |
'freevc24': 'voice_conversion_models/multilingual/vctk/freevc24',
|
|
|
14 |
'openvoice_v2': 'voice_conversion_models/multilingual/multi-dataset/openvoice_v2',
|
15 |
}
|
16 |
|
17 |
+
VARIANTS = {"Tachelhit": "shi", "Tarifit": "rif"}
|
18 |
+
|
19 |
+
SPEAKERS = ["yan", "sin", "idj"]
|
20 |
+
|
21 |
my_title = "ⴰⴹⵕⵉⵚ ⵙ ⵉⵎⵙⵍⵉ - Tamazight Text-to-Speech"
|
22 |
my_description = "This model is based on [VITS](https://github.com/jaywalnut310/vits), thanks to 🐸 [Coqui.ai](https://coqui.ai/)."
|
23 |
|
24 |
my_examples = [
|
25 |
+
["ⴰⵣⵓⵍ. ⵎⴰⵏⵣⴰⴽⵉⵏ?", "shi", "yan", True],
|
26 |
+
["ⵡⴰ ⵜⴰⵎⵖⴰⵔⵜ ⵎⴰ ⴷ ⵓⴽⴰⵏ ⵜⵙⴽⵔⵜ?", "shi", "sin", False],
|
27 |
+
["ⴳⵏ ⴰⴷ ⴰⴽ ⵉⵙⵙⴳⵏ ⵕⴱⴱⵉ ⵉⵜⵜⵓ ⴽ.", "shi", "yan", False],
|
28 |
+
["ⴰⵔⵔⴰⵡ ⵏ ⵍⵀⵎⵎ ⵢⵓⴽⵔ ⴰⵖ ⵉⵀⴷⵓⵎⵏ ⵏⵏⵖ!", "shi", "yan", False],
|
29 |
+
["ⴰⵣⵓⵍ. ⵎⴰⵎⵛ ⵜⴷⵊⵉⵜ?", "rif", "idj", True],
|
30 |
+
["ⴰⵇⵎⵎⵓⵎ ⵉⵇⵏⴻⵏ ⵓⵔ ⵜ ⵜⵜⵉⴷⴼⵏ ⵉⵣⴰⵏ.", "rif", "idj", False],
|
31 |
+
["ⵇⵇⵉⵎ ⵅ ⵜⴰⴷⴷⴰⵔⵜ ⵏⵏⵛ!", "rif", "idj", False],
|
32 |
+
["ⵜⴻⵜⵜⵏ ⴰⴳ ⵡⵓⵛⵛⵏ, ⵜⵜⵔⵓⵏ ⵅ ⵓⵎⴽⵙⴰ.", "rif", "idj", False]
|
33 |
]
|
34 |
|
35 |
my_inputs = [
|
36 |
+
gr.Textbox(lines=5, label="Input Text", placeholder="The only available characters are: ⴰⴱⴳⴷⴹⴻⴼⴽⵀⵃⵄⵅⵇⵉⵊⵍⵎⵏⵓⵔⵕⵖⵙⵚⵛⵜⵟⵡⵢⵣⵥⵯ !,.:?"),
|
37 |
+
gr.Dropdown(label="Variant", choices=list(VARIANTS.items()), value="shi"),
|
38 |
+
gr.Dropdown(label="Speaker", choices=SPEAKERS, value="yan"),
|
39 |
+
gr.Checkbox(label="Split Sentences (each sentence will be generated separately)", value=False),
|
40 |
+
gr.Audio(type="filepath", label="Speaker audio for voice cloning (optional)"),
|
41 |
+
gr.Dropdown(label="Voice Conversion Model", choices=list(VOICE_CONVERSION_MODELS.keys())),
|
42 |
]
|
43 |
|
44 |
my_outputs = gr.Audio(type="filepath", label="Output Audio", autoplay=True)
|
45 |
|
46 |
+
best_model_path = hf_hub_download(repo_id=REPO_ID, filename="checkpoint_390000.pth")
|
47 |
config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
|
48 |
|
49 |
api = TTS(model_path=best_model_path, config_path=config_path).to("cuda" if CUDA else "cpu")
|
|
|
52 |
for model in VOICE_CONVERSION_MODELS.values():
|
53 |
api.load_vc_model_by_name(model, gpu=CUDA)
|
54 |
|
55 |
+
def tts(text: str, variant: str = "shi", speaker: str = "yan", split_sentences: bool = False, speaker_wav: str = None, voice_cv_model: str = 'freevc24'):
|
56 |
# replace oov characters
|
57 |
text = text.replace("\n", ". ")
|
58 |
text = text.replace("(", ",")
|
59 |
text = text.replace(")", ",")
|
60 |
text = text.replace('"', ",")
|
61 |
+
text = text.replace("'", ",")
|
62 |
text = text.replace(";", ",")
|
63 |
text = text.replace("-", " ")
|
64 |
|
65 |
with tempfile.NamedTemporaryFile(suffix = ".wav", delete = False) as fp:
|
66 |
if speaker_wav:
|
67 |
api.load_vc_model_by_name(VOICE_CONVERSION_MODELS[voice_cv_model], gpu=CUDA)
|
68 |
+
api.tts_with_vc_to_file(text, speaker_wav=speaker_wav, file_path=fp.name, split_sentences=split_sentences, speaker=speaker, language=variant)
|
69 |
else:
|
70 |
+
api.tts_to_file(text, file_path=fp.name, split_sentences=split_sentences, speaker=speaker, language=variant)
|
71 |
|
72 |
return fp.name
|
73 |
|
language_ids.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"rif": 0,
|
3 |
+
"shi": 1
|
4 |
+
}
|
speakers.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a9df430489a8bf3eac98f38325dbdbd8d986fa731787724406062bacac5a471
|
3 |
+
size 864
|