abis90 committed on
Commit
81643c5
·
verified ·
1 Parent(s): eb41cc9

Upload 4 files

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -35
  2. README.md +100 -0
  3. checkpoint.pth +3 -0
  4. config.json +297 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,103 @@
1
  ---
2
  license: mit
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: mit
3
+ language:
4
+ - ko
5
+ pipeline_tag: text-to-speech
6
  ---
7
+
8
+ # MeloTTS
9
+
10
+ MeloTTS is a **high-quality multi-lingual** text-to-speech library by [MyShell.ai](https://myshell.ai). Supported languages include:
11
+
12
+
13
+ | Model card | Example |
14
+ | --- | --- |
15
+ | [English](https://huggingface.co/myshell-ai/MeloTTS-English-v2) (American) | [Link](https://myshell-public-repo-hosting.s3.amazonaws.com/myshellttsbase/examples/en/EN-US/speed_1.0/sent_000.wav) |
16
+ | [English](https://huggingface.co/myshell-ai/MeloTTS-English-v2) (British) | [Link](https://myshell-public-repo-hosting.s3.amazonaws.com/myshellttsbase/examples/en/EN-BR/speed_1.0/sent_000.wav) |
17
+ | [English](https://huggingface.co/myshell-ai/MeloTTS-English-v2) (Indian) | [Link](https://myshell-public-repo-hosting.s3.amazonaws.com/myshellttsbase/examples/en/EN_INDIA/speed_1.0/sent_000.wav) |
18
+ | [English](https://huggingface.co/myshell-ai/MeloTTS-English-v2) (Australian) | [Link](https://myshell-public-repo-hosting.s3.amazonaws.com/myshellttsbase/examples/en/EN-AU/speed_1.0/sent_000.wav) |
19
+ | [English](https://huggingface.co/myshell-ai/MeloTTS-English-v2) (Default) | [Link](https://myshell-public-repo-hosting.s3.amazonaws.com/myshellttsbase/examples/en/EN-Default/speed_1.0/sent_000.wav) |
20
+ | [Spanish](https://huggingface.co/myshell-ai/MeloTTS-Spanish) | [Link](https://myshell-public-repo-hosting.s3.amazonaws.com/myshellttsbase/examples/es/ES/speed_1.0/sent_000.wav) |
21
+ | [French](https://huggingface.co/myshell-ai/MeloTTS-French) | [Link](https://myshell-public-repo-hosting.s3.amazonaws.com/myshellttsbase/examples/fr/FR/speed_1.0/sent_000.wav) |
22
+ | [Chinese](https://huggingface.co/myshell-ai/MeloTTS-Chinese) (mix EN) | [Link](https://myshell-public-repo-hosting.s3.amazonaws.com/myshellttsbase/examples/zh/ZH/speed_1.0/sent_008.wav) |
23
+ | [Japanese](https://huggingface.co/myshell-ai/MeloTTS-Japanese) | [Link](https://myshell-public-repo-hosting.s3.amazonaws.com/myshellttsbase/examples/jp/JP/speed_1.0/sent_000.wav) |
24
+ | [Korean](https://huggingface.co/myshell-ai/MeloTTS-Korean/) | [Link](https://myshell-public-repo-hosting.s3.amazonaws.com/myshellttsbase/examples/kr/KR/speed_1.0/sent_000.wav) |
25
+
26
+ Some other features include:
27
+ - The Chinese speaker supports `mixed Chinese and English`.
28
+ - Fast enough for `CPU real-time inference`.
29
+
30
+
31
+ ## Usage
32
+
33
+ ### Without Installation
34
+
35
+ An unofficial [live demo](https://huggingface.co/spaces/mrfakename/MeloTTS) is hosted on Hugging Face Spaces.
36
+
37
+ #### Use it on MyShell
38
+
39
+ There are hundreds of TTS models on MyShell, many more than just MeloTTS. See examples [here](https://github.com/myshell-ai/MeloTTS/blob/main/docs/quick_use.md#use-melotts-without-installation).
40
+ More can be found at the widget center of [MyShell.ai](https://app.myshell.ai/robot-workshop).
41
+
42
+ ### Install and Use Locally
43
+
44
+ Follow the installation steps [here](https://github.com/myshell-ai/MeloTTS/blob/main/docs/install.md#linux-and-macos-install) before using the following snippet:
45
+
46
+ ```python
47
+ from melo.api import TTS
48
+
49
+ # Speed is adjustable
50
+ speed = 1.0
51
+
52
+ # CPU is sufficient for real-time inference.
53
+ # You can set it manually to 'cpu' or 'cuda' or 'cuda:0' or 'mps'
54
+ device = 'auto' # Will automatically use GPU if available
55
+
56
+ # English
57
+ text = "Did you ever hear a folk tale about a giant turtle?"
58
+ model = TTS(language='EN', device=device)
59
+ speaker_ids = model.hps.data.spk2id
60
+
61
+ # American accent
62
+ output_path = 'en-us.wav'
63
+ model.tts_to_file(text, speaker_ids['EN-US'], output_path, speed=speed)
64
+
65
+ # British accent
66
+ output_path = 'en-br.wav'
67
+ model.tts_to_file(text, speaker_ids['EN-BR'], output_path, speed=speed)
68
+
69
+ # Indian accent
70
+ output_path = 'en-india.wav'
71
+ model.tts_to_file(text, speaker_ids['EN_INDIA'], output_path, speed=speed)
72
+
73
+ # Australian accent
74
+ output_path = 'en-au.wav'
75
+ model.tts_to_file(text, speaker_ids['EN-AU'], output_path, speed=speed)
76
+
77
+ # Default accent
78
+ output_path = 'en-default.wav'
79
+ model.tts_to_file(text, speaker_ids['EN-Default'], output_path, speed=speed)
80
+
81
+ ```
82
+
83
+
84
+ ## Join the Community
85
+
86
+ **Open Source AI Grant**
87
+
88
+ We are actively sponsoring open-source AI projects. The sponsorship includes GPU resources, funding and intellectual support (collaboration with top research labs). We welcome both research and engineering projects, as long as the open-source community needs them. Please contact [Zengyi Qin](https://www.qinzy.tech/) if you are interested.
89
+
90
+ **Contributing**
91
+
92
+ If you find this work useful, please consider contributing to the GitHub [repo](https://github.com/myshell-ai/MeloTTS).
93
+
94
+ - Many thanks to [@fakerybakery](https://github.com/fakerybakery) for adding the Web UI and CLI part.
95
+
96
+ ## License
97
+
98
+ This library is released under the MIT License, which means it is free for both commercial and non-commercial use.
99
+
100
+ ## Acknowledgements
101
+
102
+ This implementation is based on [TTS](https://github.com/coqui-ai/TTS), [VITS](https://github.com/jaywalnut310/vits), [VITS2](https://github.com/daniilrobnikov/vits2) and [Bert-VITS2](https://github.com/fishaudio/Bert-VITS2). We appreciate their awesome work.
103
+
checkpoint.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acd278040eaf9536908e2b965273df5a731c44d8f0da66cc5fed7972772ed23c
3
+ size 207860748
config.json ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "segment_size": 16384
4
+ },
5
+ "data": {
6
+ "sampling_rate": 44100,
7
+ "filter_length": 2048,
8
+ "hop_length": 512,
9
+ "add_blank": true,
10
+ "n_speakers": 256,
11
+ "spk2id": {
12
+ "EN-US": 0,
13
+ "EN-BR": 1,
14
+ "EN_INDIA": 2,
15
+ "EN-AU": 3,
16
+ "EN-Default": 4
17
+ }
18
+ },
19
+ "model": {
20
+ "use_spk_conditioned_encoder": true,
21
+ "use_noise_scaled_mas": true,
22
+ "use_mel_posterior_encoder": false,
23
+ "use_duration_discriminator": true,
24
+ "inter_channels": 192,
25
+ "hidden_channels": 192,
26
+ "filter_channels": 768,
27
+ "n_heads": 2,
28
+ "n_layers": 6,
29
+ "n_layers_trans_flow": 3,
30
+ "kernel_size": 3,
31
+ "p_dropout": 0.1,
32
+ "resblock": "1",
33
+ "resblock_kernel_sizes": [
34
+ 3,
35
+ 7,
36
+ 11
37
+ ],
38
+ "resblock_dilation_sizes": [
39
+ [
40
+ 1,
41
+ 3,
42
+ 5
43
+ ],
44
+ [
45
+ 1,
46
+ 3,
47
+ 5
48
+ ],
49
+ [
50
+ 1,
51
+ 3,
52
+ 5
53
+ ]
54
+ ],
55
+ "upsample_rates": [
56
+ 8,
57
+ 8,
58
+ 2,
59
+ 2,
60
+ 2
61
+ ],
62
+ "upsample_initial_channel": 512,
63
+ "upsample_kernel_sizes": [
64
+ 16,
65
+ 16,
66
+ 8,
67
+ 2,
68
+ 2
69
+ ],
70
+ "n_layers_q": 3,
71
+ "use_spectral_norm": false,
72
+ "gin_channels": 256
73
+ },
74
+ "symbols": [
75
+ "_",
76
+ "\"",
77
+ "(",
78
+ ")",
79
+ "*",
80
+ "/",
81
+ ":",
82
+ "AA",
83
+ "E",
84
+ "EE",
85
+ "En",
86
+ "N",
87
+ "OO",
88
+ "Q",
89
+ "V",
90
+ "[",
91
+ "\\",
92
+ "]",
93
+ "^",
94
+ "a",
95
+ "a:",
96
+ "aa",
97
+ "ae",
98
+ "ah",
99
+ "ai",
100
+ "an",
101
+ "ang",
102
+ "ao",
103
+ "aw",
104
+ "ay",
105
+ "b",
106
+ "by",
107
+ "c",
108
+ "ch",
109
+ "d",
110
+ "dh",
111
+ "dy",
112
+ "e",
113
+ "e:",
114
+ "eh",
115
+ "ei",
116
+ "en",
117
+ "eng",
118
+ "er",
119
+ "ey",
120
+ "f",
121
+ "g",
122
+ "gy",
123
+ "h",
124
+ "hh",
125
+ "hy",
126
+ "i",
127
+ "i0",
128
+ "i:",
129
+ "ia",
130
+ "ian",
131
+ "iang",
132
+ "iao",
133
+ "ie",
134
+ "ih",
135
+ "in",
136
+ "ing",
137
+ "iong",
138
+ "ir",
139
+ "iu",
140
+ "iy",
141
+ "j",
142
+ "jh",
143
+ "k",
144
+ "ky",
145
+ "l",
146
+ "m",
147
+ "my",
148
+ "n",
149
+ "ng",
150
+ "ny",
151
+ "o",
152
+ "o:",
153
+ "ong",
154
+ "ou",
155
+ "ow",
156
+ "oy",
157
+ "p",
158
+ "py",
159
+ "q",
160
+ "r",
161
+ "ry",
162
+ "s",
163
+ "sh",
164
+ "t",
165
+ "th",
166
+ "ts",
167
+ "ty",
168
+ "u",
169
+ "u:",
170
+ "ua",
171
+ "uai",
172
+ "uan",
173
+ "uang",
174
+ "uh",
175
+ "ui",
176
+ "un",
177
+ "uo",
178
+ "uw",
179
+ "v",
180
+ "van",
181
+ "ve",
182
+ "vn",
183
+ "w",
184
+ "x",
185
+ "y",
186
+ "z",
187
+ "zh",
188
+ "zy",
189
+ "~",
190
+ "¡",
191
+ "¿",
192
+ "æ",
193
+ "ç",
194
+ "ð",
195
+ "ø",
196
+ "ŋ",
197
+ "œ",
198
+ "ɐ",
199
+ "ɑ",
200
+ "ɒ",
201
+ "ɔ",
202
+ "ɕ",
203
+ "ə",
204
+ "ɛ",
205
+ "ɜ",
206
+ "ɡ",
207
+ "ɣ",
208
+ "ɥ",
209
+ "ɦ",
210
+ "ɪ",
211
+ "ɫ",
212
+ "ɬ",
213
+ "ɭ",
214
+ "ɯ",
215
+ "ɲ",
216
+ "ɵ",
217
+ "ɸ",
218
+ "ɹ",
219
+ "ɾ",
220
+ "ʁ",
221
+ "ʃ",
222
+ "ʊ",
223
+ "ʌ",
224
+ "ʎ",
225
+ "ʏ",
226
+ "ʑ",
227
+ "ʒ",
228
+ "ʝ",
229
+ "ʲ",
230
+ "ˈ",
231
+ "ˌ",
232
+ "ː",
233
+ "̃",
234
+ "̩",
235
+ "β",
236
+ "θ",
237
+ "ᄀ",
238
+ "ᄁ",
239
+ "ᄂ",
240
+ "ᄃ",
241
+ "ᄄ",
242
+ "ᄅ",
243
+ "ᄆ",
244
+ "ᄇ",
245
+ "ᄈ",
246
+ "ᄉ",
247
+ "ᄊ",
248
+ "ᄋ",
249
+ "ᄌ",
250
+ "ᄍ",
251
+ "ᄎ",
252
+ "ᄏ",
253
+ "ᄐ",
254
+ "ᄑ",
255
+ "ᄒ",
256
+ "ᅡ",
257
+ "ᅢ",
258
+ "ᅣ",
259
+ "ᅤ",
260
+ "ᅥ",
261
+ "ᅦ",
262
+ "ᅧ",
263
+ "ᅨ",
264
+ "ᅩ",
265
+ "ᅪ",
266
+ "ᅫ",
267
+ "ᅬ",
268
+ "ᅭ",
269
+ "ᅮ",
270
+ "ᅯ",
271
+ "ᅰ",
272
+ "ᅱ",
273
+ "ᅲ",
274
+ "ᅳ",
275
+ "ᅴ",
276
+ "ᅵ",
277
+ "ᆨ",
278
+ "ᆫ",
279
+ "ᆮ",
280
+ "ᆯ",
281
+ "ᆷ",
282
+ "ᆸ",
283
+ "ᆼ",
284
+ "ㄸ",
285
+ "!",
286
+ "?",
287
+ "…",
288
+ ",",
289
+ ".",
290
+ "'",
291
+ "-",
292
+ "SP",
293
+ "UNK"
294
+ ],
295
+ "num_tones": 16,
296
+ "num_languages": 10
297
+ }