oza75 committed on
Commit
0cd9c96
1 Parent(s): 48232cf

Upload config_enhanced_01.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config_enhanced_01.json +329 -0
config_enhanced_01.json ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_path": "/workspace/coqui-TTS/finetuning/bambara/run/training",
3
+ "logger_uri": null,
4
+ "run_name": "xtts_lr_8e-06",
5
+ "project_name": "BAM_FINE_TUNING_3",
6
+ "run_description": "\n GPT XTTS training\n ",
7
+ "print_step": 50,
8
+ "plot_step": 100,
9
+ "model_param_stats": false,
10
+ "wandb_entity": null,
11
+ "dashboard_logger": "wandb",
12
+ "save_on_interrupt": true,
13
+ "log_model_step": 1000,
14
+ "save_step": 10000,
15
+ "save_n_checkpoints": 1,
16
+ "save_checkpoints": true,
17
+ "save_all_best": false,
18
+ "save_best_after": 0,
19
+ "target_loss": null,
20
+ "print_eval": false,
21
+ "test_delay_epochs": 0,
22
+ "run_eval": true,
23
+ "run_eval_steps": null,
24
+ "distributed_backend": "nccl",
25
+ "distributed_url": "tcp://localhost:54321",
26
+ "mixed_precision": false,
27
+ "precision": "fp16",
28
+ "epochs": 40,
29
+ "batch_size": 3,
30
+ "eval_batch_size": 3,
31
+ "grad_clip": 0.0,
32
+ "scheduler_after_epoch": true,
33
+ "lr": 8e-06,
34
+ "optimizer": "AdamW",
35
+ "optimizer_params": {
36
+ "betas": [
37
+ 0.9,
38
+ 0.96
39
+ ],
40
+ "eps": 1e-08,
41
+ "weight_decay": 0.01
42
+ },
43
+ "lr_scheduler": "MultiStepLR",
44
+ "lr_scheduler_params": {
45
+ "milestones": [
46
+ 900000,
47
+ 2700000,
48
+ 5400000
49
+ ],
50
+ "gamma": 0.5,
51
+ "last_epoch": -1
52
+ },
53
+ "use_grad_scaler": false,
54
+ "allow_tf32": false,
55
+ "cudnn_enable": true,
56
+ "cudnn_deterministic": false,
57
+ "cudnn_benchmark": false,
58
+ "training_seed": 1,
59
+ "model": "xtts",
60
+ "num_loader_workers": 8,
61
+ "num_eval_loader_workers": 0,
62
+ "use_noise_augment": false,
63
+ "audio": {
64
+ "sample_rate": 22050,
65
+ "output_sample_rate": 24000,
66
+ "dvae_sample_rate": 22050
67
+ },
68
+ "use_phonemes": false,
69
+ "phonemizer": null,
70
+ "phoneme_language": null,
71
+ "compute_input_seq_cache": false,
72
+ "text_cleaner": null,
73
+ "enable_eos_bos_chars": false,
74
+ "test_sentences_file": "",
75
+ "phoneme_cache_path": null,
76
+ "characters": null,
77
+ "add_blank": false,
78
+ "batch_group_size": 48,
79
+ "loss_masking": null,
80
+ "min_audio_len": 1,
81
+ "max_audio_len": Infinity,
82
+ "min_text_len": 1,
83
+ "max_text_len": Infinity,
84
+ "compute_f0": false,
85
+ "compute_energy": false,
86
+ "compute_linear_spec": false,
87
+ "precompute_num_workers": 0,
88
+ "start_by_longest": false,
89
+ "shuffle": false,
90
+ "drop_last": false,
91
+ "datasets": [
92
+ {
93
+ "formatter": "",
94
+ "dataset_name": "",
95
+ "path": "",
96
+ "meta_file_train": "",
97
+ "ignored_speakers": null,
98
+ "language": "",
99
+ "phonemizer": "",
100
+ "meta_file_val": "",
101
+ "meta_file_attn_mask": ""
102
+ }
103
+ ],
104
+ "test_sentences": [
105
+ {
106
+ "text": "Dumuni b\u025b taa farikolo fan jum\u025bn ?",
107
+ "speaker_wav": [
108
+ "./reference_audios/bm/speaker_10/0.wav",
109
+ "./reference_audios/bm/speaker_10/1.wav",
110
+ "./reference_audios/bm/speaker_10/3.wav",
111
+ "./reference_audios/bm/speaker_10/4.wav",
112
+ "./reference_audios/bm/speaker_10/5.wav",
113
+ "./reference_audios/bm/speaker_10/6.wav",
114
+ "./reference_audios/bm/speaker_10/7.wav",
115
+ "./reference_audios/bm/speaker_10/8.wav",
116
+ "./reference_audios/bm/speaker_10/9.wav"
117
+ ],
118
+ "language": "bm"
119
+ },
120
+ {
121
+ "text": "Ni sumaya furak\u025bli damin\u025bna, an ka kan ka to ka fura ta ka taa \u0272\u025b, walima ka to ka pikiri ni s\u0254r\u0254muw k\u025b ka taa \u0272\u025b fo sumaya ka ban pew.",
122
+ "speaker_wav": [
123
+ "./reference_audios/bm/speaker_14/0.wav",
124
+ "./reference_audios/bm/speaker_14/1.wav",
125
+ "./reference_audios/bm/speaker_14/2.wav",
126
+ "./reference_audios/bm/speaker_14/3.wav",
127
+ "./reference_audios/bm/speaker_14/4.wav",
128
+ "./reference_audios/bm/speaker_14/5.wav",
129
+ "./reference_audios/bm/speaker_14/6.wav",
130
+ "./reference_audios/bm/speaker_14/7.wav",
131
+ "./reference_audios/bm/speaker_14/8.wav"
132
+ ],
133
+ "language": "bm"
134
+ },
135
+ {
136
+ "text": "A ko k\u025bra degunba ye jamanadenw ma k\u025br\u025bnk\u025br\u025bnna demis\u025bn finitiniw ni m\u0254g\u0254 k\u0254r\u0254baw.",
137
+ "speaker_wav": [
138
+ "./reference_audios/bm/speaker_15/0.wav",
139
+ "./reference_audios/bm/speaker_15/1.wav",
140
+ "./reference_audios/bm/speaker_15/2.wav",
141
+ "./reference_audios/bm/speaker_15/3.wav",
142
+ "./reference_audios/bm/speaker_15/4.wav",
143
+ "./reference_audios/bm/speaker_15/6.wav",
144
+ "./reference_audios/bm/speaker_15/7.wav"
145
+ ],
146
+ "language": "bm"
147
+ },
148
+ {
149
+ "text": "Silam\u025b dannabaaw Burkina Faso la, u ye Eid El Fitr seli k\u025b seli la min k\u025bra sun kalo laban don na .",
150
+ "speaker_wav": [
151
+ "./reference_audios/bm/speaker_27/0.wav",
152
+ "./reference_audios/bm/speaker_27/1.wav",
153
+ "./reference_audios/bm/speaker_27/2.wav",
154
+ "./reference_audios/bm/speaker_27/3.wav",
155
+ "./reference_audios/bm/speaker_27/7.wav",
156
+ "./reference_audios/bm/speaker_27/8.wav",
157
+ "./reference_audios/bm/speaker_27/9.wav"
158
+ ],
159
+ "language": "bm"
160
+ },
161
+ {
162
+ "text": "le texte devra attendre l\u2019avis du Conseil constitutionnel avant son examen \u00e0 l\u2019Assembl\u00e9e.",
163
+ "speaker_wav": [
164
+ "./reference_audios/fr/speaker_100/0.wav",
165
+ "./reference_audios/fr/speaker_100/1.wav",
166
+ "./reference_audios/fr/speaker_100/2.wav",
167
+ "./reference_audios/fr/speaker_100/3.wav",
168
+ "./reference_audios/fr/speaker_100/4.wav",
169
+ "./reference_audios/fr/speaker_100/5.wav",
170
+ "./reference_audios/fr/speaker_100/6.wav",
171
+ "./reference_audios/fr/speaker_100/7.wav",
172
+ "./reference_audios/fr/speaker_100/8.wav",
173
+ "./reference_audios/fr/speaker_100/9.wav"
174
+ ],
175
+ "language": "fr"
176
+ },
177
+ {
178
+ "text": "Below are benchmarks for downsampling and upsampling waveforms between two pairs of sampling rates.",
179
+ "speaker_wav": [
180
+ "./reference_audios/en/speaker_98/0.wav",
181
+ "./reference_audios/en/speaker_98/1.wav",
182
+ "./reference_audios/en/speaker_98/2.wav",
183
+ "./reference_audios/en/speaker_98/3.wav",
184
+ "./reference_audios/en/speaker_98/4.wav",
185
+ "./reference_audios/en/speaker_98/5.wav",
186
+ "./reference_audios/en/speaker_98/6.wav",
187
+ "./reference_audios/en/speaker_98/7.wav",
188
+ "./reference_audios/en/speaker_98/8.wav",
189
+ "./reference_audios/en/speaker_98/9.wav"
190
+ ],
191
+ "language": "en"
192
+ },
193
+ {
194
+ "text": "La convivencia se asienta en Euskadi con la asignatura pendiente de la memoria",
195
+ "speaker_wav": [
196
+ "./reference_audios/es/speaker_47/0.wav",
197
+ "./reference_audios/es/speaker_47/1.wav",
198
+ "./reference_audios/es/speaker_47/2.wav",
199
+ "./reference_audios/es/speaker_47/3.wav",
200
+ "./reference_audios/es/speaker_47/4.wav",
201
+ "./reference_audios/es/speaker_47/5.wav",
202
+ "./reference_audios/es/speaker_47/6.wav",
203
+ "./reference_audios/es/speaker_47/7.wav",
204
+ "./reference_audios/es/speaker_47/8.wav",
205
+ "./reference_audios/es/speaker_47/9.wav"
206
+ ],
207
+ "language": "es"
208
+ },
209
+ {
210
+ "text": "Quei mariuoli di troppo alla corte dell\u2019ex sceriffo. Cos\u00ec il sistema Emiliano sta affondando la Puglia",
211
+ "speaker_wav": [
212
+ "./reference_audios/it/speaker_32/0.wav",
213
+ "./reference_audios/it/speaker_32/1.wav",
214
+ "./reference_audios/it/speaker_32/2.wav",
215
+ "./reference_audios/it/speaker_32/3.wav",
216
+ "./reference_audios/it/speaker_32/4.wav",
217
+ "./reference_audios/it/speaker_32/5.wav",
218
+ "./reference_audios/it/speaker_32/6.wav",
219
+ "./reference_audios/it/speaker_32/7.wav",
220
+ "./reference_audios/it/speaker_32/8.wav",
221
+ "./reference_audios/it/speaker_32/9.wav"
222
+ ],
223
+ "language": "it"
224
+ },
225
+ {
226
+ "text": "Les Insoumis ont obtenu ce mardi 9 avril que le texte soit retir\u00e9 de l\u2019ordre du jour de l\u2019Assembl\u00e9e nationale en attendant un avis du Conseil constitutionnel.",
227
+ "speaker_wav": [
228
+ "./reference_audios/fr/speaker_100/0.wav",
229
+ "./reference_audios/fr/speaker_100/1.wav",
230
+ "./reference_audios/fr/speaker_100/2.wav",
231
+ "./reference_audios/fr/speaker_100/3.wav",
232
+ "./reference_audios/fr/speaker_100/4.wav",
233
+ "./reference_audios/fr/speaker_100/5.wav",
234
+ "./reference_audios/fr/speaker_100/6.wav",
235
+ "./reference_audios/fr/speaker_100/7.wav",
236
+ "./reference_audios/fr/speaker_100/8.wav",
237
+ "./reference_audios/fr/speaker_100/9.wav"
238
+ ],
239
+ "language": "fr"
240
+ }
241
+ ],
242
+ "eval_split_max_size": 256,
243
+ "eval_split_size": 0.01,
244
+ "use_speaker_weighted_sampler": false,
245
+ "speaker_weighted_sampler_alpha": 1.0,
246
+ "use_language_weighted_sampler": false,
247
+ "language_weighted_sampler_alpha": 1.0,
248
+ "use_length_weighted_sampler": false,
249
+ "length_weighted_sampler_alpha": 1.0,
250
+ "model_args": {
251
+ "gpt_batch_size": 1,
252
+ "enable_redaction": false,
253
+ "kv_cache": true,
254
+ "gpt_checkpoint": "",
255
+ "clvp_checkpoint": null,
256
+ "decoder_checkpoint": null,
257
+ "num_chars": 255,
258
+ "tokenizer_file": "./saved/combined_vocab.json",
259
+ "gpt_max_audio_tokens": 605,
260
+ "gpt_max_text_tokens": 402,
261
+ "gpt_max_prompt_tokens": 70,
262
+ "gpt_layers": 30,
263
+ "gpt_n_model_channels": 1024,
264
+ "gpt_n_heads": 16,
265
+ "gpt_number_text_tokens": 8130,
266
+ "gpt_start_text_token": 261,
267
+ "gpt_stop_text_token": 0,
268
+ "gpt_num_audio_tokens": 1026,
269
+ "gpt_start_audio_token": 1024,
270
+ "gpt_stop_audio_token": 1025,
271
+ "gpt_code_stride_len": 1024,
272
+ "gpt_use_masking_gt_prompt_approach": true,
273
+ "gpt_use_perceiver_resampler": true,
274
+ "input_sample_rate": 22050,
275
+ "output_sample_rate": 24000,
276
+ "output_hop_length": 256,
277
+ "decoder_input_dim": 1024,
278
+ "d_vector_dim": 512,
279
+ "cond_d_vector_in_each_upsampling_layer": true,
280
+ "duration_const": 102400,
281
+ "min_conditioning_length": 66150,
282
+ "max_conditioning_length": 132300,
283
+ "gpt_loss_text_ce_weight": 0.01,
284
+ "gpt_loss_mel_ce_weight": 1.0,
285
+ "debug_loading_failures": false,
286
+ "max_wav_length": 255995,
287
+ "max_text_length": 200,
288
+ "mel_norm_file": "/workspace/coqui-TTS/finetuning/bambara/run/training/XTTS_v2.0_original_model_files/mel_stats.pth",
289
+ "dvae_checkpoint": "/workspace/coqui-TTS/finetuning/bambara/run/training/XTTS_v2.0_original_model_files/dvae.pth",
290
+ "xtts_checkpoint": "/workspace/coqui-TTS/finetuning/bambara/run/training/XTTS_v2.0_original_model_files/model.pth",
291
+ "vocoder": ""
292
+ },
293
+ "model_dir": null,
294
+ "languages": [
295
+ "en",
296
+ "es",
297
+ "fr",
298
+ "de",
299
+ "it",
300
+ "pt",
301
+ "pl",
302
+ "tr",
303
+ "ru",
304
+ "nl",
305
+ "cs",
306
+ "ar",
307
+ "zh-cn",
308
+ "hu",
309
+ "ko",
310
+ "ja",
311
+ "hi",
312
+ "bm"
313
+ ],
314
+ "temperature": 0.85,
315
+ "length_penalty": 1.0,
316
+ "repetition_penalty": 2.0,
317
+ "top_k": 50,
318
+ "top_p": 0.85,
319
+ "num_gpt_outputs": 1,
320
+ "gpt_cond_len": 12,
321
+ "gpt_cond_chunk_len": 4,
322
+ "max_ref_len": 10,
323
+ "sound_norm_refs": false,
324
+ "optimizer_wd_only_on_weights": true,
325
+ "weighted_loss_attrs": {},
326
+ "weighted_loss_multipliers": {},
327
+ "transliterate_bambara": false,
328
+ "github_branch": "* dev"
329
+ }