oza75 committed on
Commit
4b6aade
1 Parent(s): cc1bf60

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. config.json +227 -0
  2. dvae.pth +3 -0
  3. mel_stats.pth +3 -0
  4. model.pth +3 -0
  5. trainer_0_log.txt +0 -0
  6. vocab.json +0 -0
config.json ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_path": "/workspace/coqui-TTS/finetuning/bambara/run/training",
3
+ "logger_uri": null,
4
+ "run_name": "GPT_XTTS_v2.0_BAM_FT",
5
+ "project_name": "BAM_FINE_TUNING_3",
6
+ "run_description": "\n GPT XTTS training\n ",
7
+ "print_step": 50,
8
+ "plot_step": 100,
9
+ "model_param_stats": false,
10
+ "wandb_entity": null,
11
+ "dashboard_logger": "wandb",
12
+ "save_on_interrupt": true,
13
+ "log_model_step": 1000,
14
+ "save_step": 10000,
15
+ "save_n_checkpoints": 1,
16
+ "save_checkpoints": true,
17
+ "save_all_best": false,
18
+ "save_best_after": 0,
19
+ "target_loss": null,
20
+ "print_eval": false,
21
+ "test_delay_epochs": 0,
22
+ "run_eval": true,
23
+ "run_eval_steps": null,
24
+ "distributed_backend": "nccl",
25
+ "distributed_url": "tcp://localhost:54321",
26
+ "mixed_precision": false,
27
+ "precision": "fp16",
28
+ "epochs": 60,
29
+ "batch_size": 3,
30
+ "eval_batch_size": 3,
31
+ "grad_clip": 0.0,
32
+ "scheduler_after_epoch": true,
33
+ "lr": 5e-06,
34
+ "optimizer": "AdamW",
35
+ "optimizer_params": {
36
+ "betas": [
37
+ 0.9,
38
+ 0.96
39
+ ],
40
+ "eps": 1e-08,
41
+ "weight_decay": 0.01
42
+ },
43
+ "lr_scheduler": "MultiStepLR",
44
+ "lr_scheduler_params": {
45
+ "milestones": [
46
+ 900000,
47
+ 2700000,
48
+ 5400000
49
+ ],
50
+ "gamma": 0.5,
51
+ "last_epoch": -1
52
+ },
53
+ "use_grad_scaler": false,
54
+ "allow_tf32": false,
55
+ "cudnn_enable": true,
56
+ "cudnn_deterministic": false,
57
+ "cudnn_benchmark": false,
58
+ "training_seed": 1,
59
+ "model": "xtts",
60
+ "num_loader_workers": 8,
61
+ "num_eval_loader_workers": 0,
62
+ "use_noise_augment": false,
63
+ "audio": {
64
+ "sample_rate": 22050,
65
+ "output_sample_rate": 24000,
66
+ "dvae_sample_rate": 22050
67
+ },
68
+ "use_phonemes": false,
69
+ "phonemizer": null,
70
+ "phoneme_language": null,
71
+ "compute_input_seq_cache": false,
72
+ "text_cleaner": null,
73
+ "enable_eos_bos_chars": false,
74
+ "test_sentences_file": "",
75
+ "phoneme_cache_path": null,
76
+ "characters": null,
77
+ "add_blank": false,
78
+ "batch_group_size": 48,
79
+ "loss_masking": null,
80
+ "min_audio_len": 1,
81
+ "max_audio_len": Infinity,
82
+ "min_text_len": 1,
83
+ "max_text_len": Infinity,
84
+ "compute_f0": false,
85
+ "compute_energy": false,
86
+ "compute_linear_spec": false,
87
+ "precompute_num_workers": 0,
88
+ "start_by_longest": false,
89
+ "shuffle": false,
90
+ "drop_last": false,
91
+ "datasets": [
92
+ {
93
+ "formatter": "",
94
+ "dataset_name": "",
95
+ "path": "",
96
+ "meta_file_train": "",
97
+ "ignored_speakers": null,
98
+ "language": "",
99
+ "phonemizer": "",
100
+ "meta_file_val": "",
101
+ "meta_file_attn_mask": ""
102
+ }
103
+ ],
104
+ "test_sentences": [
105
+ {
106
+ "text": "Dumuni b\u025b taa farikolo fan jum\u025bn ?",
107
+ "speaker_wav": [
108
+ "./reference_speaker.wav"
109
+ ],
110
+ "language": "bm"
111
+ },
112
+ {
113
+ "text": "Ni sumaya furak\u025bli damin\u025bna, an ka kan ka to ka fura ta ka taa \u0272\u025b, walima ka to ka pikiri ni s\u0254r\u0254muw k\u025b ka taa \u0272\u025b fo sumaya ka ban pew.",
114
+ "speaker_wav": [
115
+ "./reference_speaker.wav"
116
+ ],
117
+ "language": "bm"
118
+ },
119
+ {
120
+ "text": "A ko k\u025bra degunba ye jamanadenw ma k\u025br\u025bnk\u025br\u025bnna demis\u025bn finitiniw ni m\u0254g\u0254 k\u0254r\u0254baw.",
121
+ "speaker_wav": [
122
+ "./reference_speaker.wav"
123
+ ],
124
+ "language": "bm"
125
+ },
126
+ {
127
+ "text": "le texte devra attendre l\u2019avis du Conseil constitutionnel avant son examen \u00e0 l\u2019Assembl\u00e9e.",
128
+ "speaker_wav": [
129
+ "./reference_speaker.wav"
130
+ ],
131
+ "language": "fr"
132
+ },
133
+ {
134
+ "text": "Les Insoumis ont obtenu ce mardi 9 avril que le texte soit retir\u00e9 de l\u2019ordre du jour de l\u2019Assembl\u00e9e nationale en attendant un avis du Conseil constitutionnel.",
135
+ "speaker_wav": [
136
+ "./reference_speaker.wav"
137
+ ],
138
+ "language": "fr"
139
+ }
140
+ ],
141
+ "eval_split_max_size": 256,
142
+ "eval_split_size": 0.01,
143
+ "use_speaker_weighted_sampler": false,
144
+ "speaker_weighted_sampler_alpha": 1.0,
145
+ "use_language_weighted_sampler": false,
146
+ "language_weighted_sampler_alpha": 1.0,
147
+ "use_length_weighted_sampler": false,
148
+ "length_weighted_sampler_alpha": 1.0,
149
+ "model_args": {
150
+ "gpt_batch_size": 1,
151
+ "enable_redaction": false,
152
+ "kv_cache": true,
153
+ "gpt_checkpoint": "",
154
+ "clvp_checkpoint": null,
155
+ "decoder_checkpoint": null,
156
+ "num_chars": 255,
157
+ "tokenizer_file": "./vocab.json",
158
+ "gpt_max_audio_tokens": 605,
159
+ "gpt_max_text_tokens": 402,
160
+ "gpt_max_prompt_tokens": 70,
161
+ "gpt_layers": 30,
162
+ "gpt_n_model_channels": 1024,
163
+ "gpt_n_heads": 16,
164
+ "gpt_number_text_tokens": 8130,
165
+ "gpt_start_text_token": 261,
166
+ "gpt_stop_text_token": 0,
167
+ "gpt_num_audio_tokens": 1026,
168
+ "gpt_start_audio_token": 1024,
169
+ "gpt_stop_audio_token": 1025,
170
+ "gpt_code_stride_len": 1024,
171
+ "gpt_use_masking_gt_prompt_approach": true,
172
+ "gpt_use_perceiver_resampler": true,
173
+ "input_sample_rate": 22050,
174
+ "output_sample_rate": 24000,
175
+ "output_hop_length": 256,
176
+ "decoder_input_dim": 1024,
177
+ "d_vector_dim": 512,
178
+ "cond_d_vector_in_each_upsampling_layer": true,
179
+ "duration_const": 102400,
180
+ "min_conditioning_length": 66150,
181
+ "max_conditioning_length": 132300,
182
+ "gpt_loss_text_ce_weight": 0.01,
183
+ "gpt_loss_mel_ce_weight": 1.0,
184
+ "debug_loading_failures": false,
185
+ "max_wav_length": 255995,
186
+ "max_text_length": 200,
187
+ "mel_norm_file": "/workspace/coqui-TTS/finetuning/bambara/run/training/XTTS_v2.0_original_model_files/mel_stats.pth",
188
+ "dvae_checkpoint": "/workspace/coqui-TTS/finetuning/bambara/run/training/XTTS_v2.0_original_model_files/dvae.pth",
189
+ "xtts_checkpoint": "/workspace/coqui-TTS/finetuning/bambara/run/training/GPT_XTTS_v2.0_BAM_FT-April-09-2024_02+58PM-58810011/model.pth",
190
+ "vocoder": ""
191
+ },
192
+ "model_dir": null,
193
+ "languages": [
194
+ "en",
195
+ "es",
196
+ "fr",
197
+ "de",
198
+ "it",
199
+ "pt",
200
+ "pl",
201
+ "tr",
202
+ "ru",
203
+ "nl",
204
+ "cs",
205
+ "ar",
206
+ "zh-cn",
207
+ "hu",
208
+ "ko",
209
+ "ja",
210
+ "hi",
211
+ "bm"
212
+ ],
213
+ "temperature": 0.85,
214
+ "length_penalty": 1.0,
215
+ "repetition_penalty": 2.0,
216
+ "top_k": 50,
217
+ "top_p": 0.85,
218
+ "num_gpt_outputs": 1,
219
+ "gpt_cond_len": 12,
220
+ "gpt_cond_chunk_len": 4,
221
+ "max_ref_len": 10,
222
+ "sound_norm_refs": false,
223
+ "optimizer_wd_only_on_weights": true,
224
+ "weighted_loss_attrs": {},
225
+ "weighted_loss_multipliers": {},
226
+ "github_branch": "* dev"
227
+ }
dvae.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b29bc227d410d4991e0a8c09b858f77415013eeb9fba9650258e96095557d97a
3
+ size 210514388
mel_stats.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f69422a8a8f344c4fca2f0c6b8d41d2151d6615b7321e48e6bb15ae949b119c
3
+ size 1067
model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e114931b09dea049f64b9f885c440984239a8a6ead490754ceb73b22be3d60c
3
+ size 5643555885
trainer_0_log.txt ADDED
The diff for this file is too large to render. See raw diff
 
vocab.json ADDED
The diff for this file is too large to render. See raw diff