NeonBohdan committed on
Commit
5c12205
1 Parent(s): 3e57ccb

Added ukr-ru config

Browse files
Files changed (1) hide show
  1. config.json +34 -21
config.json CHANGED
@@ -43,11 +43,14 @@
43
  "lr_scheduler": "",
44
  "lr_scheduler_params": {},
45
  "use_grad_scaler": false,
 
 
 
 
46
  "model": "vits",
47
  "num_loader_workers": 8,
48
  "num_eval_loader_workers": 8,
49
  "use_noise_augment": false,
50
- "use_language_weighted_sampler": true,
51
  "audio": {
52
  "fft_size": 1024,
53
  "win_length": 1024,
@@ -61,8 +64,8 @@
61
  "ref_level_db": 20,
62
  "do_sound_norm": false,
63
  "log_func": "np.log",
64
- "do_trim_silence": false,
65
- "trim_db": 20.0,
66
  "do_rms_norm": false,
67
  "db_level": null,
68
  "power": 1.5,
@@ -75,7 +78,7 @@
75
  "do_amp_to_db_mel": true,
76
  "pitch_fmax": 640.0,
77
  "pitch_fmin": 0.0,
78
- "signal_norm": true,
79
  "min_level_db": -100,
80
  "symmetric_norm": true,
81
  "max_norm": 4.0,
@@ -97,7 +100,7 @@
97
  "eos": "<EOS>",
98
  "bos": "<BOS>",
99
  "blank": "<BLNK>",
100
- "characters": "a\u0105bc\u0107de\u0119fghijkl\u0142mn\u0144o\u00f3pqrs\u015btuvwxyz\u017a\u017c\u0430\u0431\u0432\u0433\u0491\u0434\u0435\u0454\u0436\u0437\u0438\u0456\u0457\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044c\u044e\u044f",
101
  "punctuations": "!'(),-.:;? ",
102
  "phonemes": null,
103
  "is_unique": true,
@@ -108,51 +111,61 @@
108
  "loss_masking": null,
109
  "sort_by_audio_len": true,
110
  "min_audio_len": 32768,
111
- "max_audio_len": 160000,
112
  "min_text_len": 1,
113
  "max_text_len": Infinity,
114
  "compute_f0": false,
115
  "compute_linear_spec": true,
116
- "precompute_num_workers": 0,
117
  "start_by_longest": false,
118
  "datasets": [
119
  {
120
  "name": "mailabs",
121
- "path": "",
122
  "meta_file_train": "",
123
- "ignored_speakers": null,
 
 
 
124
  "language": "uk",
125
  "meta_file_val": "",
126
  "meta_file_attn_mask": ""
127
  },
128
  {
129
  "name": "mailabs",
130
- "path": "",
131
  "meta_file_train": "",
132
- "ignored_speakers": null,
133
- "language": "pl",
 
 
 
134
  "meta_file_val": "",
135
  "meta_file_attn_mask": ""
136
  }
137
  ],
138
  "test_sentences": [
139
  [
140
- "\u041c\u0456\u0436 \u043d\u0438\u043c\u0438 \u0437\u043d\u043e\u0432 \u0437\u0430\u043a\u0440\u0443\u0442\u0438\u0432\u0441\u044f \u043f\u0430\u043d, \u0437\u0430\u043a\u0440\u0443\u0442\u0438\u0432\u0441\u044c \u043e\u0441\u0430\u0432\u0443\u043b\u0430.",
141
  "sumska",
142
  null,
143
  "uk"
144
  ],
145
  [
146
- "A mo\u017ce Wokulski ma g\u0142ow\u0119?",
147
- "nina_brown",
148
  null,
149
- "pl"
150
  ]
151
  ],
152
  "eval_split_max_size": null,
153
  "eval_split_size": 0.01,
 
 
 
 
154
  "model_args": {
155
- "num_chars": 83,
156
  "out_channels": 513,
157
  "spec_segment_size": 32,
158
  "hidden_channels": 192,
@@ -168,7 +181,7 @@
168
  "kernel_size_flow": 5,
169
  "dilation_rate_flow": 1,
170
  "num_layers_flow": 4,
171
- "resblock_type_decoder": "1",
172
  "resblock_kernel_sizes_decoder": [
173
  3,
174
  7,
@@ -204,7 +217,7 @@
204
  4,
205
  4
206
  ],
207
- "use_sdp": false,
208
  "noise_scale": 1.0,
209
  "inference_noise_scale": 0.667,
210
  "length_scale": 1,
@@ -222,7 +235,7 @@
222
  "d_vector_dim": 0,
223
  "detach_dp_input": true,
224
  "use_language_embedding": true,
225
- "embedded_language_dim": 2,
226
  "num_languages": 2,
227
  "language_ids_file": null,
228
  "use_speaker_encoder_as_loss": false,
@@ -256,7 +269,7 @@
256
  "speaker_encoder_loss_alpha": 1.0,
257
  "return_wav": true,
258
  "r": 1,
259
- "num_speakers": 5,
260
  "use_speaker_embedding": true,
261
  "speakers_file": null,
262
  "speaker_embedding_channels": 256,
 
43
  "lr_scheduler": "",
44
  "lr_scheduler_params": {},
45
  "use_grad_scaler": false,
46
+ "cudnn_enable": true,
47
+ "cudnn_deterministic": false,
48
+ "cudnn_benchmark": true,
49
+ "training_seed": 54321,
50
  "model": "vits",
51
  "num_loader_workers": 8,
52
  "num_eval_loader_workers": 8,
53
  "use_noise_augment": false,
 
54
  "audio": {
55
  "fft_size": 1024,
56
  "win_length": 1024,
 
64
  "ref_level_db": 20,
65
  "do_sound_norm": false,
66
  "log_func": "np.log",
67
+ "do_trim_silence": true,
68
+ "trim_db": 45,
69
  "do_rms_norm": false,
70
  "db_level": null,
71
  "power": 1.5,
 
78
  "do_amp_to_db_mel": true,
79
  "pitch_fmax": 640.0,
80
  "pitch_fmin": 0.0,
81
+ "signal_norm": false,
82
  "min_level_db": -100,
83
  "symmetric_norm": true,
84
  "max_norm": 4.0,
 
100
  "eos": "<EOS>",
101
  "bos": "<BOS>",
102
  "blank": "<BLNK>",
103
+ "characters": "\u0430\u0431\u0432\u0433\u0491\u0434\u0435\u0454\u0436\u0437\u0438\u0456\u0457\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044c\u044e\u044f\u044d\u0451\u044b\u044a",
104
  "punctuations": "!'(),-.:;? ",
105
  "phonemes": null,
106
  "is_unique": true,
 
111
  "loss_masking": null,
112
  "sort_by_audio_len": true,
113
  "min_audio_len": 32768,
114
+ "max_audio_len": 224000,
115
  "min_text_len": 1,
116
  "max_text_len": Infinity,
117
  "compute_f0": false,
118
  "compute_linear_spec": true,
119
+ "precompute_num_workers": 12,
120
  "start_by_longest": false,
121
  "datasets": [
122
  {
123
  "name": "mailabs",
124
+ "path": "./logs/uk_UK",
125
  "meta_file_train": "",
126
+ "ignored_speakers": [
127
+ "obruchov",
128
+ "shepel"
129
+ ],
130
  "language": "uk",
131
  "meta_file_val": "",
132
  "meta_file_attn_mask": ""
133
  },
134
  {
135
  "name": "mailabs",
136
+ "path": "./logs/ru_RU",
137
  "meta_file_train": "",
138
+ "ignored_speakers": [
139
+ "minaev",
140
+ "nikolaev"
141
+ ],
142
+ "language": "ru",
143
  "meta_file_val": "",
144
  "meta_file_attn_mask": ""
145
  }
146
  ],
147
  "test_sentences": [
148
  [
149
+ "\u0412\u0435\u0441\u0435\u043b\u043a\u0430, \u0442\u0430\u043a\u043e\u0436 \u0440\u0430\u0439\u0434\u0443\u0433\u0430 \u043e\u043f\u0442\u0438\u0447\u043d\u0435 \u044f\u0432\u0438\u0449\u0435 \u0432 \u0430\u0442\u043c\u043e\u0441\u0444\u0435\u0440\u0456, \u0449\u043e \u044f\u0432\u043b\u044f\u0454 \u0441\u043e\u0431\u043e\u044e \u043e\u0434\u043d\u0443, \u0434\u0432\u0456 \u0447\u0438 \u0434\u0435\u043a\u0456\u043b\u044c\u043a\u0430 \u0440\u0456\u0437\u043d\u043e\u043a\u043e\u043b\u044c\u043e\u0440\u043e\u0432\u0438\u0445 \u0434\u0443\u0433.",
150
  "sumska",
151
  null,
152
  "uk"
153
  ],
154
  [
155
+ "\u0420\u0430\u0434\u0443\u0433\u0430, \u0430\u0442\u043c\u043e\u0441\u0444\u0435\u0440\u043d\u043e\u0435, \u043e\u043f\u0442\u0438\u0447\u0435\u0441\u043a\u043e\u0435 \u0438 \u043c\u0435\u0442\u0435\u043e\u0440\u043e\u043b\u043e\u0433\u0438\u0447\u0435\u0441\u043a\u043e\u0435 \u044f\u0432\u043b\u0435\u043d\u0438\u0435, \u043d\u0430\u0431\u043b\u044e\u0434\u0430\u0435\u043c\u043e\u0435 \u043f\u0440\u0438 \u043e\u0441\u0432\u0435\u0449\u0435\u043d\u0438\u0438 \u044f\u0440\u043a\u0438\u043c \u0438\u0441\u0442\u043e\u0447\u043d\u0438\u043a\u043e\u043c \u0441\u0432\u0435\u0442\u0430.",
156
+ "hajdurova",
157
  null,
158
+ "ru"
159
  ]
160
  ],
161
  "eval_split_max_size": null,
162
  "eval_split_size": 0.01,
163
+ "use_speaker_weighted_sampler": false,
164
+ "speaker_weighted_sampler_alpha": 1.0,
165
+ "use_language_weighted_sampler": true,
166
+ "language_weighted_sampler_alpha": 1.0,
167
  "model_args": {
168
+ "num_chars": 52,
169
  "out_channels": 513,
170
  "spec_segment_size": 32,
171
  "hidden_channels": 192,
 
181
  "kernel_size_flow": 5,
182
  "dilation_rate_flow": 1,
183
  "num_layers_flow": 4,
184
+ "resblock_type_decoder": "2",
185
  "resblock_kernel_sizes_decoder": [
186
  3,
187
  7,
 
217
  4,
218
  4
219
  ],
220
+ "use_sdp": true,
221
  "noise_scale": 1.0,
222
  "inference_noise_scale": 0.667,
223
  "length_scale": 1,
 
235
  "d_vector_dim": 0,
236
  "detach_dp_input": true,
237
  "use_language_embedding": true,
238
+ "embedded_language_dim": 4,
239
  "num_languages": 2,
240
  "language_ids_file": null,
241
  "use_speaker_encoder_as_loss": false,
 
269
  "speaker_encoder_loss_alpha": 1.0,
270
  "return_wav": true,
271
  "r": 1,
272
+ "num_speakers": 2,
273
  "use_speaker_embedding": true,
274
  "speakers_file": null,
275
  "speaker_embedding_channels": 256,