Setsugesuka committed on
Commit
9eac011
1 Parent(s): 4ef3163

Upload 12 files

Browse files
hifigan_speech/args.json ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_config": "egs/vocoder/gan/exp_config_base.json",
3
+ "dataset": [
4
+ "libritts",
5
+ "ljspeech",
6
+ "vctk",
7
+ ],
8
+ "exp_name": "hifigan_speech",
9
+ "inference": {
10
+ "batch_size": 1,
11
+ },
12
+ "model": {
13
+ "discriminators": [
14
+ "msd",
15
+ "mpd",
16
+ "mssbcqtd",
17
+ "msstftd",
18
+ ],
19
+ "generator": "hifigan",
20
+ "hifigan": {
21
+ "resblock": "1",
22
+ "resblock_dilation_sizes": [
23
+ [
24
+ 1,
25
+ 3,
26
+ 5,
27
+ ],
28
+ [
29
+ 1,
30
+ 3,
31
+ 5,
32
+ ],
33
+ [
34
+ 1,
35
+ 3,
36
+ 5,
37
+ ],
38
+ ],
39
+ "resblock_kernel_sizes": [
40
+ 3,
41
+ 5,
42
+ 7,
43
+ ],
44
+ "upsample_initial_channel": 768,
45
+ "upsample_kernel_sizes": [
46
+ 16,
47
+ 8,
48
+ 4,
49
+ 4,
50
+ 4,
51
+ ],
52
+ "upsample_rates": [
53
+ 8,
54
+ 4,
55
+ 2,
56
+ 2,
57
+ 2,
58
+ ],
59
+ },
60
+ "mpd": {
61
+ "discriminator_channel_mult_factor": 1,
62
+ "discriminator_channel_multi": 1,
63
+ "mpd_reshapes": [
64
+ 2,
65
+ 3,
66
+ 5,
67
+ 7,
68
+ 11,
69
+ 17,
70
+ 23,
71
+ 37,
72
+ ],
73
+ "use_spectral_norm": false,
74
+ },
75
+ "mrd": {
76
+ "discriminator_channel_mult_factor": 1,
77
+ "mrd_override": false,
78
+ "resolutions": [
79
+ [
80
+ 1024,
81
+ 120,
82
+ 600,
83
+ ],
84
+ [
85
+ 2048,
86
+ 240,
87
+ 1200,
88
+ ],
89
+ [
90
+ 512,
91
+ 50,
92
+ 240,
93
+ ],
94
+ ],
95
+ "use_spectral_norm": false,
96
+ },
97
+ "mssbcqtd": {
98
+ "bins_per_octaves": [
99
+ 24,
100
+ 36,
101
+ 48,
102
+ ],
103
+ "dilations": [
104
+ 1,
105
+ 2,
106
+ 4,
107
+ ],
108
+ "filters": 32,
109
+ "filters_scale": 1,
110
+ "hop_lengths": [
111
+ 512,
112
+ 256,
113
+ 256,
114
+ ],
115
+ "in_channels": 1,
116
+ "max_filters": 1024,
117
+ "n_octaves": [
118
+ 9,
119
+ 9,
120
+ 9,
121
+ ],
122
+ "out_channels": 1,
123
+ },
124
+ "msstftd": {
125
+ "filters": 32,
126
+ },
127
+ },
128
+ "model_type": "GANVocoder",
129
+ "preprocess": {
130
+ "audio_dir": "audios",
131
+ "bits": 8,
132
+ "contentvec_dir": "contentvec",
133
+ "cut_mel_frame": 32,
134
+ "data_augment": false,
135
+ "dur_dir": "durs",
136
+ "duration_dir": "duration",
137
+ "emo2id": "emo2id.json",
138
+ "energy_dir": "energys",
139
+ "energy_extract_mode": "from_mel",
140
+ "energy_norm": false,
141
+ "extract_amplitude_phase": false,
142
+ "extract_audio": true,
143
+ "extract_contentvec_feature": false,
144
+ "extract_duration": false,
145
+ "extract_energy": false,
146
+ "extract_label": false,
147
+ "extract_linear_spec": false,
148
+ "extract_mcep": false,
149
+ "extract_mel": true,
150
+ "extract_mert_feature": false,
151
+ "extract_one_hot": false,
152
+ "extract_pitch": false,
153
+ "extract_uv": false,
154
+ "extract_wenet_feature": false,
155
+ "extract_whisper_feature": false,
156
+ "f0_max": 1100,
157
+ "f0_min": 50,
158
+ "file_lst": "file.lst",
159
+ "fmax": 12000,
160
+ "fmin": 0,
161
+ "hop_size": 256,
162
+ "imaginary_dir": "imaginarys",
163
+ "is_mu_law": false,
164
+ "lab_dir": "labs",
165
+ "label_dir": "labels",
166
+ "linear_dir": "linears",
167
+ "log_amplitude_dir": "log_amplitudes",
168
+ "mcep_dir": "mcep",
169
+ "mel_dir": "mels",
170
+ "mel_min_max_norm": false,
171
+ "min_level_db": -115,
172
+ "n_fft": 1024,
173
+ "n_mel": 100,
174
+ "num_silent_frames": 8,
175
+ "phase_dir": "phases",
176
+ "phone_seq_file": "phone_seq_file",
177
+ "pitch_bin": 256,
178
+ "pitch_dir": "pitches",
179
+ "pitch_extractor": "parselmouth",
180
+ "pitch_max": 1100.0,
181
+ "pitch_min": 50.0,
182
+ "pitch_norm": false,
183
+ "real_dir": "reals",
184
+ "ref_level_db": 20,
185
+ "sample_rate": 24000,
186
+ "spk2id": "singers.json",
187
+ "train_file": "train.json",
188
+ "trim_fft_size": 512,
189
+ "trim_hop_size": 128,
190
+ "trim_silence": false,
191
+ "trim_top_db": 30,
192
+ "trimmed_wav_dir": "trimmed_wavs",
193
+ "use_amplitude_phase": false,
194
+ "use_audio": true,
195
+ "use_dur": false,
196
+ "use_emoid": false,
197
+ "use_frame_duration": false,
198
+ "use_frame_energy": false,
199
+ "use_frame_pitch": false,
200
+ "use_lab": false,
201
+ "use_label": false,
202
+ "use_linear": false,
203
+ "use_log_scale_energy": false,
204
+ "use_log_scale_pitch": false,
205
+ "use_mel": true,
206
+ "use_min_max_norm_mel": false,
207
+ "use_one_hot": false,
208
+ "use_phn_seq": false,
209
+ "use_phone": false,
210
+ "use_phone_duration": false,
211
+ "use_phone_energy": false,
212
+ "use_phone_pitch": false,
213
+ "use_spkid": false,
214
+ "use_text": false,
215
+ "use_uv": false,
216
+ "use_wav": false,
217
+ "use_wenet": false,
218
+ "utt2emo": "utt2emo",
219
+ "utt2spk": "utt2spk",
220
+ "uv_dir": "uvs",
221
+ "valid_file": "test.json",
222
+ "wav_dir": "wavs",
223
+ "wenet_dir": "wenet",
224
+ "win_size": 1024,
225
+ },
226
+ "supported_model_type": [
227
+ "GANVocoder",
228
+ "Fastspeech2",
229
+ "DiffSVC",
230
+ "Transformer",
231
+ "EDM",
232
+ "CD",
233
+ ],
234
+ "train": {
235
+ "adamw": {
236
+ "adam_b1": 0.8,
237
+ "adam_b2": 0.99,
238
+ "lr": 0.0002,
239
+ },
240
+ "batch_size": 16,
241
+ "criterions": [
242
+ "feature",
243
+ "discriminator",
244
+ "generator",
245
+ "mel",
246
+ ],
247
+ "dataloader": {
248
+ "num_worker": 4,
249
+ "pin_memory": true,
250
+ },
251
+ "ddp": true,
252
+ "epochs": 50000,
253
+ "exponential_lr": {
254
+ "lr_decay": 0.999,
255
+ },
256
+ "gradient_accumulation_step": 1,
257
+ "keep_checkpoint_max": 5,
258
+ "max_epoch": 1000000,
259
+ "max_steps": 1000000,
260
+ "multi_speaker_training": false,
261
+ "random_seed": 114514,
262
+ "run_eval": [
263
+ true,
264
+ ],
265
+ "sampler": {
266
+ "drop_last": true,
267
+ "holistic_shuffle": true,
268
+ },
269
+ "save_checkpoint_stride": [
270
+ 50,
271
+ ],
272
+ "save_checkpoints_steps": 10000,
273
+ "save_summary_steps": 500,
274
+ "total_training_steps": 50000,
275
+ "tracker": [
276
+ "tensorboard",
277
+ ],
278
+ "valid_interval": 10000,
279
+ },
280
+ }
hifigan_speech/checkpoint/epoch-0200_step-2841939_loss-38.503523/ckpts.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [
2
+ []
3
+ ]
hifigan_speech/checkpoint/epoch-0200_step-2841939_loss-38.503523/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5fb0647436a074b05d2c0503c858d694bfd7eb4ac67be1597dcd0880c2bbe8b
3
+ size 189448709
hifigan_speech/checkpoint/epoch-0200_step-2841939_loss-38.503523/optimizer_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf5a1192dd52c5ed1849269b371631a42d89dd99348af14628eeb32936805cab
3
+ size 767969069
hifigan_speech/checkpoint/epoch-0200_step-2841939_loss-38.503523/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7097c8afa12e4f7f93c024ddde568e0288c1bc99de277e711d64972e7a38858c
3
+ size 94703001
hifigan_speech/checkpoint/epoch-0200_step-2841939_loss-38.503523/pytorch_model_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8274f13e7d8f0cbd2a45810f3d7ac184234b74eba622b3bccb4928e381507e2d
3
+ size 118577541
hifigan_speech/checkpoint/epoch-0200_step-2841939_loss-38.503523/pytorch_model_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34613c36c68bd754943919e0955ddbd48c2664fc2cefa14f2dc905c53079d63a
3
+ size 263128113
hifigan_speech/checkpoint/epoch-0200_step-2841939_loss-38.503523/pytorch_model_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc81b27297dfd94d9dd216c678faa1323d5a34c267707deb6969a1fb7d66b6fa
3
+ size 1593185
hifigan_speech/checkpoint/epoch-0200_step-2841939_loss-38.503523/pytorch_model_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d28f2b1a533da4ce377670cac75e77cb3855ac54b5ccb145fc551a855c896f3
3
+ size 1169411
hifigan_speech/checkpoint/epoch-0200_step-2841939_loss-38.503523/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33c4c0a3a856d84dd933bc33c2f13bc4f219fb6230ffafebcd9e562cf0431886
3
+ size 15691
hifigan_speech/checkpoint/epoch-0200_step-2841939_loss-38.503523/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ead7b9a7d4eb95bd33a6dc062a4c59873f58e56d9b1c0ead06dd4594f05bfda
3
+ size 563
hifigan_speech/checkpoint/epoch-0200_step-2841939_loss-38.503523/scheduler_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbca31a86f051d55ec6013b6fd1191538c7e699f1cd1aa25fd1e2d3cbe7ddf05
3
+ size 631