Setsugesuka committed on
Commit
7a4e294
1 Parent(s): 8160377

Upload 2 files

Browse files
bigvgan_singing/400000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:989df2350b502e1175cdb1d204d9f81c27ddf97fe1919db4fa2605631e4cab1d
3
+ size 1846939571
bigvgan_singing/args.json ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_config": "egs/vocoder/gan/exp_config_base.json",
3
+ "exp_name": "bigvgan_large",
4
+ "inference": {
5
+ "batch_size": 1,
6
+ },
7
+ "model": {
8
+ "bigvgan": {
9
+ "activation": "snakebeta",
10
+ "resblock": "1",
11
+ "resblock_dilation_sizes": [
12
+ [
13
+ 1,
14
+ 3,
15
+ 5,
16
+ ],
17
+ [
18
+ 1,
19
+ 3,
20
+ 5,
21
+ ],
22
+ [
23
+ 1,
24
+ 3,
25
+ 5,
26
+ ],
27
+ ],
28
+ "resblock_kernel_sizes": [
29
+ 3,
30
+ 7,
31
+ 11,
32
+ ],
33
+ "snake_logscale": true,
34
+ "upsample_initial_channel": 1536,
35
+ "upsample_kernel_sizes": [
36
+ 8,
37
+ 8,
38
+ 4,
39
+ 4,
40
+ 4,
41
+ 4,
42
+ ],
43
+ "upsample_rates": [
44
+ 4,
45
+ 4,
46
+ 2,
47
+ 2,
48
+ 2,
49
+ 2,
50
+ ],
51
+ },
52
+ "discriminators": [
53
+ "mpd",
54
+ "msstftd",
55
+ ],
56
+ "generator": "bigvgan",
57
+ "mpd": {
58
+ "discriminator_channel_multi": 1,
59
+ "mpd_reshapes": [
60
+ 2,
61
+ 3,
62
+ 5,
63
+ 7,
64
+ 11,
65
+ ],
66
+ "use_spectral_norm": false,
67
+ },
68
+ "mrd": {
69
+ "discriminator_channel_multi": 1,
70
+ "mrd_override": false,
71
+ "resolutions": [
72
+ [
73
+ 1024,
74
+ 120,
75
+ 600,
76
+ ],
77
+ [
78
+ 2048,
79
+ 240,
80
+ 1200,
81
+ ],
82
+ [
83
+ 512,
84
+ 50,
85
+ 240,
86
+ ],
87
+ ],
88
+ "use_spectral_norm": false,
89
+ },
90
+ "msstftd": {
91
+ "filters": 32,
92
+ },
93
+ },
94
+ "model_type": "GANVocoder",
95
+ "preprocess": {
96
+ "audio_dir": "audios",
97
+ "bits": 8,
98
+ "contentvec_dir": "contentvec",
99
+ "cut_mel_frame": 32,
100
+ "data_augment": false,
101
+ "dur_dir": "durs",
102
+ "duration_dir": "duration",
103
+ "emo2id": "emo2id.json",
104
+ "energy_dir": "energys",
105
+ "energy_extract_mode": "from_mel",
106
+ "energy_norm": false,
107
+ "extract_audio": true,
108
+ "extract_contentvec_feature": false,
109
+ "extract_duration": false,
110
+ "extract_energy": false,
111
+ "extract_label": false,
112
+ "extract_mcep": false,
113
+ "extract_mel": true,
114
+ "extract_mert_feature": false,
115
+ "extract_one_hot": false,
116
+ "extract_pitch": false,
117
+ "extract_uv": false,
118
+ "extract_wenet_feature": false,
119
+ "extract_whisper_feature": false,
120
+ "f0_max": 1100,
121
+ "f0_min": 50,
122
+ "file_lst": "file.lst",
123
+ "fmax": 12000,
124
+ "fmin": 0,
125
+ "hop_size": 256,
126
+ "is_mu_law": false,
127
+ "lab_dir": "labs",
128
+ "label_dir": "labels",
129
+ "mcep_dir": "mcep",
130
+ "mel_dir": "mels",
131
+ "mel_min_max_norm": false,
132
+ "min_level_db": -115,
133
+ "n_fft": 1024,
134
+ "n_mel": 100,
135
+ "num_silent_frames": 8,
136
+ "phone_seq_file": "phone_seq_file",
137
+ "pitch_bin": 256,
138
+ "pitch_dir": "pitches",
139
+ "pitch_extractor": "parselmouth",
140
+ "pitch_max": 1100.0,
141
+ "pitch_min": 50.0,
142
+ "pitch_norm": false,
143
+ "processed_dir": "processed_data",
144
+ "ref_level_db": 20,
145
+ "sample_rate": 24000,
146
+ "spk2id": "singers.json",
147
+ "train_file": "train.json",
148
+ "trim_fft_size": 512,
149
+ "trim_hop_size": 128,
150
+ "trim_silence": false,
151
+ "trim_top_db": 30,
152
+ "trimmed_wav_dir": "trimmed_wavs",
153
+ "use_audio": true,
154
+ "use_dur": false,
155
+ "use_emoid": false,
156
+ "use_frame_duration": false,
157
+ "use_frame_energy": false,
158
+ "use_frame_pitch": false,
159
+ "use_lab": false,
160
+ "use_label": false,
161
+ "use_log_scale_energy": false,
162
+ "use_log_scale_pitch": false,
163
+ "use_mel": true,
164
+ "use_one_hot": false,
165
+ "use_phn_seq": false,
166
+ "use_phone_duration": false,
167
+ "use_phone_energy": false,
168
+ "use_phone_pitch": false,
169
+ "use_spkid": false,
170
+ "use_uv": false,
171
+ "use_wav": false,
172
+ "use_wenet": false,
173
+ "utt2emo": "utt2emo",
174
+ "utt2spk": "utt2spk",
175
+ "uv_dir": "uvs",
176
+ "valid_file": "test.json",
177
+ "wav_dir": "wavs",
178
+ "wenet_dir": "wenet",
179
+ "win_size": 1024,
180
+ },
181
+ "supported_model_type": [
182
+ "GANVocoder",
183
+ "Fastspeech2",
184
+ "DiffSVC",
185
+ "Transformer",
186
+ "EDM",
187
+ "CD",
188
+ ],
189
+ "train": {
190
+ "adamw": {
191
+ "adam_b1": 0.8,
192
+ "adam_b2": 0.99,
193
+ "lr": 0.0002,
194
+ },
195
+ "batch_size": 4,
196
+ "criterions": [
197
+ "feature",
198
+ "discriminator",
199
+ "generator",
200
+ "mel",
201
+ ],
202
+ "dataloader": {
203
+ "num_worker": 4,
204
+ "pin_memory": true,
205
+ },
206
+ "ddp": true,
207
+ "epochs": 50000,
208
+ "exponential_lr": {
209
+ "lr_decay": 0.999,
210
+ },
211
+ "gradient_accumulation_step": 1,
212
+ "keep_checkpoint_max": 5,
213
+ "max_epoch": 1000000,
214
+ "max_steps": 1000000,
215
+ "multi_speaker_training": false,
216
+ "random_seed": 114514,
217
+ "run_eval": [
218
+ true,
219
+ ],
220
+ "sampler": {
221
+ "drop_last": true,
222
+ "holistic_shuffle": true,
223
+ },
224
+ "save_checkpoint_stride": [
225
+ 200,
226
+ ],
227
+ "save_checkpoints_steps": 10000,
228
+ "save_summary_steps": 500,
229
+ "total_training_steps": 50000,
230
+ "tracker": [
231
+ "tensorboard",
232
+ ],
233
+ "valid_interval": 10000,
234
+ },
235
+ }