ddd committed on
Commit
c4e83e4
1 Parent(s): e12afec
checkpoints/0831_opencpop_ds1000/config.yaml ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ K_step: 1000
2
+ accumulate_grad_batches: 1
3
+ audio_num_mel_bins: 80
4
+ audio_sample_rate: 24000
5
+ base_config:
6
+ - usr/configs/popcs_ds_beta6.yaml
7
+ - usr/configs/midi/cascade/opencs/opencpop_statis.yaml
8
+ binarization_args:
9
+ shuffle: false
10
+ with_align: true
11
+ with_f0: true
12
+ with_f0cwt: true
13
+ with_spk_embed: false
14
+ with_txt: true
15
+ with_wav: true
16
+ binarizer_cls: data_gen.singing.binarize.OpencpopBinarizer
17
+ binary_data_dir: data/binary/opencpop-midi-dp
18
+ check_val_every_n_epoch: 10
19
+ clip_grad_norm: 1
20
+ content_cond_steps: []
21
+ cwt_add_f0_loss: false
22
+ cwt_hidden_size: 128
23
+ cwt_layers: 2
24
+ cwt_loss: l1
25
+ cwt_std_scale: 0.8
26
+ datasets:
27
+ - opencpop
28
+ debug: false
29
+ dec_ffn_kernel_size: 9
30
+ dec_layers: 4
31
+ decay_steps: 50000
32
+ decoder_type: fft
33
+ dict_dir: ''
34
+ diff_decoder_type: wavenet
35
+ diff_loss_type: l1
36
+ dilation_cycle_length: 4
37
+ dropout: 0.1
38
+ ds_workers: 4
39
+ dur_enc_hidden_stride_kernel:
40
+ - 0,2,3
41
+ - 0,2,3
42
+ - 0,1,3
43
+ dur_loss: mse
44
+ dur_predictor_kernel: 3
45
+ dur_predictor_layers: 5
46
+ enc_ffn_kernel_size: 9
47
+ enc_layers: 4
48
+ encoder_K: 8
49
+ encoder_type: fft
50
+ endless_ds: true
51
+ ffn_act: gelu
52
+ ffn_padding: SAME
53
+ fft_size: 512
54
+ fmax: 12000
55
+ fmin: 30
56
+ fs2_ckpt: ''
57
+ gaussian_start: true
58
+ gen_dir_name: ''
59
+ gen_tgt_spk_id: -1
60
+ hidden_size: 256
61
+ hop_size: 128
62
+ infer: false
63
+ keep_bins: 80
64
+ lambda_commit: 0.25
65
+ lambda_energy: 0.0
66
+ lambda_f0: 0.0
67
+ lambda_ph_dur: 1.0
68
+ lambda_sent_dur: 1.0
69
+ lambda_uv: 0.0
70
+ lambda_word_dur: 1.0
71
+ load_ckpt: ''
72
+ log_interval: 100
73
+ loud_norm: false
74
+ lr: 0.001
75
+ max_beta: 0.02
76
+ max_epochs: 1000
77
+ max_eval_sentences: 1
78
+ max_eval_tokens: 60000
79
+ max_frames: 8000
80
+ max_input_tokens: 1550
81
+ max_sentences: 48
82
+ max_tokens: 36000
83
+ max_updates: 320000
84
+ mel_loss: ssim:0.5|l1:0.5
85
+ mel_vmax: 1.5
86
+ mel_vmin: -6.0
87
+ min_level_db: -120
88
+ norm_type: gn
89
+ num_ckpt_keep: 3
90
+ num_heads: 2
91
+ num_sanity_val_steps: 1
92
+ num_spk: 1
93
+ num_test_samples: 0
94
+ num_valid_plots: 10
95
+ optimizer_adam_beta1: 0.9
96
+ optimizer_adam_beta2: 0.98
97
+ out_wav_norm: false
98
+ pe_ckpt: checkpoints/0102_xiaoma_pe
99
+ pe_enable: true
100
+ pitch_ar: false
101
+ pitch_enc_hidden_stride_kernel:
102
+ - 0,2,5
103
+ - 0,2,5
104
+ - 0,2,5
105
+ pitch_extractor: parselmouth
106
+ pitch_loss: l1
107
+ pitch_norm: log
108
+ pitch_type: frame
109
+ pre_align_args:
110
+ allow_no_txt: false
111
+ denoise: false
112
+ forced_align: mfa
113
+ txt_processor: zh_g2pM
114
+ use_sox: true
115
+ use_tone: false
116
+ pre_align_cls: data_gen.singing.pre_align.SingingPreAlign
117
+ predictor_dropout: 0.5
118
+ predictor_grad: 0.1
119
+ predictor_hidden: -1
120
+ predictor_kernel: 5
121
+ predictor_layers: 5
122
+ prenet_dropout: 0.5
123
+ prenet_hidden_size: 256
124
+ pretrain_fs_ckpt: ''
125
+ processed_data_dir: xxx
126
+ profile_infer: false
127
+ raw_data_dir: data/raw/opencpop/segments
128
+ ref_norm_layer: bn
129
+ rel_pos: true
130
+ reset_phone_dict: true
131
+ residual_channels: 256
132
+ residual_layers: 20
133
+ save_best: false
134
+ save_ckpt: true
135
+ save_codes:
136
+ - configs
137
+ - modules
138
+ - tasks
139
+ - utils
140
+ - usr
141
+ save_f0: true
142
+ save_gt: false
143
+ schedule_type: linear
144
+ seed: 1234
145
+ sort_by_len: true
146
+ spec_max:
147
+ - -0.79453
148
+ - -0.81116
149
+ - -0.61631
150
+ - -0.30679
151
+ - -0.13863
152
+ - -0.050652
153
+ - -0.11563
154
+ - -0.10679
155
+ - -0.091068
156
+ - -0.062174
157
+ - -0.075302
158
+ - -0.072217
159
+ - -0.063815
160
+ - -0.073299
161
+ - 0.007361
162
+ - -0.072508
163
+ - -0.050234
164
+ - -0.16534
165
+ - -0.26928
166
+ - -0.20782
167
+ - -0.20823
168
+ - -0.11702
169
+ - -0.070128
170
+ - -0.065868
171
+ - -0.012675
172
+ - 0.0015121
173
+ - -0.089902
174
+ - -0.21392
175
+ - -0.23789
176
+ - -0.28922
177
+ - -0.30405
178
+ - -0.23029
179
+ - -0.22088
180
+ - -0.21542
181
+ - -0.29367
182
+ - -0.30137
183
+ - -0.38281
184
+ - -0.4359
185
+ - -0.28681
186
+ - -0.46855
187
+ - -0.57485
188
+ - -0.47022
189
+ - -0.54266
190
+ - -0.44848
191
+ - -0.6412
192
+ - -0.687
193
+ - -0.6486
194
+ - -0.76436
195
+ - -0.49971
196
+ - -0.71068
197
+ - -0.69724
198
+ - -0.61487
199
+ - -0.55843
200
+ - -0.69773
201
+ - -0.57502
202
+ - -0.70919
203
+ - -0.82431
204
+ - -0.84213
205
+ - -0.90431
206
+ - -0.8284
207
+ - -0.77945
208
+ - -0.82758
209
+ - -0.87699
210
+ - -1.0532
211
+ - -1.0766
212
+ - -1.1198
213
+ - -1.0185
214
+ - -0.98983
215
+ - -1.0001
216
+ - -1.0756
217
+ - -1.0024
218
+ - -1.0304
219
+ - -1.0579
220
+ - -1.0188
221
+ - -1.05
222
+ - -1.0842
223
+ - -1.0923
224
+ - -1.1223
225
+ - -1.2381
226
+ - -1.6467
227
+ spec_min:
228
+ - -6.0
229
+ - -6.0
230
+ - -6.0
231
+ - -6.0
232
+ - -6.0
233
+ - -6.0
234
+ - -6.0
235
+ - -6.0
236
+ - -6.0
237
+ - -6.0
238
+ - -6.0
239
+ - -6.0
240
+ - -6.0
241
+ - -6.0
242
+ - -6.0
243
+ - -6.0
244
+ - -6.0
245
+ - -6.0
246
+ - -6.0
247
+ - -6.0
248
+ - -6.0
249
+ - -6.0
250
+ - -6.0
251
+ - -6.0
252
+ - -6.0
253
+ - -6.0
254
+ - -6.0
255
+ - -6.0
256
+ - -6.0
257
+ - -6.0
258
+ - -6.0
259
+ - -6.0
260
+ - -6.0
261
+ - -6.0
262
+ - -6.0
263
+ - -6.0
264
+ - -6.0
265
+ - -6.0
266
+ - -6.0
267
+ - -6.0
268
+ - -6.0
269
+ - -6.0
270
+ - -6.0
271
+ - -6.0
272
+ - -6.0
273
+ - -6.0
274
+ - -6.0
275
+ - -6.0
276
+ - -6.0
277
+ - -6.0
278
+ - -6.0
279
+ - -6.0
280
+ - -6.0
281
+ - -6.0
282
+ - -6.0
283
+ - -6.0
284
+ - -6.0
285
+ - -6.0
286
+ - -6.0
287
+ - -6.0
288
+ - -6.0
289
+ - -6.0
290
+ - -6.0
291
+ - -6.0
292
+ - -6.0
293
+ - -6.0
294
+ - -6.0
295
+ - -6.0
296
+ - -6.0
297
+ - -6.0
298
+ - -6.0
299
+ - -6.0
300
+ - -6.0
301
+ - -6.0
302
+ - -6.0
303
+ - -6.0
304
+ - -6.0
305
+ - -6.0
306
+ - -6.0
307
+ - -6.0
308
+ spk_cond_steps: []
309
+ stop_token_weight: 5.0
310
+ task_cls: usr.diffsinger_task.DiffSingerMIDITask
311
+ test_ids: []
312
+ test_input_dir: ''
313
+ test_num: 0
314
+ test_prefixes:
315
+ - '2044'
316
+ - '2086'
317
+ - '2092'
318
+ - '2093'
319
+ - '2100'
320
+ test_set_name: test
321
+ timesteps: 1000
322
+ train_set_name: train
323
+ use_denoise: false
324
+ use_energy_embed: false
325
+ use_gt_dur: false
326
+ use_gt_f0: false
327
+ use_midi: true
328
+ use_nsf: true
329
+ use_pitch_embed: false
330
+ use_pos_embed: true
331
+ use_spk_embed: false
332
+ use_spk_id: false
333
+ use_split_spk_id: false
334
+ use_uv: true
335
+ use_var_enc: false
336
+ val_check_interval: 2000
337
+ valid_num: 0
338
+ valid_set_name: valid
339
+ vocoder: vocoders.hifigan.HifiGAN
340
+ vocoder_ckpt: checkpoints/0109_hifigan_bigpopcs_hop128
341
+ warmup_updates: 2000
342
+ wav2spec_eps: 1e-6
343
+ weight_decay: 0
344
+ win_size: 512
345
+ work_dir: checkpoints/0831_opencpop_ds1000
checkpoints/0831_opencpop_ds1000/model_ckpt_steps_320000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:954a31208ee6afb6240d09454bb204c4fbc63cf70e2586bed0ab29b1dc964c9e
3
+ size 170269591
usr/configs/midi/e2e/opencpop/ds1000-10dil.yaml ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_config:
2
+ - usr/configs/popcs_ds_beta6.yaml
3
+ - usr/configs/midi/cascade/opencs/opencpop_statis.yaml
4
+
5
+ binarizer_cls: data_gen.singing.binarize.OpencpopBinarizer
6
+ binary_data_dir: 'data/binary/opencpop-midi-dp'
7
+
8
+ #switch_midi2f0_step: 174000
9
+ use_midi: true # for MIDI experiments
10
+ use_gt_dur: false # for further MIDI experiments
11
+ lambda_ph_dur: 1.0
12
+ lambda_sent_dur: 1.0
13
+ lambda_word_dur: 1.0
14
+ predictor_grad: 0.1
15
+ dur_predictor_layers: 5 # *
16
+
17
+
18
+ fs2_ckpt: '' #
19
+ #num_valid_plots: 0
20
+ task_cls: usr.diffsinger_task.DiffSingerMIDITask
21
+
22
+ timesteps: 1000
23
+ K_step: 1000
24
+ max_beta: 0.02
25
+ max_tokens: 36000
26
+ max_updates: 320000
27
+ gaussian_start: True
28
+
29
+ use_pitch_embed: false
30
+ use_gt_f0: false # for MIDI experiments
31
+
32
+ lambda_f0: 0.
33
+ lambda_uv: 0.
34
+ dilation_cycle_length: 10 # *
35
+ rel_pos: true
36
+ predictor_layers: 5
37
+ pe_enable: true
38
+ pe_ckpt: 'checkpoints/0102_xiaoma_pe'
usr/configs/midi/e2e/opencpop/ds1000.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_config:
2
+ - usr/configs/popcs_ds_beta6.yaml
3
+ - usr/configs/midi/cascade/opencs/opencpop_statis.yaml
4
+
5
+ binarizer_cls: data_gen.singing.binarize.OpencpopBinarizer
6
+ binary_data_dir: 'data/binary/opencpop-midi-dp'
7
+
8
+ #switch_midi2f0_step: 174000
9
+ use_midi: true # for MIDI experiments
10
+ use_gt_dur: false # for further MIDI experiments
11
+ lambda_ph_dur: 1.0
12
+ lambda_sent_dur: 1.0
13
+ lambda_word_dur: 1.0
14
+ predictor_grad: 0.1
15
+ dur_predictor_layers: 5 # *
16
+
17
+
18
+ fs2_ckpt: '' #
19
+ #num_valid_plots: 0
20
+ task_cls: usr.diffsinger_task.DiffSingerMIDITask
21
+
22
+ # for diffusion schedule
23
+ timesteps: 1000
24
+ K_step: 1000
25
+ max_beta: 0.02
26
+ max_tokens: 36000
27
+ max_updates: 320000
28
+ gaussian_start: True
29
+ pndm_speedup: 10
30
+
31
+ use_pitch_embed: false
32
+ use_gt_f0: false # for MIDI experiments
33
+
34
+ lambda_f0: 0.
35
+ lambda_uv: 0.
36
+ dilation_cycle_length: 4 # *
37
+ rel_pos: true
38
+ predictor_layers: 5
39
+ pe_enable: true
40
+ pe_ckpt: 'checkpoints/0102_xiaoma_pe'
41
+
42
+
usr/configs/midi/e2e/opencpop/ds100_adj_rel.yaml CHANGED
@@ -20,7 +20,7 @@ fs2_ckpt: '' #
20
  task_cls: usr.diffsinger_task.DiffSingerMIDITask
21
 
22
  K_step: 100
23
- max_tokens: 40000
24
  max_updates: 160000
25
  gaussian_start: True
26
 
 
20
  task_cls: usr.diffsinger_task.DiffSingerMIDITask
21
 
22
  K_step: 100
23
+ max_tokens: 36000
24
  max_updates: 160000
25
  gaussian_start: True
26