ChrisPreston committed on
Commit
2332176
1 Parent(s): 89c8aec

Upload 2 files

Browse files
aqua/clean_model_ckpt_steps_100000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14d1e9bf1dde30fcb397ebf91e61e77fc34cf22f6d1d6fd112eba57113a75795
3
+ size 227124201
aqua/config.yaml ADDED
@@ -0,0 +1,457 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ K_step: 1000
2
+ accumulate_grad_batches: 1
3
+ audio_num_mel_bins: 128
4
+ audio_sample_rate: 44100
5
+ binarization_args:
6
+ shuffle: false
7
+ with_align: true
8
+ with_f0: true
9
+ with_hubert: true
10
+ with_spk_embed: false
11
+ with_wav: false
12
+ binarizer_cls: preprocessing.SVCpre.SVCBinarizer
13
+ binary_data_dir: data/binary/aquapre
14
+ check_val_every_n_epoch: 10
15
+ choose_test_manually: false
16
+ clip_grad_norm: 1
17
+ config_path: F:\diff-svc-main\training\config_nsf.yaml
18
+ content_cond_steps: []
19
+ cwt_add_f0_loss: false
20
+ cwt_hidden_size: 128
21
+ cwt_layers: 2
22
+ cwt_loss: l1
23
+ cwt_std_scale: 0.8
24
+ datasets:
25
+ - opencpop
26
+ debug: false
27
+ dec_ffn_kernel_size: 9
28
+ dec_layers: 4
29
+ decay_steps: 20000
30
+ decoder_type: fft
31
+ dict_dir: ''
32
+ diff_decoder_type: wavenet
33
+ diff_loss_type: l2
34
+ dilation_cycle_length: 4
35
+ dropout: 0.1
36
+ ds_workers: 4
37
+ dur_enc_hidden_stride_kernel:
38
+ - 0,2,3
39
+ - 0,2,3
40
+ - 0,1,3
41
+ dur_loss: mse
42
+ dur_predictor_kernel: 3
43
+ dur_predictor_layers: 5
44
+ enc_ffn_kernel_size: 9
45
+ enc_layers: 4
46
+ encoder_K: 8
47
+ encoder_type: fft
48
+ endless_ds: false
49
+ f0_bin: 256
50
+ f0_max: 1100.0
51
+ f0_min: 40.0
52
+ f0_static: '{"28.0": 0.07, "29.0": 0.03, "31.0": 0.05, "32.0": 0.08, "33.0": 0.12,
53
+ "34.0": 0.02, "35.0": 0.06, "36.0": 0.02, "37.0": 0.01, "38.0": 0.1, "39.0": 0.05,
54
+ "40.0": 0.09, "41.0": 0.14, "42.0": 0.16, "43.0": 0.03, "44.0": 0.42, "45.0": 0.74,
55
+ "46.0": 1.13, "47.0": 1.49, "48.0": 1.76, "49.0": 2.59, "50.0": 3.03, "51.0": 2.71,
56
+ "52.0": 1.93, "53.0": 1.11, "54.0": 0.78, "55.0": 3.33, "56.0": 20.38, "57.0": 69.6,
57
+ "58.0": 167.04, "59.0": 245.1, "60.0": 318.87, "61.0": 373.41, "62.0": 434.86, "63.0":
58
+ 415.63, "64.0": 448.97, "65.0": 452.99, "66.0": 474.88, "67.0": 471.54, "68.0":
59
+ 455.78, "69.0": 421.71, "70.0": 372.06, "71.0": 323.85, "72.0": 292.8, "73.0": 238.94,
60
+ "74.0": 190.5, "75.0": 132.86, "76.0": 88.03, "77.0": 53.16, "78.0": 32.96, "79.0":
61
+ 23.66, "80.0": 14.74, "81.0": 8.54, "82.0": 5.0, "83.0": 3.32, "84.0": 2.29, "85.0":
62
+ 0.91, "total_time": 6576.43}'
63
+ ffn_act: gelu
64
+ ffn_padding: SAME
65
+ fft_size: 2048
66
+ fmax: 16000
67
+ fmin: 40
68
+ fs2_ckpt: ''
69
+ gaussian_start: true
70
+ gen_dir_name: ''
71
+ gen_tgt_spk_id: -1
72
+ hidden_size: 256
73
+ hop_size: 512
74
+ hubert_gpu: true
75
+ hubert_path: checkpoints/hubert/hubert_soft.pt
76
+ infer: false
77
+ keep_bins: 128
78
+ lambda_commit: 0.25
79
+ lambda_energy: 0.0
80
+ lambda_f0: 1.0
81
+ lambda_ph_dur: 0.3
82
+ lambda_sent_dur: 1.0
83
+ lambda_uv: 1.0
84
+ lambda_word_dur: 1.0
85
+ load_ckpt: ''
86
+ log_interval: 100
87
+ loud_norm: false
88
+ lr: 0.0008
89
+ max_beta: 0.02
90
+ max_epochs: 3000
91
+ max_eval_sentences: 1
92
+ max_eval_tokens: 60000
93
+ max_frames: 42000
94
+ max_input_tokens: 6000
95
+ max_sentences: 88
96
+ max_tokens: 128000
97
+ max_updates: 1000000
98
+ mel_loss: ssim:0.5|l1:0.5
99
+ mel_vmax: 1.5
100
+ mel_vmin: -6.0
101
+ min_level_db: -120
102
+ no_fs2: true
103
+ norm_type: gn
104
+ num_ckpt_keep: 10
105
+ num_heads: 2
106
+ num_sanity_val_steps: 1
107
+ num_spk: 1
108
+ num_test_samples: 0
109
+ num_valid_plots: 10
110
+ optimizer_adam_beta1: 0.9
111
+ optimizer_adam_beta2: 0.98
112
+ out_wav_norm: false
113
+ pe_ckpt: checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt
114
+ pe_enable: false
115
+ perform_enhance: true
116
+ pitch_ar: false
117
+ pitch_enc_hidden_stride_kernel:
118
+ - 0,2,5
119
+ - 0,2,5
120
+ - 0,2,5
121
+ pitch_extractor: parselmouth
122
+ pitch_loss: l2
123
+ pitch_norm: log
124
+ pitch_type: frame
125
+ pndm_speedup: 10
126
+ pre_align_args:
127
+ allow_no_txt: false
128
+ denoise: false
129
+ forced_align: mfa
130
+ txt_processor: zh_g2pM
131
+ use_sox: true
132
+ use_tone: false
133
+ pre_align_cls: data_gen.singing.pre_align.SingingPreAlign
134
+ predictor_dropout: 0.5
135
+ predictor_grad: 0.1
136
+ predictor_hidden: -1
137
+ predictor_kernel: 5
138
+ predictor_layers: 5
139
+ prenet_dropout: 0.5
140
+ prenet_hidden_size: 256
141
+ pretrain_fs_ckpt: ''
142
+ processed_data_dir: xxx
143
+ profile_infer: false
144
+ raw_data_dir: data/raw/aquapre
145
+ ref_norm_layer: bn
146
+ rel_pos: true
147
+ reset_phone_dict: true
148
+ residual_channels: 512
149
+ residual_layers: 20
150
+ save_best: false
151
+ save_ckpt: true
152
+ save_codes:
153
+ - configs
154
+ - modules
155
+ - src
156
+ - utils
157
+ save_f0: true
158
+ save_gt: false
159
+ schedule_type: linear
160
+ seed: 1234
161
+ sort_by_len: true
162
+ speaker_id: aqua
163
+ spec_max:
164
+ - 0.18377557396888733
165
+ - -0.33469653129577637
166
+ - -0.3073468506336212
167
+ - -0.21027648448944092
168
+ - 0.23178215324878693
169
+ - 0.5297451019287109
170
+ - 0.7021887898445129
171
+ - 0.7711099982261658
172
+ - 0.7912386059761047
173
+ - 0.6609739065170288
174
+ - 0.649876058101654
175
+ - 0.6327046751976013
176
+ - 0.6892049908638
177
+ - 0.6026111841201782
178
+ - 0.6834777593612671
179
+ - 0.7417489886283875
180
+ - 0.6040375828742981
181
+ - 0.5854794383049011
182
+ - 0.7123280167579651
183
+ - 0.5886657238006592
184
+ - 0.6135984063148499
185
+ - 0.5388530492782593
186
+ - 0.5932422280311584
187
+ - 0.535581111907959
188
+ - 0.57913738489151
189
+ - 0.6827316880226135
190
+ - 0.6265526413917542
191
+ - 0.6557696461677551
192
+ - 0.6586976647377014
193
+ - 0.5687282085418701
194
+ - 0.6218562722206116
195
+ - 0.6349128484725952
196
+ - 0.6176865100860596
197
+ - 0.6212958097457886
198
+ - 0.6277656555175781
199
+ - 0.5551338195800781
200
+ - 0.6126622557640076
201
+ - 0.5821346640586853
202
+ - 0.577056348323822
203
+ - 0.5649800300598145
204
+ - 0.5984634757041931
205
+ - 0.4873456656932831
206
+ - 0.47209471464157104
207
+ - 0.4387756586074829
208
+ - 0.4690910577774048
209
+ - 0.4616055190563202
210
+ - 0.3555675446987152
211
+ - 0.3898852467536926
212
+ - 0.3676068186759949
213
+ - 0.4632047414779663
214
+ - 0.37983986735343933
215
+ - 0.3877682685852051
216
+ - 0.3099276125431061
217
+ - 0.3261813223361969
218
+ - 0.34168118238449097
219
+ - 0.3004901111125946
220
+ - 0.3512653112411499
221
+ - 0.2647061347961426
222
+ - 0.2685043215751648
223
+ - 0.20390087366104126
224
+ - 0.1825377196073532
225
+ - 0.22067485749721527
226
+ - 0.20306138694286346
227
+ - 0.12710601091384888
228
+ - 0.10927848517894745
229
+ - 0.1117628887295723
230
+ - 0.14148156344890594
231
+ - 0.122605100274086
232
+ - 0.08032718300819397
233
+ - 0.12159623205661774
234
+ - -0.04923255369067192
235
+ - -0.07824847847223282
236
+ - 0.03441360592842102
237
+ - 0.07093964517116547
238
+ - -0.1269683688879013
239
+ - 0.0027632638812065125
240
+ - -0.045093610882759094
241
+ - -0.04115259647369385
242
+ - 0.029067598283290863
243
+ - -0.009453626349568367
244
+ - -0.0470033697783947
245
+ - -0.04894810542464256
246
+ - -0.06236470118165016
247
+ - -0.20086997747421265
248
+ - -0.2363593578338623
249
+ - -0.17289961874485016
250
+ - -0.219277486205101
251
+ - -0.2934815585613251
252
+ - -0.30551621317863464
253
+ - -0.2513120770454407
254
+ - -0.26792851090431213
255
+ - -0.33068278431892395
256
+ - -0.37532031536102295
257
+ - -0.365634560585022
258
+ - -0.3379015326499939
259
+ - -0.26979681849479675
260
+ - -0.20316314697265625
261
+ - -0.2109878957271576
262
+ - -0.16927000880241394
263
+ - -0.1698305308818817
264
+ - -0.2739156186580658
265
+ - -0.2700604200363159
266
+ - -0.32284122705459595
267
+ - -0.44529229402542114
268
+ - -0.4002469480037689
269
+ - -0.2441970407962799
270
+ - -0.19795942306518555
271
+ - -0.2462945580482483
272
+ - -0.0673084482550621
273
+ - -0.22117790579795837
274
+ - -0.21418607234954834
275
+ - -0.39467209577560425
276
+ - -0.4388139843940735
277
+ - -0.3227368891239166
278
+ - -0.30530503392219543
279
+ - -0.3201104998588562
280
+ - -0.39839836955070496
281
+ - -0.464596688747406
282
+ - -0.5399728417396545
283
+ - -0.5515261292457581
284
+ - -0.520453691482544
285
+ - -0.6714966893196106
286
+ - -0.6414765119552612
287
+ - -0.6108742356300354
288
+ - -0.6762520670890808
289
+ - -0.7067146301269531
290
+ - -0.7586700320243835
291
+ - -0.6640384793281555
292
+ spec_min:
293
+ - -4.999994277954102
294
+ - -4.999994277954102
295
+ - -4.999994277954102
296
+ - -4.999994277954102
297
+ - -4.999994277954102
298
+ - -4.999994277954102
299
+ - -4.999994277954102
300
+ - -4.999994277954102
301
+ - -4.999994277954102
302
+ - -4.999994277954102
303
+ - -4.999994277954102
304
+ - -4.999994277954102
305
+ - -4.999994277954102
306
+ - -4.999994277954102
307
+ - -4.999994277954102
308
+ - -4.999994277954102
309
+ - -4.999994277954102
310
+ - -4.999994277954102
311
+ - -4.999994277954102
312
+ - -4.999994277954102
313
+ - -4.999994277954102
314
+ - -4.999994277954102
315
+ - -4.999994277954102
316
+ - -4.999994277954102
317
+ - -4.999994277954102
318
+ - -4.999994277954102
319
+ - -4.999994277954102
320
+ - -4.999994277954102
321
+ - -4.999994277954102
322
+ - -4.999994277954102
323
+ - -4.999994277954102
324
+ - -4.999994277954102
325
+ - -4.999994277954102
326
+ - -4.999994277954102
327
+ - -4.999994277954102
328
+ - -4.999994277954102
329
+ - -4.999994277954102
330
+ - -4.999994277954102
331
+ - -4.999994277954102
332
+ - -4.999994277954102
333
+ - -4.999994277954102
334
+ - -4.999994277954102
335
+ - -4.999994277954102
336
+ - -4.999994277954102
337
+ - -4.999994277954102
338
+ - -4.999994277954102
339
+ - -4.999994277954102
340
+ - -4.999994277954102
341
+ - -4.999994277954102
342
+ - -4.999994277954102
343
+ - -4.999994277954102
344
+ - -4.999994277954102
345
+ - -4.999994277954102
346
+ - -4.999994277954102
347
+ - -4.999994277954102
348
+ - -4.999994277954102
349
+ - -4.999994277954102
350
+ - -4.999994277954102
351
+ - -4.999994277954102
352
+ - -4.999994277954102
353
+ - -4.999994277954102
354
+ - -4.999994277954102
355
+ - -4.999994277954102
356
+ - -4.999994277954102
357
+ - -4.999994277954102
358
+ - -4.999994277954102
359
+ - -4.999994277954102
360
+ - -4.999994277954102
361
+ - -4.999994277954102
362
+ - -4.999994277954102
363
+ - -4.999994277954102
364
+ - -4.999994277954102
365
+ - -4.999994277954102
366
+ - -4.999994277954102
367
+ - -4.999994277954102
368
+ - -4.999994277954102
369
+ - -4.999994277954102
370
+ - -4.999994277954102
371
+ - -4.999994277954102
372
+ - -4.999994277954102
373
+ - -4.999994277954102
374
+ - -4.999994277954102
375
+ - -4.999994277954102
376
+ - -4.999994277954102
377
+ - -4.999994277954102
378
+ - -4.999994277954102
379
+ - -4.999994277954102
380
+ - -4.999994277954102
381
+ - -4.999994277954102
382
+ - -4.999994277954102
383
+ - -4.999994277954102
384
+ - -4.999994277954102
385
+ - -4.999994277954102
386
+ - -4.999994277954102
387
+ - -4.999994277954102
388
+ - -4.999994277954102
389
+ - -4.999994277954102
390
+ - -4.999994277954102
391
+ - -4.999994277954102
392
+ - -4.999994277954102
393
+ - -4.999994277954102
394
+ - -4.999994277954102
395
+ - -4.999994277954102
396
+ - -4.999994277954102
397
+ - -4.999994277954102
398
+ - -4.999994277954102
399
+ - -4.999994277954102
400
+ - -4.999994277954102
401
+ - -4.999994277954102
402
+ - -4.999994277954102
403
+ - -4.999994277954102
404
+ - -4.999994277954102
405
+ - -4.999994277954102
406
+ - -4.999994277954102
407
+ - -4.999994277954102
408
+ - -4.999994277954102
409
+ - -4.999994277954102
410
+ - -4.999994277954102
411
+ - -4.999994277954102
412
+ - -4.999994277954102
413
+ - -4.999994277954102
414
+ - -4.999994277954102
415
+ - -4.999994277954102
416
+ - -4.999994277954102
417
+ - -4.999994277954102
418
+ - -4.999994277954102
419
+ - -4.989471912384033
420
+ - -4.999994277954102
421
+ spk_cond_steps: []
422
+ stop_token_weight: 5.0
423
+ task_cls: training.task.SVC_task.SVCTask
424
+ test_ids: []
425
+ test_input_dir: ''
426
+ test_num: 0
427
+ test_prefixes:
428
+ - test
429
+ test_set_name: test
430
+ timesteps: 1000
431
+ train_set_name: train
432
+ use_cn_hubert: false
433
+ use_crepe: true
434
+ use_denoise: false
435
+ use_energy_embed: false
436
+ use_gt_dur: false
437
+ use_gt_f0: false
438
+ use_midi: false
439
+ use_nsf: true
440
+ use_pitch_embed: true
441
+ use_pos_embed: true
442
+ use_spk_embed: false
443
+ use_spk_id: false
444
+ use_split_spk_id: false
445
+ use_uv: false
446
+ use_var_enc: false
447
+ use_vec: false
448
+ val_check_interval: 2000
449
+ valid_num: 0
450
+ valid_set_name: valid
451
+ vocoder: network.vocoders.nsf_hifigan.NsfHifiGAN
452
+ vocoder_ckpt: checkpoints/nsf_hifigan/model
453
+ warmup_updates: 2000
454
+ wav2spec_eps: 1e-6
455
+ weight_decay: 0
456
+ win_size: 2048
457
+ work_dir: checkpoints/aquapre