ddd committed on
Commit
871a48f
1 Parent(s): dbb6dab

add .gitattributes

Browse files
.gitattributes CHANGED
@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zstandard filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zstandard filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
32
+ model_ckpt_steps* filter=lfs diff=lfs merge=lfs -text
checkpoints/.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ model_ckpt_steps* filter=lfs diff=lfs merge=lfs -text
checkpoints/0102_xiaoma_pe/config.yaml ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accumulate_grad_batches: 1
2
+ audio_num_mel_bins: 80
3
+ audio_sample_rate: 24000
4
+ base_config:
5
+ - configs/tts/lj/fs2.yaml
6
+ binarization_args:
7
+ shuffle: false
8
+ with_align: true
9
+ with_f0: true
10
+ with_f0cwt: true
11
+ with_spk_embed: true
12
+ with_txt: true
13
+ with_wav: false
14
+ binarizer_cls: data_gen.tts.base_binarizer.BaseBinarizer
15
+ binary_data_dir: data/binary/xiaoma1022_24k_128hop
16
+ check_val_every_n_epoch: 10
17
+ clip_grad_norm: 1
18
+ cwt_add_f0_loss: false
19
+ cwt_hidden_size: 128
20
+ cwt_layers: 2
21
+ cwt_loss: l1
22
+ cwt_std_scale: 0.8
23
+ debug: false
24
+ dec_ffn_kernel_size: 9
25
+ dec_layers: 4
26
+ decoder_type: fft
27
+ dict_dir: ''
28
+ dropout: 0.1
29
+ ds_workers: 4
30
+ dur_enc_hidden_stride_kernel:
31
+ - 0,2,3
32
+ - 0,2,3
33
+ - 0,1,3
34
+ dur_loss: mse
35
+ dur_predictor_kernel: 3
36
+ dur_predictor_layers: 2
37
+ enc_ffn_kernel_size: 9
38
+ enc_layers: 4
39
+ encoder_K: 8
40
+ encoder_type: fft
41
+ endless_ds: true
42
+ ffn_act: gelu
43
+ ffn_padding: SAME
44
+ fft_size: 512
45
+ fmax: 12000
46
+ fmin: 30
47
+ gen_dir_name: ''
48
+ hidden_size: 256
49
+ hop_size: 128
50
+ infer: false
51
+ lambda_commit: 0.25
52
+ lambda_energy: 0.1
53
+ lambda_f0: 1.0
54
+ lambda_ph_dur: 1.0
55
+ lambda_sent_dur: 1.0
56
+ lambda_uv: 1.0
57
+ lambda_word_dur: 1.0
58
+ load_ckpt: ''
59
+ log_interval: 100
60
+ loud_norm: false
61
+ lr: 2.0
62
+ max_epochs: 1000
63
+ max_eval_sentences: 1
64
+ max_eval_tokens: 60000
65
+ max_frames: 5000
66
+ max_input_tokens: 1550
67
+ max_sentences: 100000
68
+ max_tokens: 20000
69
+ max_updates: 60000
70
+ mel_loss: l1
71
+ mel_vmax: 1.5
72
+ mel_vmin: -6
73
+ min_level_db: -120
74
+ norm_type: gn
75
+ num_ckpt_keep: 3
76
+ num_heads: 2
77
+ num_sanity_val_steps: 5
78
+ num_spk: 1
79
+ num_test_samples: 20
80
+ num_valid_plots: 10
81
+ optimizer_adam_beta1: 0.9
82
+ optimizer_adam_beta2: 0.98
83
+ out_wav_norm: false
84
+ pitch_ar: false
85
+ pitch_enc_hidden_stride_kernel:
86
+ - 0,2,5
87
+ - 0,2,5
88
+ - 0,2,5
89
+ pitch_extractor_conv_layers: 2
90
+ pitch_loss: l1
91
+ pitch_norm: log
92
+ pitch_type: frame
93
+ pre_align_args:
94
+ allow_no_txt: false
95
+ denoise: false
96
+ forced_align: mfa
97
+ txt_processor: en
98
+ use_sox: false
99
+ use_tone: true
100
+ pre_align_cls: data_gen.tts.lj.pre_align.LJPreAlign
101
+ predictor_dropout: 0.5
102
+ predictor_grad: 0.1
103
+ predictor_hidden: -1
104
+ predictor_kernel: 5
105
+ predictor_layers: 2
106
+ prenet_dropout: 0.5
107
+ prenet_hidden_size: 256
108
+ pretrain_fs_ckpt: ''
109
+ processed_data_dir: data/processed/ljspeech
110
+ profile_infer: false
111
+ raw_data_dir: data/raw/LJSpeech-1.1
112
+ ref_norm_layer: bn
113
+ reset_phone_dict: true
114
+ save_best: false
115
+ save_ckpt: true
116
+ save_codes:
117
+ - configs
118
+ - modules
119
+ - tasks
120
+ - utils
121
+ - usr
122
+ save_f0: false
123
+ save_gt: false
124
+ seed: 1234
125
+ sort_by_len: true
126
+ stop_token_weight: 5.0
127
+ task_cls: tasks.tts.pe.PitchExtractionTask
128
+ test_ids:
129
+ - 68
130
+ - 70
131
+ - 74
132
+ - 87
133
+ - 110
134
+ - 172
135
+ - 190
136
+ - 215
137
+ - 231
138
+ - 294
139
+ - 316
140
+ - 324
141
+ - 402
142
+ - 422
143
+ - 485
144
+ - 500
145
+ - 505
146
+ - 508
147
+ - 509
148
+ - 519
149
+ test_input_dir: ''
150
+ test_num: 523
151
+ test_set_name: test
152
+ train_set_name: train
153
+ use_denoise: false
154
+ use_energy_embed: false
155
+ use_gt_dur: false
156
+ use_gt_f0: false
157
+ use_pitch_embed: true
158
+ use_pos_embed: true
159
+ use_spk_embed: false
160
+ use_spk_id: false
161
+ use_split_spk_id: false
162
+ use_uv: true
163
+ use_var_enc: false
164
+ val_check_interval: 2000
165
+ valid_num: 348
166
+ valid_set_name: valid
167
+ vocoder: pwg
168
+ vocoder_ckpt: ''
169
+ warmup_updates: 2000
170
+ weight_decay: 0
171
+ win_size: 512
172
+ work_dir: checkpoints/0102_xiaoma_pe
checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53942abd8cb908b6d161e1ad7ff3d7d0dd6b204d5bf050613c9d00c56b185ceb
3
+ size 13047222
checkpoints/0109_hifigan_bigpopcs_hop128/config.yaml ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accumulate_grad_batches: 1
2
+ adam_b1: 0.8
3
+ adam_b2: 0.99
4
+ amp: false
5
+ audio_num_mel_bins: 80
6
+ audio_sample_rate: 24000
7
+ aux_context_window: 0
8
+ #base_config:
9
+ #- egs/egs_bases/singing/pwg.yaml
10
+ #- egs/egs_bases/tts/vocoder/hifigan.yaml
11
+ binarization_args:
12
+ reset_phone_dict: true
13
+ reset_word_dict: true
14
+ shuffle: false
15
+ trim_eos_bos: false
16
+ trim_sil: false
17
+ with_align: false
18
+ with_f0: true
19
+ with_f0cwt: false
20
+ with_linear: false
21
+ with_spk_embed: false
22
+ with_spk_id: true
23
+ with_txt: false
24
+ with_wav: true
25
+ with_word: false
26
+ binarizer_cls: data_gen.tts.singing.binarize.SingingBinarizer
27
+ binary_data_dir: data/binary/big_popcs_24k_hop128
28
+ check_val_every_n_epoch: 10
29
+ clip_grad_norm: 1
30
+ clip_grad_value: 0
31
+ datasets: []
32
+ debug: false
33
+ dec_ffn_kernel_size: 9
34
+ dec_layers: 4
35
+ dict_dir: ''
36
+ disc_start_steps: 40000
37
+ discriminator_grad_norm: 1
38
+ discriminator_optimizer_params:
39
+ eps: 1.0e-06
40
+ lr: 0.0002
41
+ weight_decay: 0.0
42
+ discriminator_params:
43
+ bias: true
44
+ conv_channels: 64
45
+ in_channels: 1
46
+ kernel_size: 3
47
+ layers: 10
48
+ nonlinear_activation: LeakyReLU
49
+ nonlinear_activation_params:
50
+ negative_slope: 0.2
51
+ out_channels: 1
52
+ use_weight_norm: true
53
+ discriminator_scheduler_params:
54
+ gamma: 0.999
55
+ step_size: 600
56
+ dropout: 0.1
57
+ ds_workers: 1
58
+ enc_ffn_kernel_size: 9
59
+ enc_layers: 4
60
+ endless_ds: true
61
+ ffn_act: gelu
62
+ ffn_padding: SAME
63
+ fft_size: 512
64
+ fmax: 12000
65
+ fmin: 30
66
+ frames_multiple: 1
67
+ gen_dir_name: ''
68
+ generator_grad_norm: 10
69
+ generator_optimizer_params:
70
+ eps: 1.0e-06
71
+ lr: 0.0002
72
+ weight_decay: 0.0
73
+ generator_params:
74
+ aux_channels: 80
75
+ dropout: 0.0
76
+ gate_channels: 128
77
+ in_channels: 1
78
+ kernel_size: 3
79
+ layers: 30
80
+ out_channels: 1
81
+ residual_channels: 64
82
+ skip_channels: 64
83
+ stacks: 3
84
+ upsample_net: ConvInUpsampleNetwork
85
+ upsample_params:
86
+ upsample_scales:
87
+ - 2
88
+ - 4
89
+ - 4
90
+ - 4
91
+ use_nsf: false
92
+ use_pitch_embed: true
93
+ use_weight_norm: true
94
+ generator_scheduler_params:
95
+ gamma: 0.999
96
+ step_size: 600
97
+ griffin_lim_iters: 60
98
+ hidden_size: 256
99
+ hop_size: 128
100
+ infer: false
101
+ lambda_adv: 1.0
102
+ lambda_cdisc: 4.0
103
+ lambda_energy: 0.0
104
+ lambda_f0: 0.0
105
+ lambda_mel: 5.0
106
+ lambda_mel_adv: 1.0
107
+ lambda_ph_dur: 0.0
108
+ lambda_sent_dur: 0.0
109
+ lambda_uv: 0.0
110
+ lambda_word_dur: 0.0
111
+ load_ckpt: ''
112
+ loud_norm: false
113
+ lr: 2.0
114
+ max_epochs: 1000
115
+ max_frames: 2400
116
+ max_input_tokens: 1550
117
+ max_samples: 8192
118
+ max_sentences: 20
119
+ max_tokens: 24000
120
+ max_updates: 3000000
121
+ max_valid_sentences: 1
122
+ max_valid_tokens: 60000
123
+ mel_loss: ssim:0.5|l1:0.5
124
+ mel_vmax: 1.5
125
+ mel_vmin: -6
126
+ min_frames: 0
127
+ min_level_db: -120
128
+ num_ckpt_keep: 3
129
+ num_heads: 2
130
+ num_mels: 80
131
+ num_sanity_val_steps: 5
132
+ num_spk: 100
133
+ num_test_samples: 0
134
+ num_valid_plots: 10
135
+ optimizer_adam_beta1: 0.9
136
+ optimizer_adam_beta2: 0.98
137
+ out_wav_norm: false
138
+ pitch_extractor: parselmouth
139
+ pitch_type: frame
140
+ pre_align_args:
141
+ allow_no_txt: false
142
+ denoise: false
143
+ sox_resample: true
144
+ sox_to_wav: false
145
+ trim_sil: false
146
+ txt_processor: zh
147
+ use_tone: false
148
+ pre_align_cls: data_gen.tts.singing.pre_align.SingingPreAlign
149
+ predictor_grad: 0.0
150
+ print_nan_grads: false
151
+ processed_data_dir: ''
152
+ profile_infer: false
153
+ raw_data_dir: ''
154
+ ref_level_db: 20
155
+ rename_tmux: true
156
+ rerun_gen: true
157
+ resblock: '1'
158
+ resblock_dilation_sizes:
159
+ - - 1
160
+ - 3
161
+ - 5
162
+ - - 1
163
+ - 3
164
+ - 5
165
+ - - 1
166
+ - 3
167
+ - 5
168
+ resblock_kernel_sizes:
169
+ - 3
170
+ - 7
171
+ - 11
172
+ resume_from_checkpoint: 0
173
+ save_best: true
174
+ save_codes: []
175
+ save_f0: true
176
+ save_gt: true
177
+ scheduler: rsqrt
178
+ seed: 1234
179
+ sort_by_len: true
180
+ stft_loss_params:
181
+ fft_sizes:
182
+ - 1024
183
+ - 2048
184
+ - 512
185
+ hop_sizes:
186
+ - 120
187
+ - 240
188
+ - 50
189
+ win_lengths:
190
+ - 600
191
+ - 1200
192
+ - 240
193
+ window: hann_window
194
+ task_cls: tasks.vocoder.hifigan.HifiGanTask
195
+ tb_log_interval: 100
196
+ test_ids: []
197
+ test_input_dir: ''
198
+ test_num: 50
199
+ test_prefixes: []
200
+ test_set_name: test
201
+ train_set_name: train
202
+ train_sets: ''
203
+ upsample_initial_channel: 512
204
+ upsample_kernel_sizes:
205
+ - 16
206
+ - 16
207
+ - 4
208
+ - 4
209
+ upsample_rates:
210
+ - 8
211
+ - 4
212
+ - 2
213
+ - 2
214
+ use_cdisc: false
215
+ use_cond_disc: false
216
+ use_fm_loss: false
217
+ use_gt_dur: true
218
+ use_gt_f0: true
219
+ use_mel_loss: true
220
+ use_ms_stft: false
221
+ use_pitch_embed: true
222
+ use_ref_enc: true
223
+ use_spec_disc: false
224
+ use_spk_embed: false
225
+ use_spk_id: false
226
+ use_split_spk_id: false
227
+ val_check_interval: 2000
228
+ valid_infer_interval: 10000
229
+ valid_monitor_key: val_loss
230
+ valid_monitor_mode: min
231
+ valid_set_name: valid
232
+ vocoder: pwg
233
+ vocoder_ckpt: ''
234
+ vocoder_denoise_c: 0.0
235
+ warmup_updates: 8000
236
+ weight_decay: 0
237
+ win_length: null
238
+ win_size: 512
239
+ window: hann
240
+ word_size: 3000
241
+ work_dir: checkpoints/0109_hifigan_bigpopcs_hop128
checkpoints/0109_hifigan_bigpopcs_hop128/model_ckpt_steps_1512000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cb68f3ce0c46ba0a8b6d49718f1fffdf5bd7bcab769a986fd2fd129835cc1d1
3
+ size 55827436
checkpoints/0228_opencpop_ds100_rel/config.yaml ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ K_step: 100
2
+ accumulate_grad_batches: 1
3
+ audio_num_mel_bins: 80
4
+ audio_sample_rate: 24000
5
+ base_config:
6
+ - usr/configs/popcs_ds_beta6.yaml
7
+ - usr/configs/midi/cascade/opencs/opencpop_statis.yaml
8
+ binarization_args:
9
+ shuffle: false
10
+ with_align: true
11
+ with_f0: true
12
+ with_f0cwt: true
13
+ with_spk_embed: false
14
+ with_txt: true
15
+ with_wav: true
16
+ binarizer_cls: data_gen.singing.binarize.OpencpopBinarizer
17
+ binary_data_dir: data/binary/opencpop-midi-dp
18
+ check_val_every_n_epoch: 10
19
+ clip_grad_norm: 1
20
+ content_cond_steps: []
21
+ cwt_add_f0_loss: false
22
+ cwt_hidden_size: 128
23
+ cwt_layers: 2
24
+ cwt_loss: l1
25
+ cwt_std_scale: 0.8
26
+ datasets:
27
+ - popcs
28
+ debug: false
29
+ dec_ffn_kernel_size: 9
30
+ dec_layers: 4
31
+ decay_steps: 50000
32
+ decoder_type: fft
33
+ dict_dir: ''
34
+ diff_decoder_type: wavenet
35
+ diff_loss_type: l1
36
+ dilation_cycle_length: 4
37
+ dropout: 0.1
38
+ ds_workers: 4
39
+ dur_enc_hidden_stride_kernel:
40
+ - 0,2,3
41
+ - 0,2,3
42
+ - 0,1,3
43
+ dur_loss: mse
44
+ dur_predictor_kernel: 3
45
+ dur_predictor_layers: 5
46
+ enc_ffn_kernel_size: 9
47
+ enc_layers: 4
48
+ encoder_K: 8
49
+ encoder_type: fft
50
+ endless_ds: true
51
+ ffn_act: gelu
52
+ ffn_padding: SAME
53
+ fft_size: 512
54
+ fmax: 12000
55
+ fmin: 30
56
+ fs2_ckpt: ''
57
+ gaussian_start: true
58
+ gen_dir_name: ''
59
+ gen_tgt_spk_id: -1
60
+ hidden_size: 256
61
+ hop_size: 128
62
+ infer: false
63
+ keep_bins: 80
64
+ lambda_commit: 0.25
65
+ lambda_energy: 0.0
66
+ lambda_f0: 0.0
67
+ lambda_ph_dur: 1.0
68
+ lambda_sent_dur: 1.0
69
+ lambda_uv: 0.0
70
+ lambda_word_dur: 1.0
71
+ load_ckpt: ''
72
+ log_interval: 100
73
+ loud_norm: false
74
+ lr: 0.001
75
+ max_beta: 0.06
76
+ max_epochs: 1000
77
+ max_eval_sentences: 1
78
+ max_eval_tokens: 60000
79
+ max_frames: 8000
80
+ max_input_tokens: 1550
81
+ max_sentences: 48
82
+ max_tokens: 40000
83
+ max_updates: 160000
84
+ mel_loss: ssim:0.5|l1:0.5
85
+ mel_vmax: 1.5
86
+ mel_vmin: -6.0
87
+ min_level_db: -120
88
+ norm_type: gn
89
+ num_ckpt_keep: 3
90
+ num_heads: 2
91
+ num_sanity_val_steps: 1
92
+ num_spk: 1
93
+ num_test_samples: 0
94
+ num_valid_plots: 10
95
+ optimizer_adam_beta1: 0.9
96
+ optimizer_adam_beta2: 0.98
97
+ out_wav_norm: false
98
+ pe_ckpt: checkpoints/0102_xiaoma_pe
99
+ pe_enable: true
100
+ pitch_ar: false
101
+ pitch_enc_hidden_stride_kernel:
102
+ - 0,2,5
103
+ - 0,2,5
104
+ - 0,2,5
105
+ pitch_extractor: parselmouth
106
+ pitch_loss: l1
107
+ pitch_norm: log
108
+ pitch_type: frame
109
+ pre_align_args:
110
+ allow_no_txt: false
111
+ denoise: false
112
+ forced_align: mfa
113
+ txt_processor: zh_g2pM
114
+ use_sox: true
115
+ use_tone: false
116
+ pre_align_cls: data_gen.singing.pre_align.SingingPreAlign
117
+ predictor_dropout: 0.5
118
+ predictor_grad: 0.1
119
+ predictor_hidden: -1
120
+ predictor_kernel: 5
121
+ predictor_layers: 5
122
+ prenet_dropout: 0.5
123
+ prenet_hidden_size: 256
124
+ pretrain_fs_ckpt: ''
125
+ processed_data_dir: data/processed/popcs
126
+ profile_infer: false
127
+ raw_data_dir: data/raw/popcs
128
+ ref_norm_layer: bn
129
+ rel_pos: true
130
+ reset_phone_dict: true
131
+ residual_channels: 256
132
+ residual_layers: 20
133
+ save_best: false
134
+ save_ckpt: true
135
+ save_codes:
136
+ - configs
137
+ - modules
138
+ - tasks
139
+ - utils
140
+ - usr
141
+ save_f0: true
142
+ save_gt: false
143
+ schedule_type: linear
144
+ seed: 1234
145
+ sort_by_len: true
146
+ spec_max:
147
+ - -0.79453
148
+ - -0.81116
149
+ - -0.61631
150
+ - -0.30679
151
+ - -0.13863
152
+ - -0.050652
153
+ - -0.11563
154
+ - -0.10679
155
+ - -0.091068
156
+ - -0.062174
157
+ - -0.075302
158
+ - -0.072217
159
+ - -0.063815
160
+ - -0.073299
161
+ - 0.007361
162
+ - -0.072508
163
+ - -0.050234
164
+ - -0.16534
165
+ - -0.26928
166
+ - -0.20782
167
+ - -0.20823
168
+ - -0.11702
169
+ - -0.070128
170
+ - -0.065868
171
+ - -0.012675
172
+ - 0.0015121
173
+ - -0.089902
174
+ - -0.21392
175
+ - -0.23789
176
+ - -0.28922
177
+ - -0.30405
178
+ - -0.23029
179
+ - -0.22088
180
+ - -0.21542
181
+ - -0.29367
182
+ - -0.30137
183
+ - -0.38281
184
+ - -0.4359
185
+ - -0.28681
186
+ - -0.46855
187
+ - -0.57485
188
+ - -0.47022
189
+ - -0.54266
190
+ - -0.44848
191
+ - -0.6412
192
+ - -0.687
193
+ - -0.6486
194
+ - -0.76436
195
+ - -0.49971
196
+ - -0.71068
197
+ - -0.69724
198
+ - -0.61487
199
+ - -0.55843
200
+ - -0.69773
201
+ - -0.57502
202
+ - -0.70919
203
+ - -0.82431
204
+ - -0.84213
205
+ - -0.90431
206
+ - -0.8284
207
+ - -0.77945
208
+ - -0.82758
209
+ - -0.87699
210
+ - -1.0532
211
+ - -1.0766
212
+ - -1.1198
213
+ - -1.0185
214
+ - -0.98983
215
+ - -1.0001
216
+ - -1.0756
217
+ - -1.0024
218
+ - -1.0304
219
+ - -1.0579
220
+ - -1.0188
221
+ - -1.05
222
+ - -1.0842
223
+ - -1.0923
224
+ - -1.1223
225
+ - -1.2381
226
+ - -1.6467
227
+ spec_min:
228
+ - -6.0
229
+ - -6.0
230
+ - -6.0
231
+ - -6.0
232
+ - -6.0
233
+ - -6.0
234
+ - -6.0
235
+ - -6.0
236
+ - -6.0
237
+ - -6.0
238
+ - -6.0
239
+ - -6.0
240
+ - -6.0
241
+ - -6.0
242
+ - -6.0
243
+ - -6.0
244
+ - -6.0
245
+ - -6.0
246
+ - -6.0
247
+ - -6.0
248
+ - -6.0
249
+ - -6.0
250
+ - -6.0
251
+ - -6.0
252
+ - -6.0
253
+ - -6.0
254
+ - -6.0
255
+ - -6.0
256
+ - -6.0
257
+ - -6.0
258
+ - -6.0
259
+ - -6.0
260
+ - -6.0
261
+ - -6.0
262
+ - -6.0
263
+ - -6.0
264
+ - -6.0
265
+ - -6.0
266
+ - -6.0
267
+ - -6.0
268
+ - -6.0
269
+ - -6.0
270
+ - -6.0
271
+ - -6.0
272
+ - -6.0
273
+ - -6.0
274
+ - -6.0
275
+ - -6.0
276
+ - -6.0
277
+ - -6.0
278
+ - -6.0
279
+ - -6.0
280
+ - -6.0
281
+ - -6.0
282
+ - -6.0
283
+ - -6.0
284
+ - -6.0
285
+ - -6.0
286
+ - -6.0
287
+ - -6.0
288
+ - -6.0
289
+ - -6.0
290
+ - -6.0
291
+ - -6.0
292
+ - -6.0
293
+ - -6.0
294
+ - -6.0
295
+ - -6.0
296
+ - -6.0
297
+ - -6.0
298
+ - -6.0
299
+ - -6.0
300
+ - -6.0
301
+ - -6.0
302
+ - -6.0
303
+ - -6.0
304
+ - -6.0
305
+ - -6.0
306
+ - -6.0
307
+ - -6.0
308
+ spk_cond_steps: []
309
+ stop_token_weight: 5.0
310
+ task_cls: usr.diffsinger_task.DiffSingerMIDITask
311
+ test_ids: []
312
+ test_input_dir: ''
313
+ test_num: 0
314
+ test_prefixes:
315
+ - "popcs-\u8BF4\u6563\u5C31\u6563"
316
+ - "popcs-\u9690\u5F62\u7684\u7FC5\u8180"
317
+ test_set_name: test
318
+ timesteps: 100
319
+ train_set_name: train
320
+ use_denoise: false
321
+ use_energy_embed: false
322
+ use_gt_dur: false
323
+ use_gt_f0: false
324
+ use_midi: true
325
+ use_nsf: true
326
+ use_pitch_embed: false
327
+ use_pos_embed: true
328
+ use_spk_embed: false
329
+ use_spk_id: false
330
+ use_split_spk_id: false
331
+ use_uv: true
332
+ use_var_enc: false
333
+ val_check_interval: 2000
334
+ valid_num: 0
335
+ valid_set_name: valid
336
+ vocoder: vocoders.hifigan.HifiGAN
337
+ vocoder_ckpt: checkpoints/0109_hifigan_bigpopcs_hop128
338
+ warmup_updates: 2000
339
+ wav2spec_eps: 1e-6
340
+ weight_decay: 0
341
+ win_size: 512
342
+ work_dir: checkpoints/0228_opencpop_ds100_rel
checkpoints/0228_opencpop_ds100_rel/model_ckpt_steps_160000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a8261f7415bb39eb80a19d4c27c0ea084f63af2fdf6b82e63fcbd9cd82fc90c
3
+ size 170226367
checkpoints/clean.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import torch
3
+
4
+ if __name__ == '__main__':
5
+ ckpt_path = sys.argv[1]
6
+ checkpoint = torch.load(ckpt_path, map_location='cpu')
7
+ print(checkpoint['state_dict'].keys())
8
+ if 'model' in checkpoint['state_dict']:
9
+ checkpoint = {'state_dict': {'model': checkpoint['state_dict']['model']}}
10
+ else:
11
+ checkpoint = {'state_dict': {'model_gen': checkpoint['state_dict']['model_gen']}}
12
+ torch.save(checkpoint, ckpt_path, _use_new_zipfile_serialization=False)
checkpoints/cleaner.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import torch
3
+
4
+ if __name__ == '__main__':
5
+ ckpt_path = sys.argv[1]
6
+ checkpoint = torch.load(ckpt_path, map_location='cpu')
7
+ checkpoint = {'state_dict': checkpoint['state_dict']}
8
+ torch.save(checkpoint, ckpt_path, _use_new_zipfile_serialization=False)