RayeRen committed on
Commit
f26c987
•
1 Parent(s): d1b91e7
checkpoints/fs2_exp/config.yaml CHANGED
@@ -64,7 +64,7 @@ encoder_K: 8
64
  encoder_type: fft
65
  endless_ds: true
66
  eval_max_batches: -1
67
- f0_max: 800
68
  f0_min: 80
69
  ffn_act: gelu
70
  ffn_hidden_size: 1024
 
64
  encoder_type: fft
65
  endless_ds: true
66
  eval_max_batches: -1
67
+ f0_max: 600
68
  f0_min: 80
69
  ffn_act: gelu
70
  ffn_hidden_size: 1024
checkpoints/fs2_exp/{model_ckpt_steps_98000.ckpt → model_ckpt_steps_160000.ckpt} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d4f450bb3115e04b4ea93eed8c9318f08d01582bed1dd86886b32d50601dc58
3
  size 108423039
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f73b9e52933d3e2bcf6591ac375ab25dc3bdc4b03e1ae5058fc003fa0dd721e
3
  size 108423039
checkpoints/fs_pitch/config.yaml ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accumulate_grad_batches: 1
2
+ amp: false
3
+ audio_num_mel_bins: 80
4
+ audio_sample_rate: 22050
5
+ base_config:
6
+ - egs/egs_bases/tts/fs.yaml
7
+ - ./base_text2mel.yaml
8
+ binarization_args:
9
+ min_sil_duration: 0.1
10
+ shuffle: false
11
+ test_range:
12
+ - 0
13
+ - 523
14
+ train_range:
15
+ - 871
16
+ - -1
17
+ trim_eos_bos: false
18
+ valid_range:
19
+ - 523
20
+ - 871
21
+ with_align: true
22
+ with_f0: true
23
+ with_f0cwt: false
24
+ with_linear: false
25
+ with_spk_embed: false
26
+ with_wav: false
27
+ binarizer_cls: data_gen.tts.base_binarizer.BaseBinarizer
28
+ binary_data_dir: data/binary/ljspeech
29
+ check_val_every_n_epoch: 10
30
+ clip_grad_norm: 1
31
+ clip_grad_value: 0
32
+ conv_use_pos: false
33
+ debug: false
34
+ dec_dilations:
35
+ - 1
36
+ - 1
37
+ - 1
38
+ - 1
39
+ dec_ffn_kernel_size: 9
40
+ dec_inp_add_noise: false
41
+ dec_kernel_size: 5
42
+ dec_layers: 4
43
+ dec_post_net_kernel: 3
44
+ decoder_rnn_dim: 0
45
+ decoder_type: conv
46
+ dropout: 0.0
47
+ ds_workers: 2
48
+ dur_predictor_kernel: 3
49
+ dur_predictor_layers: 2
50
+ enc_dec_norm: ln
51
+ enc_dilations:
52
+ - 1
53
+ - 1
54
+ - 1
55
+ - 1
56
+ enc_ffn_kernel_size: 9
57
+ enc_kernel_size: 5
58
+ enc_layers: 4
59
+ enc_post_net_kernel: 3
60
+ enc_pre_ln: true
61
+ enc_prenet: true
62
+ encoder_K: 8
63
+ encoder_type: rel_fft
64
+ endless_ds: true
65
+ eval_max_batches: -1
66
+ f0_max: 600
67
+ f0_min: 80
68
+ ffn_act: gelu
69
+ ffn_hidden_size: 1024
70
+ fft_size: 1024
71
+ fmax: 7600
72
+ fmin: 80
73
+ frames_multiple: 1
74
+ gen_dir_name: ''
75
+ griffin_lim_iters: 30
76
+ hidden_size: 256
77
+ hop_size: 256
78
+ infer: false
79
+ lambda_commit: 0.25
80
+ lambda_energy: 0.1
81
+ lambda_f0: 1.0
82
+ lambda_ph_dur: 0.1
83
+ lambda_sent_dur: 1.0
84
+ lambda_uv: 1.0
85
+ lambda_word_dur: 1.0
86
+ layers_in_block: 2
87
+ load_ckpt: ''
88
+ loud_norm: false
89
+ lr: 0.0005
90
+ max_epochs: 1000
91
+ max_frames: 1548
92
+ max_input_tokens: 1550
93
+ max_sentences: 128
94
+ max_tokens: 40000
95
+ max_updates: 160000
96
+ max_valid_sentences: 1
97
+ max_valid_tokens: 60000
98
+ mel_losses: l1:0.5|ssim:0.5
99
+ mel_vmax: 1.5
100
+ mel_vmin: -6
101
+ min_frames: 0
102
+ num_ckpt_keep: 3
103
+ num_heads: 2
104
+ num_sanity_val_steps: 5
105
+ num_spk: 1
106
+ num_valid_plots: 10
107
+ optimizer_adam_beta1: 0.9
108
+ optimizer_adam_beta2: 0.98
109
+ out_wav_norm: false
110
+ pitch_extractor: parselmouth
111
+ pitch_key: pitch
112
+ pitch_type: frame
113
+ predictor_dropout: 0.5
114
+ predictor_grad: 0.1
115
+ predictor_hidden: -1
116
+ predictor_kernel: 5
117
+ predictor_layers: 2
118
+ preprocess_args:
119
+ add_eos_bos: true
120
+ mfa_group_shuffle: false
121
+ mfa_offset: 0.02
122
+ nsample_per_mfa_group: 1000
123
+ reset_phone_dict: true
124
+ reset_word_dict: true
125
+ save_sil_mask: true
126
+ txt_processor: en
127
+ use_mfa: true
128
+ vad_max_silence_length: 12
129
+ wav_processors: []
130
+ with_phsep: true
131
+ preprocess_cls: egs.datasets.audio.lj.preprocess.LJPreprocess
132
+ print_nan_grads: false
133
+ processed_data_dir: data/processed/ljspeech
134
+ profile_infer: false
135
+ raw_data_dir: data/raw/LJSpeech-1.1
136
+ ref_norm_layer: bn
137
+ rename_tmux: true
138
+ resume_from_checkpoint: 0
139
+ save_best: false
140
+ save_codes:
141
+ - tasks
142
+ - modules
143
+ - egs
144
+ save_f0: false
145
+ save_gt: true
146
+ scheduler: warmup
147
+ seed: 1234
148
+ sort_by_len: true
149
+ task_cls: tasks.tts.fs.FastSpeechTask
150
+ tb_log_interval: 100
151
+ test_ids:
152
+ - 0
153
+ - 1
154
+ - 2
155
+ - 3
156
+ - 4
157
+ - 5
158
+ - 6
159
+ - 7
160
+ - 8
161
+ - 9
162
+ - 10
163
+ - 11
164
+ - 12
165
+ - 13
166
+ - 14
167
+ - 15
168
+ - 16
169
+ - 17
170
+ - 18
171
+ - 19
172
+ - 68
173
+ - 70
174
+ - 74
175
+ - 87
176
+ - 110
177
+ - 172
178
+ - 190
179
+ - 215
180
+ - 231
181
+ - 294
182
+ - 316
183
+ - 324
184
+ - 402
185
+ - 422
186
+ - 485
187
+ - 500
188
+ - 505
189
+ - 508
190
+ - 509
191
+ - 519
192
+ test_input_yaml: ''
193
+ test_num: 100
194
+ test_set_name: test
195
+ train_set_name: train
196
+ train_sets: ''
197
+ use_gt_dur: false
198
+ use_gt_f0: false
199
+ use_pitch_embed: true
200
+ use_pos_embed: true
201
+ use_spk_embed: false
202
+ use_spk_id: false
203
+ use_uv: true
204
+ use_word_input: false
205
+ val_check_interval: 2000
206
+ valid_infer_interval: 10000
207
+ valid_monitor_key: val_loss
208
+ valid_monitor_mode: min
209
+ valid_set_name: valid
210
+ vocoder: HifiGAN
211
+ vocoder_ckpt: checkpoints/hifi_lj
212
+ warmup_updates: 4000
213
+ weight_decay: 0
214
+ win_size: 1024
215
+ word_dict_size: 10000
216
+ work_dir: checkpoints/1402_fs2_lj_1
checkpoints/fs_pitch/model_ckpt_steps_160000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f3bd822cd2a1d976e7edc0d6bf84f13d06f01265efdcac88880bae5ae3e7140
3
+ size 85090681
checkpoints/ps_normal_exp/config.yaml CHANGED
@@ -65,7 +65,7 @@ encoder_K: 8
65
  encoder_type: rel_fft
66
  endless_ds: true
67
  eval_max_batches: -1
68
- f0_max: 800
69
  f0_min: 80
70
  ffn_act: gelu
71
  ffn_hidden_size: 768
 
65
  encoder_type: rel_fft
66
  endless_ds: true
67
  eval_max_batches: -1
68
+ f0_max: 600
69
  f0_min: 80
70
  ffn_act: gelu
71
  ffn_hidden_size: 768
checkpoints/ps_normal_exp/{model_ckpt_steps_278000.ckpt → model_ckpt_steps_406000.ckpt} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13a51035b84c2a385d05ce695f6dca0b5095e7bd7ea3b1d34a22aed4d9c9b5fc
3
  size 104081102
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:045ea7ba3df9eae43347e8cd59c872f501b9cfd4c0b22fe1d610cdc8ab0b8aee
3
  size 104081102
checkpoints/ps_small_exp/config.yaml CHANGED
@@ -65,7 +65,7 @@ encoder_K: 8
65
  encoder_type: rel_fft
66
  endless_ds: true
67
  eval_max_batches: -1
68
- f0_max: 800
69
  f0_min: 80
70
  ffn_act: gelu
71
  ffn_hidden_size: 512
 
65
  encoder_type: rel_fft
66
  endless_ds: true
67
  eval_max_batches: -1
68
+ f0_max: 600
69
  f0_min: 80
70
  ffn_act: gelu
71
  ffn_hidden_size: 512
checkpoints/ps_small_exp/model_ckpt_steps_410000.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6905d8969febca192f2239a99e833d9084b2e07cb6894a63e286901ab1d16553
3
  size 32754716
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:760eb5c55d29327f3b4ccdff425c93bafc1d30eae90c218bf81a3a2afbe7f4be
3
  size 32754716