yerfor committed on
Commit
163b951
·
1 Parent(s): 87fac49

add vocoder

Browse files
checkpoints/hifi_lj/.DS_Store ADDED
Binary file (6.15 kB). View file
 
checkpoints/hifi_lj/config.yaml ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accumulate_grad_batches: 1
2
+ adam_b1: 0.8
3
+ adam_b2: 0.99
4
+ amp: false
5
+ audio_num_mel_bins: 80
6
+ audio_sample_rate: 22050
7
+ base_config:
8
+ - configs/tts/hifigan.yaml
9
+ - configs/tts/lj/base_mel2wav.yaml
10
+ binarization_args:
11
+ shuffle: false
12
+ trim_eos_bos: false
13
+ trim_sil: false
14
+ with_align: false
15
+ with_f0: true
16
+ with_f0cwt: false
17
+ with_linear: false
18
+ with_spk_embed: false
19
+ with_txt: true
20
+ with_wav: true
21
+ binarizer_cls: data_gen.tts.base_binarizer.BaseBinarizer
22
+ binary_data_dir: data/binary/ljspeech_wav
23
+ check_val_every_n_epoch: 10
24
+ clip_grad_norm: 1
25
+ clip_grad_value: 0
26
+ debug: false
27
+ dec_ffn_kernel_size: 9
28
+ dec_layers: 4
29
+ dict_dir: ''
30
+ disc_start_steps: 40000
31
+ discriminator_grad_norm: 1
32
+ discriminator_optimizer_params:
33
+ eps: 1.0e-06
34
+ lr: 0.0002
35
+ weight_decay: 0.0
36
+ discriminator_params:
37
+ bias: true
38
+ conv_channels: 64
39
+ in_channels: 1
40
+ kernel_size: 3
41
+ layers: 10
42
+ nonlinear_activation: LeakyReLU
43
+ nonlinear_activation_params:
44
+ negative_slope: 0.2
45
+ out_channels: 1
46
+ use_weight_norm: true
47
+ discriminator_scheduler_params:
48
+ gamma: 0.999
49
+ step_size: 600
50
+ dropout: 0.1
51
+ ds_workers: 1
52
+ enc_ffn_kernel_size: 9
53
+ enc_layers: 4
54
+ endless_ds: true
55
+ ffn_act: gelu
56
+ ffn_padding: SAME
57
+ fft_size: 1024
58
+ fm_loss: false
59
+ fmax: 7600
60
+ fmin: 80
61
+ frames_multiple: 1
62
+ gen_dir_name: ''
63
+ generator_grad_norm: 10
64
+ generator_optimizer_params:
65
+ eps: 1.0e-06
66
+ lr: 0.0002
67
+ weight_decay: 0.0
68
+ generator_params:
69
+ aux_channels: 80
70
+ aux_context_window: 0
71
+ dropout: 0.0
72
+ gate_channels: 128
73
+ in_channels: 1
74
+ kernel_size: 3
75
+ layers: 30
76
+ out_channels: 1
77
+ residual_channels: 64
78
+ skip_channels: 64
79
+ stacks: 3
80
+ upsample_net: ConvInUpsampleNetwork
81
+ upsample_params:
82
+ upsample_scales:
83
+ - 4
84
+ - 4
85
+ - 4
86
+ - 4
87
+ use_nsf: false
88
+ use_pitch_embed: false
89
+ use_weight_norm: true
90
+ generator_scheduler_params:
91
+ gamma: 0.999
92
+ step_size: 600
93
+ griffin_lim_iters: 60
94
+ hidden_size: 256
95
+ hop_size: 256
96
+ infer: false
97
+ lambda_adv: 4.0
98
+ lambda_mel: 45.0
99
+ load_ckpt: ''
100
+ loud_norm: false
101
+ lr: 2.0
102
+ max_epochs: 1000
103
+ max_eval_sentences: 1
104
+ max_eval_tokens: 60000
105
+ max_frames: 1548
106
+ max_input_tokens: 1550
107
+ max_samples: 8192
108
+ max_sentences: 24
109
+ max_tokens: 30000
110
+ max_updates: 3000000
111
+ mel_vmax: 1.5
112
+ mel_vmin: -6
113
+ min_level_db: -100
114
+ num_ckpt_keep: 3
115
+ num_heads: 2
116
+ num_mels: 80
117
+ num_sanity_val_steps: 5
118
+ num_spk: 1
119
+ optimizer_adam_beta1: 0.9
120
+ optimizer_adam_beta2: 0.98
121
+ out_wav_norm: false
122
+ pitch_extractor: parselmouth
123
+ pre_align_args:
124
+ allow_no_txt: false
125
+ denoise: false
126
+ forced_align: mfa
127
+ sox_resample: false
128
+ trim_sil: false
129
+ txt_processor: en
130
+ use_tone: true
131
+ pre_align_cls: ''
132
+ print_nan_grads: false
133
+ processed_data_dir: data/processed/ljspeech
134
+ profile_infer: false
135
+ raw_data_dir: data/raw/LJSpeech-1.1
136
+ ref_level_db: 20
137
+ rerun_gen: true
138
+ resblock: '1'
139
+ resblock_dilation_sizes:
140
+ - - 1
141
+ - 3
142
+ - 5
143
+ - - 1
144
+ - 3
145
+ - 5
146
+ - - 1
147
+ - 3
148
+ - 5
149
+ resblock_kernel_sizes:
150
+ - 3
151
+ - 7
152
+ - 11
153
+ reset_phone_dict: true
154
+ resume_from_checkpoint: 0
155
+ sampling_rate: 22050
156
+ save_best: true
157
+ save_codes: []
158
+ save_f0: false
159
+ save_gt: true
160
+ seed: 1234
161
+ sort_by_len: true
162
+ stft_loss_params:
163
+ fft_sizes:
164
+ - 1024
165
+ - 2048
166
+ - 512
167
+ hop_sizes:
168
+ - 120
169
+ - 240
170
+ - 50
171
+ win_lengths:
172
+ - 600
173
+ - 1200
174
+ - 240
175
+ window: hann_window
176
+ stop_token_weight: 5.0
177
+ task_cls: tasks.vocoder.hifigan.HifiGanTask
178
+ tb_log_interval: 100
179
+ test_input_dir: ''
180
+ test_num: 100
181
+ test_set_name: test
182
+ train_set_name: train
183
+ upsample_initial_channel: 512
184
+ upsample_kernel_sizes:
185
+ - 16
186
+ - 16
187
+ - 4
188
+ - 4
189
+ upsample_rates:
190
+ - 8
191
+ - 8
192
+ - 2
193
+ - 2
194
+ use_mel_loss: false
195
+ use_pitch_embed: false
196
+ val_check_interval: 2000
197
+ valid_monitor_key: val_loss
198
+ valid_monitor_mode: min
199
+ valid_set_name: valid
200
+ vocoder: pwg
201
+ vocoder_ckpt: ''
202
+ warmup_updates: 8000
203
+ weight_decay: 0
204
+ win_length: null
205
+ win_size: 1024
206
+ window: hann
207
+ work_dir: checkpoints/0414_hifi_lj_1
checkpoints/hifi_lj/model_ckpt_steps_2076000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bbc40f0471a92394f6bf057820cf66a1f50d29db22c997341448bd496a0792d
3
+ size 55786088