soumi-maiti committed on
Commit
608fb4f
1 Parent(s): 12fce2f

Adding pretrained models

vctk_hifigan_hubert_large_km1000_24khz.v1/checkpoint-300000steps.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b964d1fd1791e3171cbbadfa7204e99323fb09f68e4f4b50ed1b42c8710fda0
+size 1055009658
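
Note: the checkpoint files in this commit are stored as Git LFS pointers; only the spec version, the sha256 object id, and the payload size (about 1 GB here) live in-tree, and `git lfs pull` fetches the actual weights. A minimal sketch of reading such a pointer (the `parse_lfs_pointer` helper below is illustrative, not part of this repository):

```python
# Parse a Git LFS pointer file such as the checkpoint entries in this commit.
# The pointer holds only metadata; the real checkpoint identified by the
# sha256 oid is fetched separately (e.g. via `git lfs pull`).

def parse_lfs_pointer(path: str) -> dict:
    """Return the version, oid, and size fields of an LFS pointer file."""
    fields = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            if key:
                fields[key] = value
    return fields


if __name__ == "__main__":
    ptr = parse_lfs_pointer(
        "vctk_hifigan_hubert_large_km1000_24khz.v1/checkpoint-300000steps.pkl"
    )
    print(ptr["oid"])   # sha256:6b964d1fd1...
    print(ptr["size"])  # 1055009658 (bytes)
```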
vctk_hifigan_hubert_large_km1000_24khz.v1/checkpoint-600000steps.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47ba2d1a3c35ded16ef30a06bbb18527ca52810a61b9aa2484035b5b90dd6c31
+size 1055009658
vctk_hifigan_hubert_large_km1000_24khz.v1/config.yml ADDED
@@ -0,0 +1,194 @@
+allow_cache: true
+batch_max_steps: 10240
+batch_size: 32
+config: conf/hifigan_hubert_large_km1000_24khz.v1.yaml
+dev_dumpdir: dump_hu1000/dev/raw
+dev_feats_scp: null
+dev_segments: null
+dev_wav_scp: null
+discriminator_adv_loss_params:
+  average_by_discriminators: false
+discriminator_grad_norm: -1
+discriminator_optimizer_params:
+  betas:
+  - 0.5
+  - 0.9
+  lr: 0.0002
+  weight_decay: 0.0
+discriminator_optimizer_type: Adam
+discriminator_params:
+  follow_official_norm: true
+  period_discriminator_params:
+    bias: true
+    channels: 32
+    downsample_scales:
+    - 3
+    - 3
+    - 3
+    - 3
+    - 1
+    in_channels: 1
+    kernel_sizes:
+    - 5
+    - 3
+    max_downsample_channels: 1024
+    nonlinear_activation: LeakyReLU
+    nonlinear_activation_params:
+      negative_slope: 0.1
+    out_channels: 1
+    use_spectral_norm: false
+    use_weight_norm: true
+  periods:
+  - 2
+  - 3
+  - 5
+  - 7
+  - 11
+  scale_discriminator_params:
+    bias: true
+    channels: 128
+    downsample_scales:
+    - 4
+    - 4
+    - 4
+    - 4
+    - 1
+    in_channels: 1
+    kernel_sizes:
+    - 15
+    - 41
+    - 5
+    - 3
+    max_downsample_channels: 1024
+    max_groups: 16
+    nonlinear_activation: LeakyReLU
+    nonlinear_activation_params:
+      negative_slope: 0.1
+    out_channels: 1
+  scale_downsample_pooling: AvgPool1d
+  scale_downsample_pooling_params:
+    kernel_size: 4
+    padding: 2
+    stride: 2
+  scales: 3
+discriminator_scheduler_params:
+  gamma: 0.5
+  milestones:
+  - 200000
+  - 400000
+  - 600000
+  - 800000
+discriminator_scheduler_type: MultiStepLR
+discriminator_train_start_steps: 0
+discriminator_type: HiFiGANMultiScaleMultiPeriodDiscriminator
+distributed: false
+eval_interval_steps: 1000
+feat_match_loss_params:
+  average_by_discriminators: false
+  average_by_layers: false
+  include_final_outputs: true
+fft_size: null
+fmax: null
+fmin: null
+format: hdf5
+generator_adv_loss_params:
+  average_by_discriminators: false
+generator_grad_norm: -1
+generator_optimizer_params:
+  betas:
+  - 0.5
+  - 0.9
+  lr: 0.0002
+  weight_decay: 0.0
+generator_optimizer_type: Adam
+generator_params:
+  bias: true
+  channels: 512
+  concat_spk_emb: false
+  in_channels: 512
+  kernel_size: 7
+  nonlinear_activation: LeakyReLU
+  nonlinear_activation_params:
+    negative_slope: 0.1
+  num_embs: 1000
+  out_channels: 1
+  resblock_dilations:
+  - - 1
+    - 3
+    - 5
+  - - 1
+    - 3
+    - 5
+  - - 1
+    - 3
+    - 5
+  resblock_kernel_sizes:
+  - 3
+  - 7
+  - 11
+  spk_emb_dim: 512
+  upsample_kernal_sizes:
+  - 24
+  - 20
+  - 4
+  - 4
+  upsample_scales:
+  - 12
+  - 10
+  - 2
+  - 2
+  use_additional_convs: true
+  use_weight_norm: true
+generator_scheduler_params:
+  gamma: 0.5
+  milestones:
+  - 200000
+  - 400000
+  - 600000
+  - 800000
+generator_scheduler_type: MultiStepLR
+generator_train_start_steps: 1
+generator_type: DiscreteSymbolSpkEmbHiFiGANGenerator
+global_gain_scale: 1.0
+hop_size: 480
+lambda_adv: 1.0
+lambda_aux: 45.0
+lambda_feat_match: 2.0
+log_interval_steps: 100
+mel_loss_params:
+  fft_size: 2048
+  fmax: 8000
+  fmin: 0
+  fs: 24000
+  hop_size: 300
+  log_base: null
+  num_mels: 80
+  win_length: null
+  window: hann
+num_mels: 2
+num_save_intermediate_results: 4
+num_workers: 2
+outdir: exp/tr_no_dev_vctk_hifigan_hubert_large_km1000_24khz.v1
+pin_memory: true
+pretrain: ''
+rank: 0
+remove_short_samples: false
+resume: exp/tr_no_dev_vctk_hifigan_hubert_large_km1000_24khz.v1/checkpoint-450000steps.pkl
+sampling_rate: 24000
+save_interval_steps: 50000
+train_dumpdir: dump_hu1000/tr_no_dev/raw
+train_feats_scp: null
+train_max_steps: 2500000
+train_segments: null
+train_wav_scp: null
+trim_frame_size: 1024
+trim_hop_size: 256
+trim_silence: false
+trim_threshold_in_db: 20
+use_feat_match_loss: true
+use_mel_loss: true
+use_stft_loss: false
+verbose: 1
+version: 0.5.1
+win_length: null
+window: null
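
For reference, the config above follows the flat ParallelWaveGAN-style training config. One quick consistency check, shown in the sketch below (assuming only PyYAML and the file path added in this commit): the product of `upsample_scales` (12 * 10 * 2 * 2 = 480) must equal `hop_size`, so at `sampling_rate: 24000` the generator consumes one discrete unit every 480 samples, i.e. 24000 / 480 = 50 Hz, the frame rate of the HuBERT-large k-means tokens.

```python
# Sketch: load the vocoder config and verify that the generator's total
# upsampling factor matches the feature hop size (480 samples @ 24 kHz = 50 Hz).
import math
import yaml  # pip install pyyaml

with open("vctk_hifigan_hubert_large_km1000_24khz.v1/config.yml") as f:
    cfg = yaml.safe_load(f)

upsample = math.prod(cfg["generator_params"]["upsample_scales"])  # 12*10*2*2
assert upsample == cfg["hop_size"] == 480

frame_rate = cfg["sampling_rate"] / cfg["hop_size"]  # 24000 / 480 = 50.0 Hz
print(f"codebook size: {cfg['generator_params']['num_embs']}, "
      f"unit frame rate: {frame_rate} Hz")
```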
vctk_hifigan_hubert_large_km200_24khz.v1/checkpoint-300000steps.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d7229f0b73596228ec6eb7b20aa4793c51b0cc07bcda421ecb1645cd552c662
+size 1050094458
vctk_hifigan_hubert_large_km200_24khz.v1/checkpoint-450000steps.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5fdd3e3d316fa259541d89509aed4a46bee4eaa880901e3af020194d2c70db9b
+size 1050094458
vctk_hifigan_hubert_large_km200_24khz.v1/config.yml ADDED
@@ -0,0 +1,194 @@
+allow_cache: true
+batch_max_steps: 10240
+batch_size: 32
+config: conf/hifigan_hubert_large_km200_24khz.v1.yaml
+dev_dumpdir: dump_hu200/dev/raw
+dev_feats_scp: null
+dev_segments: null
+dev_wav_scp: null
+discriminator_adv_loss_params:
+  average_by_discriminators: false
+discriminator_grad_norm: -1
+discriminator_optimizer_params:
+  betas:
+  - 0.5
+  - 0.9
+  lr: 0.0002
+  weight_decay: 0.0
+discriminator_optimizer_type: Adam
+discriminator_params:
+  follow_official_norm: true
+  period_discriminator_params:
+    bias: true
+    channels: 32
+    downsample_scales:
+    - 3
+    - 3
+    - 3
+    - 3
+    - 1
+    in_channels: 1
+    kernel_sizes:
+    - 5
+    - 3
+    max_downsample_channels: 1024
+    nonlinear_activation: LeakyReLU
+    nonlinear_activation_params:
+      negative_slope: 0.1
+    out_channels: 1
+    use_spectral_norm: false
+    use_weight_norm: true
+  periods:
+  - 2
+  - 3
+  - 5
+  - 7
+  - 11
+  scale_discriminator_params:
+    bias: true
+    channels: 128
+    downsample_scales:
+    - 4
+    - 4
+    - 4
+    - 4
+    - 1
+    in_channels: 1
+    kernel_sizes:
+    - 15
+    - 41
+    - 5
+    - 3
+    max_downsample_channels: 1024
+    max_groups: 16
+    nonlinear_activation: LeakyReLU
+    nonlinear_activation_params:
+      negative_slope: 0.1
+    out_channels: 1
+  scale_downsample_pooling: AvgPool1d
+  scale_downsample_pooling_params:
+    kernel_size: 4
+    padding: 2
+    stride: 2
+  scales: 3
+discriminator_scheduler_params:
+  gamma: 0.5
+  milestones:
+  - 200000
+  - 400000
+  - 600000
+  - 800000
+discriminator_scheduler_type: MultiStepLR
+discriminator_train_start_steps: 0
+discriminator_type: HiFiGANMultiScaleMultiPeriodDiscriminator
+distributed: false
+eval_interval_steps: 1000
+feat_match_loss_params:
+  average_by_discriminators: false
+  average_by_layers: false
+  include_final_outputs: true
+fft_size: null
+fmax: null
+fmin: null
+format: hdf5
+generator_adv_loss_params:
+  average_by_discriminators: false
+generator_grad_norm: -1
+generator_optimizer_params:
+  betas:
+  - 0.5
+  - 0.9
+  lr: 0.0002
+  weight_decay: 0.0
+generator_optimizer_type: Adam
+generator_params:
+  bias: true
+  channels: 512
+  concat_spk_emb: false
+  in_channels: 512
+  kernel_size: 7
+  nonlinear_activation: LeakyReLU
+  nonlinear_activation_params:
+    negative_slope: 0.1
+  num_embs: 200
+  out_channels: 1
+  resblock_dilations:
+  - - 1
+    - 3
+    - 5
+  - - 1
+    - 3
+    - 5
+  - - 1
+    - 3
+    - 5
+  resblock_kernel_sizes:
+  - 3
+  - 7
+  - 11
+  spk_emb_dim: 512
+  upsample_kernal_sizes:
+  - 24
+  - 20
+  - 4
+  - 4
+  upsample_scales:
+  - 12
+  - 10
+  - 2
+  - 2
+  use_additional_convs: true
+  use_weight_norm: true
+generator_scheduler_params:
+  gamma: 0.5
+  milestones:
+  - 200000
+  - 400000
+  - 600000
+  - 800000
+generator_scheduler_type: MultiStepLR
+generator_train_start_steps: 1
+generator_type: DiscreteSymbolSpkEmbHiFiGANGenerator
+global_gain_scale: 1.0
+hop_size: 480
+lambda_adv: 1.0
+lambda_aux: 45.0
+lambda_feat_match: 2.0
+log_interval_steps: 100
+mel_loss_params:
+  fft_size: 2048
+  fmax: 8000
+  fmin: 0
+  fs: 24000
+  hop_size: 300
+  log_base: null
+  num_mels: 80
+  win_length: null
+  window: hann
+num_mels: 2
+num_save_intermediate_results: 4
+num_workers: 2
+outdir: exp/tr_no_dev_vctk_hifigan_hubert_large_km200_24khz.v1
+pin_memory: true
+pretrain: ''
+rank: 0
+remove_short_samples: false
+resume: exp/tr_no_dev_vctk_hifigan_hubert_large_km200_24khz.v1/checkpoint-300000steps.pkl
+sampling_rate: 24000
+save_interval_steps: 50000
+train_dumpdir: dump_hu200/tr_no_dev/raw
+train_feats_scp: null
+train_max_steps: 2500000
+train_segments: null
+train_wav_scp: null
+trim_frame_size: 1024
+trim_hop_size: 256
+trim_silence: false
+trim_threshold_in_db: 20
+use_feat_match_loss: true
+use_mel_loss: true
+use_stft_loss: false
+verbose: 1
+version: 0.5.1
+win_length: null
+window: null
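
The paired checkpoint .pkl files above are trainer snapshots. Assuming they follow the ParallelWaveGAN-style checkpoint layout (a torch-saved dict with the generator weights under `model/generator` — an assumption, not verified against these files), a quick inspection could look like this sketch:

```python
# Sketch: peek inside a checkpoint after fetching it with `git lfs pull`.
# ASSUMPTION: the .pkl is a torch-saved dict in the ParallelWaveGAN trainer
# format, with generator weights under checkpoint["model"]["generator"].
import torch

ckpt_path = "vctk_hifigan_hubert_large_km200_24khz.v1/checkpoint-450000steps.pkl"
# On newer PyTorch you may need torch.load(..., weights_only=False) for
# checkpoints saved by older versions.
checkpoint = torch.load(ckpt_path, map_location="cpu")

print(sorted(checkpoint.keys()))            # e.g. ['model', 'optimizer', 'steps', ...]
gen_state = checkpoint["model"]["generator"]
print(len(gen_state), "generator tensors")
```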
vctk_hifigan_hubert_large_km50_24khz.v1/checkpoint-300000steps.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:789308b6c64424cbb3ed1b4d30403a3dcd998c8b4eeca44239d92f5c5525bd1e
+size 1049172794
vctk_hifigan_hubert_large_km50_24khz.v1/config.yml ADDED
@@ -0,0 +1,194 @@
+allow_cache: true
+batch_max_steps: 10240
+batch_size: 32
+config: conf/hifigan_hubert_large_km50_24khz.v1.yaml
+dev_dumpdir: dump_hu50/dev/raw
+dev_feats_scp: null
+dev_segments: null
+dev_wav_scp: null
+discriminator_adv_loss_params:
+  average_by_discriminators: false
+discriminator_grad_norm: -1
+discriminator_optimizer_params:
+  betas:
+  - 0.5
+  - 0.9
+  lr: 0.0002
+  weight_decay: 0.0
+discriminator_optimizer_type: Adam
+discriminator_params:
+  follow_official_norm: true
+  period_discriminator_params:
+    bias: true
+    channels: 32
+    downsample_scales:
+    - 3
+    - 3
+    - 3
+    - 3
+    - 1
+    in_channels: 1
+    kernel_sizes:
+    - 5
+    - 3
+    max_downsample_channels: 1024
+    nonlinear_activation: LeakyReLU
+    nonlinear_activation_params:
+      negative_slope: 0.1
+    out_channels: 1
+    use_spectral_norm: false
+    use_weight_norm: true
+  periods:
+  - 2
+  - 3
+  - 5
+  - 7
+  - 11
+  scale_discriminator_params:
+    bias: true
+    channels: 128
+    downsample_scales:
+    - 4
+    - 4
+    - 4
+    - 4
+    - 1
+    in_channels: 1
+    kernel_sizes:
+    - 15
+    - 41
+    - 5
+    - 3
+    max_downsample_channels: 1024
+    max_groups: 16
+    nonlinear_activation: LeakyReLU
+    nonlinear_activation_params:
+      negative_slope: 0.1
+    out_channels: 1
+  scale_downsample_pooling: AvgPool1d
+  scale_downsample_pooling_params:
+    kernel_size: 4
+    padding: 2
+    stride: 2
+  scales: 3
+discriminator_scheduler_params:
+  gamma: 0.5
+  milestones:
+  - 200000
+  - 400000
+  - 600000
+  - 800000
+discriminator_scheduler_type: MultiStepLR
+discriminator_train_start_steps: 0
+discriminator_type: HiFiGANMultiScaleMultiPeriodDiscriminator
+distributed: false
+eval_interval_steps: 1000
+feat_match_loss_params:
+  average_by_discriminators: false
+  average_by_layers: false
+  include_final_outputs: true
+fft_size: null
+fmax: null
+fmin: null
+format: hdf5
+generator_adv_loss_params:
+  average_by_discriminators: false
+generator_grad_norm: -1
+generator_optimizer_params:
+  betas:
+  - 0.5
+  - 0.9
+  lr: 0.0002
+  weight_decay: 0.0
+generator_optimizer_type: Adam
+generator_params:
+  bias: true
+  channels: 512
+  concat_spk_emb: false
+  in_channels: 512
+  kernel_size: 7
+  nonlinear_activation: LeakyReLU
+  nonlinear_activation_params:
+    negative_slope: 0.1
+  num_embs: 50
+  out_channels: 1
+  resblock_dilations:
+  - - 1
+    - 3
+    - 5
+  - - 1
+    - 3
+    - 5
+  - - 1
+    - 3
+    - 5
+  resblock_kernel_sizes:
+  - 3
+  - 7
+  - 11
+  spk_emb_dim: 512
+  upsample_kernal_sizes:
+  - 24
+  - 20
+  - 4
+  - 4
+  upsample_scales:
+  - 12
+  - 10
+  - 2
+  - 2
+  use_additional_convs: true
+  use_weight_norm: true
+generator_scheduler_params:
+  gamma: 0.5
+  milestones:
+  - 200000
+  - 400000
+  - 600000
+  - 800000
+generator_scheduler_type: MultiStepLR
+generator_train_start_steps: 1
+generator_type: DiscreteSymbolSpkEmbHiFiGANGenerator
+global_gain_scale: 1.0
+hop_size: 480
+lambda_adv: 1.0
+lambda_aux: 45.0
+lambda_feat_match: 2.0
+log_interval_steps: 100
+mel_loss_params:
+  fft_size: 2048
+  fmax: 8000
+  fmin: 0
+  fs: 24000
+  hop_size: 300
+  log_base: null
+  num_mels: 80
+  win_length: null
+  window: hann
+num_mels: 2
+num_save_intermediate_results: 4
+num_workers: 2
+outdir: exp/tr_no_dev_vctk_hifigan_hubert_large_km50_24khz.v1
+pin_memory: true
+pretrain: ''
+rank: 0
+remove_short_samples: false
+resume: exp/tr_no_dev_vctk_hifigan_hubert_large_km50_24khz.v1/checkpoint-150000steps.pkl
+sampling_rate: 24000
+save_interval_steps: 50000
+train_dumpdir: dump_hu50/tr_no_dev/raw
+train_feats_scp: null
+train_max_steps: 2500000
+train_segments: null
+train_wav_scp: null
+trim_frame_size: 1024
+trim_hop_size: 256
+trim_silence: false
+trim_threshold_in_db: 20
+use_feat_match_loss: true
+use_mel_loss: true
+use_stft_loss: false
+verbose: 1
+version: 0.5.1
+win_length: null
+window: null
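
Taken together, the three added configs differ only in the discrete-unit codebook size (`num_embs`: 1000, 200, or 50), the matching config/dump/experiment paths, and the `resume` checkpoint step; every other setting is shared. A small sketch that verifies this by diffing the loaded YAML dicts (paths as added in this commit):

```python
# Sketch: compare the three added configs and report top-level keys whose
# values are not identical across the km1000 / km200 / km50 variants.
import yaml

paths = {
    "km1000": "vctk_hifigan_hubert_large_km1000_24khz.v1/config.yml",
    "km200": "vctk_hifigan_hubert_large_km200_24khz.v1/config.yml",
    "km50": "vctk_hifigan_hubert_large_km50_24khz.v1/config.yml",
}

cfgs = {}
for name, path in paths.items():
    with open(path) as f:
        cfgs[name] = yaml.safe_load(f)

all_keys = sorted(set().union(*(c.keys() for c in cfgs.values())))
for key in all_keys:
    values = {name: cfg.get(key) for name, cfg in cfgs.items()}
    if len({repr(v) for v in values.values()}) > 1:
        print(key, values)
# Expected differences: config, dev_dumpdir, train_dumpdir, outdir, resume,
# and generator_params (num_embs 1000 vs. 200 vs. 50); the rest should match.
```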