richardbaihe committed on
Commit
0fff999
•
1 Parent(s): 47aee0d

add another checkpoint for new speaker testing

Browse files
Files changed (23) hide show
  1. README.md +2 -0
  2. config.yaml → conformer/config.yaml +0 -0
  3. {tensorboard → conformer/tensorboard}/events.out.tfevents.1640668877.asimov-233.svail.baidu.com.3180799.0 +0 -0
  4. {tensorboard → conformer/tensorboard}/events.out.tfevents.1640668983.asimov-233.svail.baidu.com.3182300.0 +0 -0
  5. {tensorboard → conformer/tensorboard}/events.out.tfevents.1640712190.asimov-233.svail.baidu.com.3350743.0 +0 -0
  6. {tensorboard → conformer/tensorboard}/events.out.tfevents.1640755393.asimov-233.svail.baidu.com.3519148.0 +0 -0
  7. {tensorboard → conformer/tensorboard}/events.out.tfevents.1640798603.asimov-233.svail.baidu.com.3687767.0 +0 -0
  8. {tensorboard → conformer/tensorboard}/events.out.tfevents.1640841818.asimov-233.svail.baidu.com.3856172.0 +0 -0
  9. {tensorboard → conformer/tensorboard}/events.out.tfevents.1640885046.asimov-233.svail.baidu.com.4024845.0 +0 -0
  10. {tensorboard → conformer/tensorboard}/events.out.tfevents.1640928273.asimov-233.svail.baidu.com.4193180.0 +0 -0
  11. {tensorboard → conformer/tensorboard}/events.out.tfevents.1640971480.asimov-233.svail.baidu.com.167852.0 +0 -0
  12. {tensorboard → conformer/tensorboard}/events.out.tfevents.1641014641.asimov-232.svail.baidu.com.787833.0 +0 -0
  13. {tensorboard → conformer/tensorboard}/events.out.tfevents.1641057725.asimov-236.svail.baidu.com.4183948.0 +0 -0
  14. {tensorboard → conformer/tensorboard}/events.out.tfevents.1641100899.asimov-233.svail.baidu.com.399617.0 +0 -0
  15. {tensorboard → conformer/tensorboard}/events.out.tfevents.1641144063.asimov-234.svail.baidu.com.3761978.0 +0 -0
  16. {tensorboard → conformer/tensorboard}/events.out.tfevents.1641327032.asimov-232.svail.baidu.com.1156564.0 +0 -0
  17. {tensorboard → conformer/tensorboard}/events.out.tfevents.1641370195.asimov-234.svail.baidu.com.2750880.0 +0 -0
  18. {tensorboard → conformer/tensorboard}/events.out.tfevents.1641413008.asimov-232.svail.baidu.com.1356329.0 +0 -0
  19. train.loss.ave_5best.pth → conformer/train.loss.ave_5best.pth +0 -0
  20. unseen_conformer/config.yaml +275 -0
  21. unseen_conformer/tensorboard/events.out.tfevents.1644880588.asimov-241.svail.baidu.com.2497085.0 +3 -0
  22. unseen_conformer/tensorboard/events.out.tfevents.1645491772.asimov-241.svail.baidu.com.539622.0 +3 -0
  23. unseen_conformer/train.loss.ave_5best.pth +3 -0
README.md CHANGED
@@ -1,3 +1,5 @@
1
  ---
2
  license: apache-2.0
 
 
3
  ---
 
1
  ---
2
  license: apache-2.0
3
+ - conformer: Conformer A3T trained with all VCTK training data.
4
+ - unseen_conformer: Conformer A3T trained with some speakers excluded during training.
5
  ---
config.yaml → conformer/config.yaml RENAMED
File without changes
{tensorboard → conformer/tensorboard}/events.out.tfevents.1640668877.asimov-233.svail.baidu.com.3180799.0 RENAMED
File without changes
{tensorboard → conformer/tensorboard}/events.out.tfevents.1640668983.asimov-233.svail.baidu.com.3182300.0 RENAMED
File without changes
{tensorboard → conformer/tensorboard}/events.out.tfevents.1640712190.asimov-233.svail.baidu.com.3350743.0 RENAMED
File without changes
{tensorboard → conformer/tensorboard}/events.out.tfevents.1640755393.asimov-233.svail.baidu.com.3519148.0 RENAMED
File without changes
{tensorboard → conformer/tensorboard}/events.out.tfevents.1640798603.asimov-233.svail.baidu.com.3687767.0 RENAMED
File without changes
{tensorboard → conformer/tensorboard}/events.out.tfevents.1640841818.asimov-233.svail.baidu.com.3856172.0 RENAMED
File without changes
{tensorboard → conformer/tensorboard}/events.out.tfevents.1640885046.asimov-233.svail.baidu.com.4024845.0 RENAMED
File without changes
{tensorboard → conformer/tensorboard}/events.out.tfevents.1640928273.asimov-233.svail.baidu.com.4193180.0 RENAMED
File without changes
{tensorboard → conformer/tensorboard}/events.out.tfevents.1640971480.asimov-233.svail.baidu.com.167852.0 RENAMED
File without changes
{tensorboard → conformer/tensorboard}/events.out.tfevents.1641014641.asimov-232.svail.baidu.com.787833.0 RENAMED
File without changes
{tensorboard → conformer/tensorboard}/events.out.tfevents.1641057725.asimov-236.svail.baidu.com.4183948.0 RENAMED
File without changes
{tensorboard → conformer/tensorboard}/events.out.tfevents.1641100899.asimov-233.svail.baidu.com.399617.0 RENAMED
File without changes
{tensorboard → conformer/tensorboard}/events.out.tfevents.1641144063.asimov-234.svail.baidu.com.3761978.0 RENAMED
File without changes
{tensorboard → conformer/tensorboard}/events.out.tfevents.1641327032.asimov-232.svail.baidu.com.1156564.0 RENAMED
File without changes
{tensorboard → conformer/tensorboard}/events.out.tfevents.1641370195.asimov-234.svail.baidu.com.2750880.0 RENAMED
File without changes
{tensorboard → conformer/tensorboard}/events.out.tfevents.1641413008.asimov-232.svail.baidu.com.1356329.0 RENAMED
File without changes
train.loss.ave_5best.pth → conformer/train.loss.ave_5best.pth RENAMED
File without changes
unseen_conformer/config.yaml ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: ./conf/fsp2_conformer.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/unseen_conformer
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 0
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: 8
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: localhost
17
+ dist_master_port: 50455
18
+ dist_launcher: null
19
+ multiprocessing_distributed: true
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 1500
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ - - train
41
+ - loss
42
+ - min
43
+ keep_nbest_models: 5
44
+ grad_clip: 1.0
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 1
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: false
52
+ log_interval: null
53
+ use_tensorboard: true
54
+ use_wandb: false
55
+ wandb_project: null
56
+ wandb_id: null
57
+ wandb_entity: null
58
+ wandb_name: null
59
+ wandb_model_log_interval: -1
60
+ detect_anomaly: false
61
+ pretrain_path: null
62
+ init_param: []
63
+ ignore_init_mismatch: false
64
+ freeze_param: []
65
+ num_iters_per_epoch: 800
66
+ batch_size: 20
67
+ valid_batch_size: null
68
+ batch_bins: 3000000
69
+ valid_batch_bins: null
70
+ train_shape_file:
71
+ - exp/mlm_stats_raw_en_phn_g2p_en/train/speech_shape
72
+ valid_shape_file:
73
+ - exp/mlm_stats_raw_en_phn_g2p_en/valid/speech_shape
74
+ batch_type: numel
75
+ valid_batch_type: null
76
+ fold_length:
77
+ - 80000
78
+ sort_in_batch: descending
79
+ sort_batch: descending
80
+ multiple_iterator: false
81
+ chunk_length: 500
82
+ chunk_shift_ratio: 0.5
83
+ num_cache_chunks: 1024
84
+ train_data_path_and_name_and_type:
85
+ - - dump/raw/unseen_tr_no_dev/mfa_wav.scp
86
+ - speech
87
+ - sound
88
+ - - dump/raw/unseen_tr_no_dev/mfa_text
89
+ - text
90
+ - text
91
+ - - dump/raw/unseen_tr_no_dev/mfa_start
92
+ - align_start
93
+ - text_float
94
+ - - dump/raw/unseen_tr_no_dev/mfa_end
95
+ - align_end
96
+ - text_float
97
+ valid_data_path_and_name_and_type:
98
+ - - dump/raw/dev/mfa_wav.scp
99
+ - speech
100
+ - sound
101
+ - - dump/raw/dev/mfa_text
102
+ - text
103
+ - text
104
+ - - dump/raw/dev/mfa_start
105
+ - align_start
106
+ - text_float
107
+ - - dump/raw/dev/mfa_end
108
+ - align_end
109
+ - text_float
110
+ allow_variable_data_keys: false
111
+ max_cache_size: 0.0
112
+ max_cache_fd: 32
113
+ valid_max_cache_size: null
114
+ optim: adam
115
+ optim_conf:
116
+ lr: 1.0
117
+ scheduler: noamlr
118
+ scheduler_conf:
119
+ model_size: 384
120
+ warmup_steps: 4000
121
+ token_list:
122
+ - <blank>
123
+ - <unk>
124
+ - AH0
125
+ - T
126
+ - N
127
+ - sp
128
+ - D
129
+ - S
130
+ - R
131
+ - L
132
+ - IH1
133
+ - DH
134
+ - AE1
135
+ - M
136
+ - EH1
137
+ - K
138
+ - Z
139
+ - W
140
+ - HH
141
+ - ER0
142
+ - AH1
143
+ - IY1
144
+ - P
145
+ - V
146
+ - F
147
+ - B
148
+ - AY1
149
+ - IY0
150
+ - EY1
151
+ - AA1
152
+ - AO1
153
+ - UW1
154
+ - IH0
155
+ - OW1
156
+ - NG
157
+ - G
158
+ - SH
159
+ - ER1
160
+ - Y
161
+ - TH
162
+ - AW1
163
+ - CH
164
+ - UH1
165
+ - IH2
166
+ - JH
167
+ - OW0
168
+ - EH2
169
+ - OY1
170
+ - AY2
171
+ - EH0
172
+ - EY2
173
+ - UW0
174
+ - AE2
175
+ - AA2
176
+ - OW2
177
+ - AH2
178
+ - ZH
179
+ - AO2
180
+ - IY2
181
+ - AE0
182
+ - UW2
183
+ - AY0
184
+ - AA0
185
+ - AO0
186
+ - AW2
187
+ - EY0
188
+ - UH2
189
+ - ER2
190
+ - OY2
191
+ - UH0
192
+ - AW0
193
+ - OY0
194
+ - <sos/eos>
195
+ init: xavier_uniform
196
+ input_size: 80
197
+ odim: null
198
+ model_conf:
199
+ lsm_weight: 0.1
200
+ length_normalized_loss: false
201
+ masking_schema: phn_span
202
+ mean_phn_span: 8
203
+ mlm_prob: 0.8
204
+ dynamic_mlm_prob: false
205
+ postnet_layers: 5
206
+ postnet_filts: 5
207
+ postnet_chans: 256
208
+ use_scaled_pos_enc: false
209
+ use_preprocessor: true
210
+ token_type: word
211
+ bpemodel: data/en_token_list_g2p_en/bpe_unigram5000/bpe.model
212
+ non_linguistic_symbols: null
213
+ cleaner: null
214
+ g2p: g2p_en
215
+ speech_volume_normalize: null
216
+ rir_scp: null
217
+ rir_apply_prob: 1.0
218
+ noise_scp: null
219
+ noise_apply_prob: 1.0
220
+ noise_db_range: '13_15'
221
+ feats_extract: fbank
222
+ feats_extract_conf:
223
+ n_fft: 2048
224
+ hop_length: 300
225
+ win_length: 1200
226
+ fs: 24000
227
+ fmin: 80
228
+ fmax: 7600
229
+ n_mels: 80
230
+ normalize: null
231
+ normalize_conf: {}
232
+ encoder: conformer
233
+ encoder_conf:
234
+ input_layer: sega_mlm
235
+ pre_speech_layer: 0
236
+ cnn_module_kernel: 7
237
+ attention_dim: 384
238
+ attention_heads: 2
239
+ linear_units: 1536
240
+ num_blocks: 4
241
+ dropout_rate: 0.2
242
+ positional_dropout_rate: 0.2
243
+ attention_dropout_rate: 0.2
244
+ normalize_before: true
245
+ macaron_style: true
246
+ use_cnn_module: true
247
+ selfattention_layer_type: legacy_rel_selfattn
248
+ activation_type: swish
249
+ pos_enc_layer_type: legacy_rel_pos
250
+ positionwise_layer_type: conv1d
251
+ positionwise_conv_kernel_size: 3
252
+ decoder: conformer
253
+ decoder_conf:
254
+ cnn_module_kernel: 31
255
+ attention_dim: 384
256
+ attention_heads: 2
257
+ linear_units: 1536
258
+ num_blocks: 4
259
+ dropout_rate: 0.2
260
+ positional_dropout_rate: 0.2
261
+ attention_dropout_rate: 0.2
262
+ macaron_style: true
263
+ use_cnn_module: true
264
+ selfattention_layer_type: legacy_rel_selfattn
265
+ activation_type: swish
266
+ pos_enc_layer_type: legacy_rel_pos
267
+ positionwise_layer_type: conv1d
268
+ positionwise_conv_kernel_size: 3
269
+ pre_decoder: linear
270
+ pre_decoder_conf: {}
271
+ required:
272
+ - output_dir
273
+ - token_list
274
+ version: 0.10.3a3
275
+ distributed: true
unseen_conformer/tensorboard/events.out.tfevents.1644880588.asimov-241.svail.baidu.com.2497085.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20c4f02970c2071ab03e4dd1c726e25dc4429d1939978669f80fa5628a0f432e
3
+ size 12785416
unseen_conformer/tensorboard/events.out.tfevents.1645491772.asimov-241.svail.baidu.com.539622.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de419c3a7ff2ca79ec5697e1b4a2ef4648169df59195975c142c4df33c55420a
3
+ size 450217
unseen_conformer/train.loss.ave_5best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:340f3584019dedeffa1c740c069491957e623b3571e5849f2bfab17a4a32249f
3
+ size 271257065