pyf98 committed on
Commit
002b093
1 Parent(s): 998005d

add model files

Browse files
Files changed (32) hide show
  1. README.md +350 -0
  2. data/nlsyms.txt +3 -0
  3. exp/asr_stats_raw_en_char/train/feats_stats.npz +3 -0
  4. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/RESULTS.md +29 -0
  5. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/config.yaml +249 -0
  6. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/acc.png +0 -0
  7. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/backward_time.png +0 -0
  8. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/cer.png +0 -0
  9. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/cer_ctc.png +0 -0
  10. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/forward_time.png +0 -0
  11. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/gpu_max_cached_mem_GB.png +0 -0
  12. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/iter_time.png +0 -0
  13. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/loss.png +0 -0
  14. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/loss_att.png +0 -0
  15. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/loss_ctc.png +0 -0
  16. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/optim0_lr0.png +0 -0
  17. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/optim_step_time.png +0 -0
  18. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/train_time.png +0 -0
  19. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/wer.png +0 -0
  20. exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/valid.acc.ave_10best.pth +3 -0
  21. exp/lm_train_lm_transformer_en_char/config.yaml +190 -0
  22. exp/lm_train_lm_transformer_en_char/images/backward_time.png +0 -0
  23. exp/lm_train_lm_transformer_en_char/images/forward_time.png +0 -0
  24. exp/lm_train_lm_transformer_en_char/images/gpu_max_cached_mem_GB.png +0 -0
  25. exp/lm_train_lm_transformer_en_char/images/iter_time.png +0 -0
  26. exp/lm_train_lm_transformer_en_char/images/loss.png +0 -0
  27. exp/lm_train_lm_transformer_en_char/images/optim0_lr0.png +0 -0
  28. exp/lm_train_lm_transformer_en_char/images/optim_step_time.png +0 -0
  29. exp/lm_train_lm_transformer_en_char/images/train_time.png +0 -0
  30. exp/lm_train_lm_transformer_en_char/perplexity_test/ppl +1 -0
  31. exp/lm_train_lm_transformer_en_char/valid.loss.ave_10best.pth +3 -0
  32. meta.yaml +10 -0
README.md ADDED
@@ -0,0 +1,350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - automatic-speech-recognition
6
+ language: en
7
+ datasets:
8
+ - wsj
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 ASR model
13
+
14
+ ### `pyf98/wsj_conformer_e15_linear1024`
15
+
16
+ This model was trained by Yifan Peng using the WSJ recipe in [ESPnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout 0aa06d0535323aabc1d8b057f8769da377f4d9ff
26
+ pip install -e .
27
+ cd egs2/wsj/asr1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model pyf98/wsj_conformer_e15_linear1024
29
+ ```
30
+
31
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
32
+ # RESULTS
33
+ ## Environments
34
+ - date: `Wed Dec 28 00:25:25 EST 2022`
35
+ - python version: `3.9.15 (main, Nov 24 2022, 14:31:59) [GCC 11.2.0]`
36
+ - espnet version: `espnet 202211`
37
+ - pytorch version: `pytorch 1.12.1`
38
+ - Git hash: `0aa06d0535323aabc1d8b057f8769da377f4d9ff`
39
+ - Commit date: `Tue Dec 27 15:08:25 2022 -0600`
40
+
41
+ ## asr_train_asr_conformer_e15_linear1024_raw_en_char
42
+ ### WER
43
+
44
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
45
+ |---|---|---|---|---|---|---|---|---|
46
+ |decode_lm_lm_train_lm_transformer_en_char_valid.loss.ave_asr_model_valid.acc.ave/test_dev93|503|8234|94.2|5.1|0.8|0.7|6.5|52.5|
47
+ |decode_lm_lm_train_lm_transformer_en_char_valid.loss.ave_asr_model_valid.acc.ave/test_eval92|333|5643|96.4|3.3|0.3|0.6|4.1|37.2|
48
+
49
+ ### CER
50
+
51
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
52
+ |---|---|---|---|---|---|---|---|---|
53
+ |decode_lm_lm_train_lm_transformer_en_char_valid.loss.ave_asr_model_valid.acc.ave/test_dev93|503|48634|97.8|1.0|1.2|0.6|2.8|58.3|
54
+ |decode_lm_lm_train_lm_transformer_en_char_valid.loss.ave_asr_model_valid.acc.ave/test_eval92|333|33341|98.6|0.7|0.7|0.5|1.9|46.8|
55
+
56
+ ### TER
57
+
58
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
59
+ |---|---|---|---|---|---|---|---|---|
60
+
61
+ ## ASR config
62
+
63
+ <details><summary>expand</summary>
64
+
65
+ ```
66
+ config: conf/tuning/train_asr_conformer_e15_linear1024.yaml
67
+ print_config: false
68
+ log_level: INFO
69
+ dry_run: false
70
+ iterator_type: sequence
71
+ output_dir: exp/asr_train_asr_conformer_e15_linear1024_raw_en_char
72
+ ngpu: 1
73
+ seed: 0
74
+ num_workers: 4
75
+ num_att_plot: 3
76
+ dist_backend: nccl
77
+ dist_init_method: env://
78
+ dist_world_size: null
79
+ dist_rank: null
80
+ local_rank: 0
81
+ dist_master_addr: null
82
+ dist_master_port: null
83
+ dist_launcher: null
84
+ multiprocessing_distributed: false
85
+ unused_parameters: false
86
+ sharded_ddp: false
87
+ cudnn_enabled: true
88
+ cudnn_benchmark: false
89
+ cudnn_deterministic: true
90
+ collect_stats: false
91
+ write_collected_feats: false
92
+ max_epoch: 100
93
+ patience: null
94
+ val_scheduler_criterion:
95
+ - valid
96
+ - loss
97
+ early_stopping_criterion:
98
+ - valid
99
+ - loss
100
+ - min
101
+ best_model_criterion:
102
+ - - valid
103
+ - acc
104
+ - max
105
+ keep_nbest_models: 10
106
+ nbest_averaging_interval: 0
107
+ grad_clip: 5.0
108
+ grad_clip_type: 2.0
109
+ grad_noise: false
110
+ accum_grad: 2
111
+ no_forward_run: false
112
+ resume: true
113
+ train_dtype: float32
114
+ use_amp: true
115
+ log_interval: 100
116
+ use_matplotlib: true
117
+ use_tensorboard: true
118
+ create_graph_in_tensorboard: false
119
+ use_wandb: false
120
+ wandb_project: null
121
+ wandb_id: null
122
+ wandb_entity: null
123
+ wandb_name: null
124
+ wandb_model_log_interval: -1
125
+ detect_anomaly: false
126
+ pretrain_path: null
127
+ init_param: []
128
+ ignore_init_mismatch: false
129
+ freeze_param: []
130
+ num_iters_per_epoch: null
131
+ batch_size: 128
132
+ valid_batch_size: null
133
+ batch_bins: 1000000
134
+ valid_batch_bins: null
135
+ train_shape_file:
136
+ - exp/asr_stats_raw_en_char/train/speech_shape
137
+ - exp/asr_stats_raw_en_char/train/text_shape.char
138
+ valid_shape_file:
139
+ - exp/asr_stats_raw_en_char/valid/speech_shape
140
+ - exp/asr_stats_raw_en_char/valid/text_shape.char
141
+ batch_type: folded
142
+ valid_batch_type: null
143
+ fold_length:
144
+ - 80000
145
+ - 150
146
+ sort_in_batch: descending
147
+ sort_batch: descending
148
+ multiple_iterator: false
149
+ chunk_length: 500
150
+ chunk_shift_ratio: 0.5
151
+ num_cache_chunks: 1024
152
+ train_data_path_and_name_and_type:
153
+ - - dump/raw/train_si284/wav.scp
154
+ - speech
155
+ - sound
156
+ - - dump/raw/train_si284/text
157
+ - text
158
+ - text
159
+ valid_data_path_and_name_and_type:
160
+ - - dump/raw/test_dev93/wav.scp
161
+ - speech
162
+ - sound
163
+ - - dump/raw/test_dev93/text
164
+ - text
165
+ - text
166
+ allow_variable_data_keys: false
167
+ max_cache_size: 0.0
168
+ max_cache_fd: 32
169
+ valid_max_cache_size: null
170
+ optim: adam
171
+ optim_conf:
172
+ lr: 0.005
173
+ scheduler: warmuplr
174
+ scheduler_conf:
175
+ warmup_steps: 30000
176
+ token_list:
177
+ - <blank>
178
+ - <unk>
179
+ - <space>
180
+ - E
181
+ - T
182
+ - A
183
+ - N
184
+ - I
185
+ - O
186
+ - S
187
+ - R
188
+ - H
189
+ - L
190
+ - D
191
+ - C
192
+ - U
193
+ - M
194
+ - P
195
+ - F
196
+ - G
197
+ - Y
198
+ - W
199
+ - B
200
+ - V
201
+ - K
202
+ - .
203
+ - X
204
+ - ''''
205
+ - J
206
+ - Q
207
+ - Z
208
+ - <NOISE>
209
+ - ','
210
+ - '-'
211
+ - '"'
212
+ - '*'
213
+ - ':'
214
+ - (
215
+ - )
216
+ - '?'
217
+ - '!'
218
+ - '&'
219
+ - ;
220
+ - '1'
221
+ - '2'
222
+ - '0'
223
+ - /
224
+ - $
225
+ - '{'
226
+ - '}'
227
+ - '8'
228
+ - '9'
229
+ - '6'
230
+ - '3'
231
+ - '5'
232
+ - '7'
233
+ - '4'
234
+ - '~'
235
+ - '`'
236
+ - _
237
+ - <*IN*>
238
+ - <*MR.*>
239
+ - \
240
+ - ^
241
+ - <sos/eos>
242
+ init: null
243
+ input_size: null
244
+ ctc_conf:
245
+ dropout_rate: 0.0
246
+ ctc_type: builtin
247
+ reduce: true
248
+ ignore_nan_grad: null
249
+ zero_infinity: true
250
+ joint_net_conf: null
251
+ use_preprocessor: true
252
+ token_type: char
253
+ bpemodel: null
254
+ non_linguistic_symbols: data/nlsyms.txt
255
+ cleaner: null
256
+ g2p: null
257
+ speech_volume_normalize: null
258
+ rir_scp: null
259
+ rir_apply_prob: 1.0
260
+ noise_scp: null
261
+ noise_apply_prob: 1.0
262
+ noise_db_range: '13_15'
263
+ short_noise_thres: 0.5
264
+ frontend: default
265
+ frontend_conf:
266
+ fs: 16k
267
+ specaug: null
268
+ specaug_conf: {}
269
+ normalize: global_mvn
270
+ normalize_conf:
271
+ stats_file: exp/asr_stats_raw_en_char/train/feats_stats.npz
272
+ model: espnet
273
+ model_conf:
274
+ ctc_weight: 0.3
275
+ lsm_weight: 0.1
276
+ length_normalized_loss: false
277
+ preencoder: null
278
+ preencoder_conf: {}
279
+ encoder: conformer
280
+ encoder_conf:
281
+ output_size: 256
282
+ attention_heads: 4
283
+ linear_units: 1024
284
+ num_blocks: 15
285
+ dropout_rate: 0.1
286
+ positional_dropout_rate: 0.1
287
+ attention_dropout_rate: 0.1
288
+ input_layer: conv2d
289
+ normalize_before: true
290
+ macaron_style: true
291
+ rel_pos_type: latest
292
+ pos_enc_layer_type: rel_pos
293
+ selfattention_layer_type: rel_selfattn
294
+ activation_type: swish
295
+ use_cnn_module: true
296
+ cnn_module_kernel: 31
297
+ postencoder: null
298
+ postencoder_conf: {}
299
+ decoder: transformer
300
+ decoder_conf:
301
+ attention_heads: 4
302
+ linear_units: 2048
303
+ num_blocks: 6
304
+ dropout_rate: 0.1
305
+ positional_dropout_rate: 0.1
306
+ self_attention_dropout_rate: 0.0
307
+ src_attention_dropout_rate: 0.0
308
+ preprocessor: default
309
+ preprocessor_conf: {}
310
+ required:
311
+ - output_dir
312
+ - token_list
313
+ version: '202211'
314
+ distributed: false
315
+ ```
316
+
317
+ </details>
318
+
319
+
320
+
321
+ ### Citing ESPnet
322
+
323
+ ```bibtex
324
+ @inproceedings{watanabe2018espnet,
325
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
326
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
327
+ year={2018},
328
+ booktitle={Proceedings of Interspeech},
329
+ pages={2207--2211},
330
+ doi={10.21437/Interspeech.2018-1456},
331
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
332
+ }
333
+
334
+
335
+
336
+
337
+ ```
338
+
339
+ or arXiv:
340
+
341
+ ```bibtex
342
+ @misc{watanabe2018espnet,
343
+ title={ESPnet: End-to-End Speech Processing Toolkit},
344
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
345
+ year={2018},
346
+ eprint={1804.00015},
347
+ archivePrefix={arXiv},
348
+ primaryClass={cs.CL}
349
+ }
350
+ ```
data/nlsyms.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ <*IN*>
2
+ <*MR.*>
3
+ <NOISE>
exp/asr_stats_raw_en_char/train/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ea03fa4eea91ad6b7e047a7572be73ed998be1896e389935de240c68ccc1931
3
+ size 1402
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/RESULTS.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Wed Dec 28 00:25:25 EST 2022`
5
+ - python version: `3.9.15 (main, Nov 24 2022, 14:31:59) [GCC 11.2.0]`
6
+ - espnet version: `espnet 202211`
7
+ - pytorch version: `pytorch 1.12.1`
8
+ - Git hash: `0aa06d0535323aabc1d8b057f8769da377f4d9ff`
9
+ - Commit date: `Tue Dec 27 15:08:25 2022 -0600`
10
+
11
+ ## asr_train_asr_conformer_e15_linear1024_raw_en_char
12
+ ### WER
13
+
14
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
15
+ |---|---|---|---|---|---|---|---|---|
16
+ |decode_lm_lm_train_lm_transformer_en_char_valid.loss.ave_asr_model_valid.acc.ave/test_dev93|503|8234|94.2|5.1|0.8|0.7|6.5|52.5|
17
+ |decode_lm_lm_train_lm_transformer_en_char_valid.loss.ave_asr_model_valid.acc.ave/test_eval92|333|5643|96.4|3.3|0.3|0.6|4.1|37.2|
18
+
19
+ ### CER
20
+
21
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
22
+ |---|---|---|---|---|---|---|---|---|
23
+ |decode_lm_lm_train_lm_transformer_en_char_valid.loss.ave_asr_model_valid.acc.ave/test_dev93|503|48634|97.8|1.0|1.2|0.6|2.8|58.3|
24
+ |decode_lm_lm_train_lm_transformer_en_char_valid.loss.ave_asr_model_valid.acc.ave/test_eval92|333|33341|98.6|0.7|0.7|0.5|1.9|46.8|
25
+
26
+ ### TER
27
+
28
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
29
+ |---|---|---|---|---|---|---|---|---|
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/config.yaml ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_asr_conformer_e15_linear1024.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/asr_train_asr_conformer_e15_linear1024_raw_en_char
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 4
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 100
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - acc
39
+ - max
40
+ keep_nbest_models: 10
41
+ nbest_averaging_interval: 0
42
+ grad_clip: 5.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 2
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: true
50
+ log_interval: 100
51
+ use_matplotlib: true
52
+ use_tensorboard: true
53
+ create_graph_in_tensorboard: false
54
+ use_wandb: false
55
+ wandb_project: null
56
+ wandb_id: null
57
+ wandb_entity: null
58
+ wandb_name: null
59
+ wandb_model_log_interval: -1
60
+ detect_anomaly: false
61
+ pretrain_path: null
62
+ init_param: []
63
+ ignore_init_mismatch: false
64
+ freeze_param: []
65
+ num_iters_per_epoch: null
66
+ batch_size: 128
67
+ valid_batch_size: null
68
+ batch_bins: 1000000
69
+ valid_batch_bins: null
70
+ train_shape_file:
71
+ - exp/asr_stats_raw_en_char/train/speech_shape
72
+ - exp/asr_stats_raw_en_char/train/text_shape.char
73
+ valid_shape_file:
74
+ - exp/asr_stats_raw_en_char/valid/speech_shape
75
+ - exp/asr_stats_raw_en_char/valid/text_shape.char
76
+ batch_type: folded
77
+ valid_batch_type: null
78
+ fold_length:
79
+ - 80000
80
+ - 150
81
+ sort_in_batch: descending
82
+ sort_batch: descending
83
+ multiple_iterator: false
84
+ chunk_length: 500
85
+ chunk_shift_ratio: 0.5
86
+ num_cache_chunks: 1024
87
+ train_data_path_and_name_and_type:
88
+ - - dump/raw/train_si284/wav.scp
89
+ - speech
90
+ - sound
91
+ - - dump/raw/train_si284/text
92
+ - text
93
+ - text
94
+ valid_data_path_and_name_and_type:
95
+ - - dump/raw/test_dev93/wav.scp
96
+ - speech
97
+ - sound
98
+ - - dump/raw/test_dev93/text
99
+ - text
100
+ - text
101
+ allow_variable_data_keys: false
102
+ max_cache_size: 0.0
103
+ max_cache_fd: 32
104
+ valid_max_cache_size: null
105
+ optim: adam
106
+ optim_conf:
107
+ lr: 0.005
108
+ scheduler: warmuplr
109
+ scheduler_conf:
110
+ warmup_steps: 30000
111
+ token_list:
112
+ - <blank>
113
+ - <unk>
114
+ - <space>
115
+ - E
116
+ - T
117
+ - A
118
+ - N
119
+ - I
120
+ - O
121
+ - S
122
+ - R
123
+ - H
124
+ - L
125
+ - D
126
+ - C
127
+ - U
128
+ - M
129
+ - P
130
+ - F
131
+ - G
132
+ - Y
133
+ - W
134
+ - B
135
+ - V
136
+ - K
137
+ - .
138
+ - X
139
+ - ''''
140
+ - J
141
+ - Q
142
+ - Z
143
+ - <NOISE>
144
+ - ','
145
+ - '-'
146
+ - '"'
147
+ - '*'
148
+ - ':'
149
+ - (
150
+ - )
151
+ - '?'
152
+ - '!'
153
+ - '&'
154
+ - ;
155
+ - '1'
156
+ - '2'
157
+ - '0'
158
+ - /
159
+ - $
160
+ - '{'
161
+ - '}'
162
+ - '8'
163
+ - '9'
164
+ - '6'
165
+ - '3'
166
+ - '5'
167
+ - '7'
168
+ - '4'
169
+ - '~'
170
+ - '`'
171
+ - _
172
+ - <*IN*>
173
+ - <*MR.*>
174
+ - \
175
+ - ^
176
+ - <sos/eos>
177
+ init: null
178
+ input_size: null
179
+ ctc_conf:
180
+ dropout_rate: 0.0
181
+ ctc_type: builtin
182
+ reduce: true
183
+ ignore_nan_grad: null
184
+ zero_infinity: true
185
+ joint_net_conf: null
186
+ use_preprocessor: true
187
+ token_type: char
188
+ bpemodel: null
189
+ non_linguistic_symbols: data/nlsyms.txt
190
+ cleaner: null
191
+ g2p: null
192
+ speech_volume_normalize: null
193
+ rir_scp: null
194
+ rir_apply_prob: 1.0
195
+ noise_scp: null
196
+ noise_apply_prob: 1.0
197
+ noise_db_range: '13_15'
198
+ short_noise_thres: 0.5
199
+ frontend: default
200
+ frontend_conf:
201
+ fs: 16k
202
+ specaug: null
203
+ specaug_conf: {}
204
+ normalize: global_mvn
205
+ normalize_conf:
206
+ stats_file: exp/asr_stats_raw_en_char/train/feats_stats.npz
207
+ model: espnet
208
+ model_conf:
209
+ ctc_weight: 0.3
210
+ lsm_weight: 0.1
211
+ length_normalized_loss: false
212
+ preencoder: null
213
+ preencoder_conf: {}
214
+ encoder: conformer
215
+ encoder_conf:
216
+ output_size: 256
217
+ attention_heads: 4
218
+ linear_units: 1024
219
+ num_blocks: 15
220
+ dropout_rate: 0.1
221
+ positional_dropout_rate: 0.1
222
+ attention_dropout_rate: 0.1
223
+ input_layer: conv2d
224
+ normalize_before: true
225
+ macaron_style: true
226
+ rel_pos_type: latest
227
+ pos_enc_layer_type: rel_pos
228
+ selfattention_layer_type: rel_selfattn
229
+ activation_type: swish
230
+ use_cnn_module: true
231
+ cnn_module_kernel: 31
232
+ postencoder: null
233
+ postencoder_conf: {}
234
+ decoder: transformer
235
+ decoder_conf:
236
+ attention_heads: 4
237
+ linear_units: 2048
238
+ num_blocks: 6
239
+ dropout_rate: 0.1
240
+ positional_dropout_rate: 0.1
241
+ self_attention_dropout_rate: 0.0
242
+ src_attention_dropout_rate: 0.0
243
+ preprocessor: default
244
+ preprocessor_conf: {}
245
+ required:
246
+ - output_dir
247
+ - token_list
248
+ version: '202211'
249
+ distributed: false
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/acc.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/backward_time.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/cer.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/cer_ctc.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/forward_time.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/gpu_max_cached_mem_GB.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/iter_time.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/loss.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/loss_att.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/loss_ctc.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/optim0_lr0.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/optim_step_time.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/train_time.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/images/wer.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/valid.acc.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91ec12d91bb5144564f260c2391126e9edc6546205dd10d28b36c2a9fb61d785
3
+ size 141181493
exp/lm_train_lm_transformer_en_char/config.yaml ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_lm_transformer.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/lm_train_lm_transformer_en_char
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: 2
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: localhost
17
+ dist_master_port: 44469
18
+ dist_launcher: null
19
+ multiprocessing_distributed: true
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 25
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ keep_nbest_models: 10
41
+ nbest_averaging_interval: 0
42
+ grad_clip: 5.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 2
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: false
50
+ log_interval: null
51
+ use_matplotlib: true
52
+ use_tensorboard: true
53
+ create_graph_in_tensorboard: false
54
+ use_wandb: false
55
+ wandb_project: null
56
+ wandb_id: null
57
+ wandb_entity: null
58
+ wandb_name: null
59
+ wandb_model_log_interval: -1
60
+ detect_anomaly: false
61
+ pretrain_path: null
62
+ init_param: []
63
+ ignore_init_mismatch: false
64
+ freeze_param: []
65
+ num_iters_per_epoch: null
66
+ batch_size: 20
67
+ valid_batch_size: null
68
+ batch_bins: 350000
69
+ valid_batch_bins: null
70
+ train_shape_file:
71
+ - exp/lm_stats_en_char/train/text_shape.char
72
+ valid_shape_file:
73
+ - exp/lm_stats_en_char/valid/text_shape.char
74
+ batch_type: numel
75
+ valid_batch_type: null
76
+ fold_length:
77
+ - 150
78
+ sort_in_batch: descending
79
+ sort_batch: descending
80
+ multiple_iterator: false
81
+ chunk_length: 500
82
+ chunk_shift_ratio: 0.5
83
+ num_cache_chunks: 1024
84
+ train_data_path_and_name_and_type:
85
+ - - dump/raw/lm_train.txt
86
+ - text
87
+ - text
88
+ valid_data_path_and_name_and_type:
89
+ - - dump/raw/test_dev93/text
90
+ - text
91
+ - text
92
+ allow_variable_data_keys: false
93
+ max_cache_size: 0.0
94
+ max_cache_fd: 32
95
+ valid_max_cache_size: null
96
+ optim: adam
97
+ optim_conf:
98
+ lr: 0.001
99
+ scheduler: warmuplr
100
+ scheduler_conf:
101
+ warmup_steps: 25000
102
+ token_list:
103
+ - <blank>
104
+ - <unk>
105
+ - <space>
106
+ - E
107
+ - T
108
+ - A
109
+ - N
110
+ - I
111
+ - O
112
+ - S
113
+ - R
114
+ - H
115
+ - L
116
+ - D
117
+ - C
118
+ - U
119
+ - M
120
+ - P
121
+ - F
122
+ - G
123
+ - Y
124
+ - W
125
+ - B
126
+ - V
127
+ - K
128
+ - .
129
+ - X
130
+ - ''''
131
+ - J
132
+ - Q
133
+ - Z
134
+ - <NOISE>
135
+ - ','
136
+ - '-'
137
+ - '"'
138
+ - '*'
139
+ - ':'
140
+ - (
141
+ - )
142
+ - '?'
143
+ - '!'
144
+ - '&'
145
+ - ;
146
+ - '1'
147
+ - '2'
148
+ - '0'
149
+ - /
150
+ - $
151
+ - '{'
152
+ - '}'
153
+ - '8'
154
+ - '9'
155
+ - '6'
156
+ - '3'
157
+ - '5'
158
+ - '7'
159
+ - '4'
160
+ - '~'
161
+ - '`'
162
+ - _
163
+ - <*IN*>
164
+ - <*MR.*>
165
+ - \
166
+ - ^
167
+ - <sos/eos>
168
+ init: null
169
+ model_conf:
170
+ ignore_id: 0
171
+ use_preprocessor: true
172
+ token_type: char
173
+ bpemodel: null
174
+ non_linguistic_symbols: data/nlsyms.txt
175
+ cleaner: null
176
+ g2p: null
177
+ lm: transformer
178
+ lm_conf:
179
+ pos_enc: null
180
+ embed_unit: 128
181
+ att_unit: 512
182
+ head: 8
183
+ unit: 2048
184
+ layer: 16
185
+ dropout_rate: 0.1
186
+ required:
187
+ - output_dir
188
+ - token_list
189
+ version: '202211'
190
+ distributed: true
exp/lm_train_lm_transformer_en_char/images/backward_time.png ADDED
exp/lm_train_lm_transformer_en_char/images/forward_time.png ADDED
exp/lm_train_lm_transformer_en_char/images/gpu_max_cached_mem_GB.png ADDED
exp/lm_train_lm_transformer_en_char/images/iter_time.png ADDED
exp/lm_train_lm_transformer_en_char/images/loss.png ADDED
exp/lm_train_lm_transformer_en_char/images/optim0_lr0.png ADDED
exp/lm_train_lm_transformer_en_char/images/optim_step_time.png ADDED
exp/lm_train_lm_transformer_en_char/images/train_time.png ADDED
exp/lm_train_lm_transformer_en_char/perplexity_test/ppl ADDED
@@ -0,0 +1 @@
 
 
1
+ 2.2880849662126233
exp/lm_train_lm_transformer_en_char/valid.loss.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e700b5a85868956df7aae5581f76e3a66115d1bdf2ee031b51454dc21a7010db
3
+ size 202290031
meta.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ espnet: '202211'
2
+ files:
3
+ asr_model_file: exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/valid.acc.ave_10best.pth
4
+ lm_file: exp/lm_train_lm_transformer_en_char/valid.loss.ave_10best.pth
5
+ python: "3.9.15 (main, Nov 24 2022, 14:31:59) \n[GCC 11.2.0]"
6
+ timestamp: 1672205127.39957
7
+ torch: 1.12.1
8
+ yaml_files:
9
+ asr_train_config: exp/asr_train_asr_conformer_e15_linear1024_raw_en_char/config.yaml
10
+ lm_train_config: exp/lm_train_lm_transformer_en_char/config.yaml