pyf98 committed on
Commit
d14d8a6
1 Parent(s): c367154

add model files

Browse files
Files changed (20) hide show
  1. README.md +324 -0
  2. exp/asr_stats_raw_it_char/train/feats_stats.npz +3 -0
  3. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/RESULTS.md +29 -0
  4. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/config.yaml +223 -0
  5. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/acc.png +0 -0
  6. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/backward_time.png +0 -0
  7. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/cer.png +0 -0
  8. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/cer_ctc.png +0 -0
  9. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/forward_time.png +0 -0
  10. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/gpu_max_cached_mem_GB.png +0 -0
  11. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/iter_time.png +0 -0
  12. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/loss.png +0 -0
  13. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/loss_att.png +0 -0
  14. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/loss_ctc.png +0 -0
  15. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/optim0_lr0.png +0 -0
  16. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/optim_step_time.png +0 -0
  17. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/train_time.png +0 -0
  18. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/wer.png +0 -0
  19. exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/valid.acc.ave_10best.pth +3 -0
  20. meta.yaml +8 -0
README.md ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - automatic-speech-recognition
6
+ language: it
7
+ datasets:
8
+ - voxforge
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 ASR model
13
+
14
+ ### `pyf98/voxforge_it_conformer_e15_linear1024`
15
+
16
+ This model was trained by Yifan Peng using the voxforge recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout bf8c8f00194bdfed8ca388d8b20d14791b7d270e
26
+ pip install -e .
27
+ cd egs2/voxforge/asr1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model pyf98/voxforge_it_conformer_e15_linear1024
29
+ ```
30
+
31
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
32
+ # RESULTS
33
+ ## Environments
34
+ - date: `Thu Dec 29 01:59:25 EST 2022`
35
+ - python version: `3.9.15 (main, Nov 24 2022, 14:31:59) [GCC 11.2.0]`
36
+ - espnet version: `espnet 202211`
37
+ - pytorch version: `pytorch 1.12.1`
38
+ - Git hash: `bf8c8f00194bdfed8ca388d8b20d14791b7d270e`
39
+ - Commit date: `Wed Dec 28 22:43:13 2022 -0500`
40
+
41
+ ## asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse
42
+ ### WER
43
+
44
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
45
+ |---|---|---|---|---|---|---|---|---|
46
+ |decode_asr_asr_model_valid.acc.ave/dt_it|1035|12587|70.2|24.6|5.2|3.3|33.1|94.7|
47
+ |decode_asr_asr_model_valid.acc.ave/et_it|1103|13699|71.9|23.3|4.8|2.9|31.0|92.4|
48
+
49
+ ### CER
50
+
51
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
52
+ |---|---|---|---|---|---|---|---|---|
53
+ |decode_asr_asr_model_valid.acc.ave/dt_it|1035|75494|92.9|3.9|3.2|1.8|9.0|94.7|
54
+ |decode_asr_asr_model_valid.acc.ave/et_it|1103|81228|93.6|3.6|2.8|1.7|8.1|92.4|
55
+
56
+ ### TER
57
+
58
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
59
+ |---|---|---|---|---|---|---|---|---|
60
+
61
+ ## ASR config
62
+
63
+ <details><summary>expand</summary>
64
+
65
+ ```
66
+ config: conf/tuning/train_asr_conformer_e15_linear1024.yaml
67
+ print_config: false
68
+ log_level: INFO
69
+ dry_run: false
70
+ iterator_type: sequence
71
+ output_dir: exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse
72
+ ngpu: 1
73
+ seed: 0
74
+ num_workers: 4
75
+ num_att_plot: 3
76
+ dist_backend: nccl
77
+ dist_init_method: env://
78
+ dist_world_size: null
79
+ dist_rank: null
80
+ local_rank: 0
81
+ dist_master_addr: null
82
+ dist_master_port: null
83
+ dist_launcher: null
84
+ multiprocessing_distributed: false
85
+ unused_parameters: false
86
+ sharded_ddp: false
87
+ cudnn_enabled: true
88
+ cudnn_benchmark: false
89
+ cudnn_deterministic: true
90
+ collect_stats: false
91
+ write_collected_feats: false
92
+ max_epoch: 100
93
+ patience: null
94
+ val_scheduler_criterion:
95
+ - valid
96
+ - loss
97
+ early_stopping_criterion:
98
+ - valid
99
+ - loss
100
+ - min
101
+ best_model_criterion:
102
+ - - valid
103
+ - acc
104
+ - max
105
+ keep_nbest_models: 10
106
+ nbest_averaging_interval: 0
107
+ grad_clip: 5.0
108
+ grad_clip_type: 2.0
109
+ grad_noise: false
110
+ accum_grad: 1
111
+ no_forward_run: false
112
+ resume: true
113
+ train_dtype: float32
114
+ use_amp: true
115
+ log_interval: null
116
+ use_matplotlib: true
117
+ use_tensorboard: true
118
+ create_graph_in_tensorboard: false
119
+ use_wandb: false
120
+ wandb_project: null
121
+ wandb_id: null
122
+ wandb_entity: null
123
+ wandb_name: null
124
+ wandb_model_log_interval: -1
125
+ detect_anomaly: false
126
+ pretrain_path: null
127
+ init_param: []
128
+ ignore_init_mismatch: false
129
+ freeze_param: []
130
+ num_iters_per_epoch: null
131
+ batch_size: 128
132
+ valid_batch_size: null
133
+ batch_bins: 1000000
134
+ valid_batch_bins: null
135
+ train_shape_file:
136
+ - exp/asr_stats_raw_it_char/train/speech_shape
137
+ - exp/asr_stats_raw_it_char/train/text_shape.char
138
+ valid_shape_file:
139
+ - exp/asr_stats_raw_it_char/valid/speech_shape
140
+ - exp/asr_stats_raw_it_char/valid/text_shape.char
141
+ batch_type: folded
142
+ valid_batch_type: null
143
+ fold_length:
144
+ - 80000
145
+ - 150
146
+ sort_in_batch: descending
147
+ sort_batch: descending
148
+ multiple_iterator: false
149
+ chunk_length: 500
150
+ chunk_shift_ratio: 0.5
151
+ num_cache_chunks: 1024
152
+ train_data_path_and_name_and_type:
153
+ - - dump/raw/tr_it/wav.scp
154
+ - speech
155
+ - sound
156
+ - - dump/raw/tr_it/text
157
+ - text
158
+ - text
159
+ valid_data_path_and_name_and_type:
160
+ - - dump/raw/dt_it/wav.scp
161
+ - speech
162
+ - sound
163
+ - - dump/raw/dt_it/text
164
+ - text
165
+ - text
166
+ allow_variable_data_keys: false
167
+ max_cache_size: 0.0
168
+ max_cache_fd: 32
169
+ valid_max_cache_size: null
170
+ optim: adam
171
+ optim_conf:
172
+ lr: 0.002
173
+ scheduler: warmuplr
174
+ scheduler_conf:
175
+ warmup_steps: 10000
176
+ token_list:
177
+ - <blank>
178
+ - <unk>
179
+ - <space>
180
+ - A
181
+ - E
182
+ - I
183
+ - O
184
+ - R
185
+ - N
186
+ - L
187
+ - S
188
+ - T
189
+ - C
190
+ - D
191
+ - U
192
+ - M
193
+ - P
194
+ - V
195
+ - G
196
+ - F
197
+ - H
198
+ - B
199
+ - Q
200
+ - Z
201
+ - ''''
202
+ - Ò
203
+ - À
204
+ - È
205
+ - Ú
206
+ - X
207
+ - W
208
+ - Í
209
+ - É
210
+ - Y
211
+ - K
212
+ - J
213
+ - '1'
214
+ - <sos/eos>
215
+ init: null
216
+ input_size: null
217
+ ctc_conf:
218
+ dropout_rate: 0.0
219
+ ctc_type: builtin
220
+ reduce: true
221
+ ignore_nan_grad: null
222
+ zero_infinity: true
223
+ joint_net_conf: null
224
+ use_preprocessor: true
225
+ token_type: char
226
+ bpemodel: null
227
+ non_linguistic_symbols: null
228
+ cleaner: null
229
+ g2p: null
230
+ speech_volume_normalize: null
231
+ rir_scp: null
232
+ rir_apply_prob: 1.0
233
+ noise_scp: null
234
+ noise_apply_prob: 1.0
235
+ noise_db_range: '13_15'
236
+ short_noise_thres: 0.5
237
+ frontend: default
238
+ frontend_conf:
239
+ fs: 16k
240
+ specaug: null
241
+ specaug_conf: {}
242
+ normalize: global_mvn
243
+ normalize_conf:
244
+ stats_file: exp/asr_stats_raw_it_char/train/feats_stats.npz
245
+ norm_vars: false
246
+ model: espnet
247
+ model_conf:
248
+ ctc_weight: 0.3
249
+ lsm_weight: 0.1
250
+ length_normalized_loss: false
251
+ preencoder: null
252
+ preencoder_conf: {}
253
+ encoder: conformer
254
+ encoder_conf:
255
+ output_size: 256
256
+ attention_heads: 4
257
+ linear_units: 1024
258
+ num_blocks: 15
259
+ dropout_rate: 0.1
260
+ positional_dropout_rate: 0.1
261
+ attention_dropout_rate: 0.1
262
+ input_layer: conv2d
263
+ normalize_before: true
264
+ macaron_style: true
265
+ rel_pos_type: latest
266
+ pos_enc_layer_type: rel_pos
267
+ selfattention_layer_type: rel_selfattn
268
+ activation_type: swish
269
+ use_cnn_module: true
270
+ cnn_module_kernel: 31
271
+ postencoder: null
272
+ postencoder_conf: {}
273
+ decoder: transformer
274
+ decoder_conf:
275
+ attention_heads: 4
276
+ linear_units: 2048
277
+ num_blocks: 6
278
+ dropout_rate: 0.1
279
+ positional_dropout_rate: 0.1
280
+ self_attention_dropout_rate: 0.0
281
+ src_attention_dropout_rate: 0.0
282
+ preprocessor: default
283
+ preprocessor_conf: {}
284
+ required:
285
+ - output_dir
286
+ - token_list
287
+ version: '202211'
288
+ distributed: false
289
+ ```
290
+
291
+ </details>
292
+
293
+
294
+
295
+ ### Citing ESPnet
296
+
297
+ ```bibtex
298
+ @inproceedings{watanabe2018espnet,
299
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
300
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
301
+ year={2018},
302
+ booktitle={Proceedings of Interspeech},
303
+ pages={2207--2211},
304
+ doi={10.21437/Interspeech.2018-1456},
305
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
306
+ }
307
+
308
+
309
+
310
+
311
+ ```
312
+
313
+ or arXiv:
314
+
315
+ ```bibtex
316
+ @misc{watanabe2018espnet,
317
+ title={ESPnet: End-to-End Speech Processing Toolkit},
318
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
319
+ year={2018},
320
+ eprint={1804.00015},
321
+ archivePrefix={arXiv},
322
+ primaryClass={cs.CL}
323
+ }
324
+ ```
exp/asr_stats_raw_it_char/train/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ac4c6cee9a2bbf0175bb92bc350e21038c874b9d01b537b2688f5c8e1bae3d6
3
+ size 1402
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/RESULTS.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Thu Dec 29 01:59:25 EST 2022`
5
+ - python version: `3.9.15 (main, Nov 24 2022, 14:31:59) [GCC 11.2.0]`
6
+ - espnet version: `espnet 202211`
7
+ - pytorch version: `pytorch 1.12.1`
8
+ - Git hash: `bf8c8f00194bdfed8ca388d8b20d14791b7d270e`
9
+ - Commit date: `Wed Dec 28 22:43:13 2022 -0500`
10
+
11
+ ## asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse
12
+ ### WER
13
+
14
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
15
+ |---|---|---|---|---|---|---|---|---|
16
+ |decode_asr_asr_model_valid.acc.ave/dt_it|1035|12587|70.2|24.6|5.2|3.3|33.1|94.7|
17
+ |decode_asr_asr_model_valid.acc.ave/et_it|1103|13699|71.9|23.3|4.8|2.9|31.0|92.4|
18
+
19
+ ### CER
20
+
21
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
22
+ |---|---|---|---|---|---|---|---|---|
23
+ |decode_asr_asr_model_valid.acc.ave/dt_it|1035|75494|92.9|3.9|3.2|1.8|9.0|94.7|
24
+ |decode_asr_asr_model_valid.acc.ave/et_it|1103|81228|93.6|3.6|2.8|1.7|8.1|92.4|
25
+
26
+ ### TER
27
+
28
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
29
+ |---|---|---|---|---|---|---|---|---|
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/config.yaml ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_asr_conformer_e15_linear1024.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 4
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 100
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - acc
39
+ - max
40
+ keep_nbest_models: 10
41
+ nbest_averaging_interval: 0
42
+ grad_clip: 5.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 1
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: true
50
+ log_interval: null
51
+ use_matplotlib: true
52
+ use_tensorboard: true
53
+ create_graph_in_tensorboard: false
54
+ use_wandb: false
55
+ wandb_project: null
56
+ wandb_id: null
57
+ wandb_entity: null
58
+ wandb_name: null
59
+ wandb_model_log_interval: -1
60
+ detect_anomaly: false
61
+ pretrain_path: null
62
+ init_param: []
63
+ ignore_init_mismatch: false
64
+ freeze_param: []
65
+ num_iters_per_epoch: null
66
+ batch_size: 128
67
+ valid_batch_size: null
68
+ batch_bins: 1000000
69
+ valid_batch_bins: null
70
+ train_shape_file:
71
+ - exp/asr_stats_raw_it_char/train/speech_shape
72
+ - exp/asr_stats_raw_it_char/train/text_shape.char
73
+ valid_shape_file:
74
+ - exp/asr_stats_raw_it_char/valid/speech_shape
75
+ - exp/asr_stats_raw_it_char/valid/text_shape.char
76
+ batch_type: folded
77
+ valid_batch_type: null
78
+ fold_length:
79
+ - 80000
80
+ - 150
81
+ sort_in_batch: descending
82
+ sort_batch: descending
83
+ multiple_iterator: false
84
+ chunk_length: 500
85
+ chunk_shift_ratio: 0.5
86
+ num_cache_chunks: 1024
87
+ train_data_path_and_name_and_type:
88
+ - - dump/raw/tr_it/wav.scp
89
+ - speech
90
+ - sound
91
+ - - dump/raw/tr_it/text
92
+ - text
93
+ - text
94
+ valid_data_path_and_name_and_type:
95
+ - - dump/raw/dt_it/wav.scp
96
+ - speech
97
+ - sound
98
+ - - dump/raw/dt_it/text
99
+ - text
100
+ - text
101
+ allow_variable_data_keys: false
102
+ max_cache_size: 0.0
103
+ max_cache_fd: 32
104
+ valid_max_cache_size: null
105
+ optim: adam
106
+ optim_conf:
107
+ lr: 0.002
108
+ scheduler: warmuplr
109
+ scheduler_conf:
110
+ warmup_steps: 10000
111
+ token_list:
112
+ - <blank>
113
+ - <unk>
114
+ - <space>
115
+ - A
116
+ - E
117
+ - I
118
+ - O
119
+ - R
120
+ - N
121
+ - L
122
+ - S
123
+ - T
124
+ - C
125
+ - D
126
+ - U
127
+ - M
128
+ - P
129
+ - V
130
+ - G
131
+ - F
132
+ - H
133
+ - B
134
+ - Q
135
+ - Z
136
+ - ''''
137
+ - Ò
138
+ - À
139
+ - È
140
+ - Ú
141
+ - X
142
+ - W
143
+ - Í
144
+ - É
145
+ - Y
146
+ - K
147
+ - J
148
+ - '1'
149
+ - <sos/eos>
150
+ init: null
151
+ input_size: null
152
+ ctc_conf:
153
+ dropout_rate: 0.0
154
+ ctc_type: builtin
155
+ reduce: true
156
+ ignore_nan_grad: null
157
+ zero_infinity: true
158
+ joint_net_conf: null
159
+ use_preprocessor: true
160
+ token_type: char
161
+ bpemodel: null
162
+ non_linguistic_symbols: null
163
+ cleaner: null
164
+ g2p: null
165
+ speech_volume_normalize: null
166
+ rir_scp: null
167
+ rir_apply_prob: 1.0
168
+ noise_scp: null
169
+ noise_apply_prob: 1.0
170
+ noise_db_range: '13_15'
171
+ short_noise_thres: 0.5
172
+ frontend: default
173
+ frontend_conf:
174
+ fs: 16k
175
+ specaug: null
176
+ specaug_conf: {}
177
+ normalize: global_mvn
178
+ normalize_conf:
179
+ stats_file: exp/asr_stats_raw_it_char/train/feats_stats.npz
180
+ norm_vars: false
181
+ model: espnet
182
+ model_conf:
183
+ ctc_weight: 0.3
184
+ lsm_weight: 0.1
185
+ length_normalized_loss: false
186
+ preencoder: null
187
+ preencoder_conf: {}
188
+ encoder: conformer
189
+ encoder_conf:
190
+ output_size: 256
191
+ attention_heads: 4
192
+ linear_units: 1024
193
+ num_blocks: 15
194
+ dropout_rate: 0.1
195
+ positional_dropout_rate: 0.1
196
+ attention_dropout_rate: 0.1
197
+ input_layer: conv2d
198
+ normalize_before: true
199
+ macaron_style: true
200
+ rel_pos_type: latest
201
+ pos_enc_layer_type: rel_pos
202
+ selfattention_layer_type: rel_selfattn
203
+ activation_type: swish
204
+ use_cnn_module: true
205
+ cnn_module_kernel: 31
206
+ postencoder: null
207
+ postencoder_conf: {}
208
+ decoder: transformer
209
+ decoder_conf:
210
+ attention_heads: 4
211
+ linear_units: 2048
212
+ num_blocks: 6
213
+ dropout_rate: 0.1
214
+ positional_dropout_rate: 0.1
215
+ self_attention_dropout_rate: 0.0
216
+ src_attention_dropout_rate: 0.0
217
+ preprocessor: default
218
+ preprocessor_conf: {}
219
+ required:
220
+ - output_dir
221
+ - token_list
222
+ version: '202211'
223
+ distributed: false
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/acc.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/backward_time.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/cer.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/cer_ctc.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/forward_time.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/gpu_max_cached_mem_GB.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/iter_time.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/loss.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/loss_att.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/loss_ctc.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/optim0_lr0.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/optim_step_time.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/train_time.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/images/wer.png ADDED
exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/valid.acc.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:557aba09cc97162703e876a6d3555aa976c4b67e19bbbe419a6b886c34c7e781
3
+ size 141098293
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202211'
2
+ files:
3
+ asr_model_file: exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/valid.acc.ave_10best.pth
4
+ python: "3.9.15 (main, Nov 24 2022, 14:31:59) \n[GCC 11.2.0]"
5
+ timestamp: 1672297168.240548
6
+ torch: 1.12.1
7
+ yaml_files:
8
+ asr_train_config: exp/asr_train_asr_conformer_e15_linear1024_raw_it_char_normalize_confnorm_varsFalse/config.yaml