simpleoier committed on
Commit
a4366c5
1 Parent(s): 8d1b051

Update model

Browse files
Files changed (33) hide show
  1. README.md +368 -0
  2. meta.yaml +10 -0
  3. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/12epoch.pth +3 -0
  4. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/config.yaml +189 -0
  5. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/images/backward_time.png +0 -0
  6. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/images/forward_time.png +0 -0
  7. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/images/gpu_max_cached_mem_GB.png +0 -0
  8. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/images/iter_time.png +0 -0
  9. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/images/loss.png +0 -0
  10. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/images/optim0_lr0.png +0 -0
  11. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/images/optim_step_time.png +0 -0
  12. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/images/train_time.png +0 -0
  13. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/perplexity_test/ppl +1 -0
  14. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/data/nlsyms.txt +3 -0
  15. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/RESULTS.md +49 -0
  16. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/RESULTS_enh.md +19 -0
  17. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/config.yaml +250 -0
  18. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/acc.png +0 -0
  19. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/backward_time.png +0 -0
  20. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/cer.png +0 -0
  21. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/cer_ctc.png +0 -0
  22. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/forward_time.png +0 -0
  23. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/gpu_max_cached_mem_GB.png +0 -0
  24. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/iter_time.png +0 -0
  25. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/loss.png +0 -0
  26. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/loss_att.png +0 -0
  27. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/loss_ctc.png +0 -0
  28. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/loss_enh.png +0 -0
  29. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/optim0_lr0.png +0 -0
  30. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/optim_step_time.png +0 -0
  31. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/train_time.png +0 -0
  32. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/wer.png +0 -0
  33. ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/valid.acc.ave_10best.pth +3 -0
README.md ADDED
@@ -0,0 +1,368 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - speech-enhancement-recognition
6
+ language: en
7
+ datasets:
8
+ - chime4
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 EnhS2T model
13
+
14
+ ### `espnet/simpleoier_chime4_enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char`
15
+
16
+ This model was trained by simpleoier using the chime4 recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ ```bash
21
+ cd espnet
22
+ git checkout 44971ff962aae30c962226f1ba3d87de057ac00e
23
+ pip install -e .
24
+ cd egs2/chime4/enh_asr1
25
+ ./run.sh --skip_data_prep false --skip_train true --download_model espnet/simpleoier_chime4_enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char
26
+ ```
27
+
28
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
29
+ # RESULTS
30
+ ## Environments
31
+ - date: `Thu Apr 28 00:09:17 EDT 2022`
32
+ - python version: `3.7.11 (default, Jul 27 2021, 14:32:16) [GCC 7.5.0]`
33
+ - espnet version: `espnet 202204`
34
+ - pytorch version: `pytorch 1.8.1`
35
+ - Git hash: ``
36
+ - Commit date: ``
37
+
38
+ ## enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char
39
+ ### WER
40
+
41
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
42
+ |---|---|---|---|---|---|---|---|---|
43
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_beamformit_2mics|1640|27119|93.0|5.2|1.8|0.6|7.7|53.3|
44
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_beamformit_5mics|1640|27119|93.9|4.5|1.6|0.5|6.7|49.9|
45
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_isolated_1ch_track|1640|27119|91.8|6.0|2.2|0.8|9.0|57.7|
46
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_beamformit_2mics|1640|27120|92.2|6.0|1.9|0.7|8.6|55.5|
47
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_beamformit_5mics|1640|27120|93.6|4.9|1.5|0.6|7.1|51.6|
48
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_isolated_1ch_track|1640|27120|89.9|7.6|2.4|1.0|11.1|59.7|
49
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_beamformit_2mics|1320|21409|86.7|9.7|3.5|1.3|14.5|64.7|
50
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_beamformit_5mics|1320|21409|89.2|7.9|2.9|1.0|11.8|61.2|
51
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_isolated_1ch_track|1320|21409|84.6|11.4|4.0|1.5|17.0|69.4|
52
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_beamformit_2mics|1320|21416|86.0|10.5|3.5|1.5|15.5|67.5|
53
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_beamformit_5mics|1320|21416|88.1|8.9|3.1|1.2|13.1|64.8|
54
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_isolated_1ch_track|1320|21416|82.8|13.1|4.1|1.9|19.1|69.4|
55
+
56
+ ### CER
57
+
58
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
59
+ |---|---|---|---|---|---|---|---|---|
60
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_beamformit_2mics|1640|160390|96.6|1.4|2.0|0.6|4.0|53.3|
61
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_beamformit_5mics|1640|160390|97.1|1.1|1.8|0.5|3.4|49.9|
62
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_isolated_1ch_track|1640|160390|95.9|1.7|2.3|0.8|4.8|57.7|
63
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_beamformit_2mics|1640|160400|95.9|1.7|2.3|0.7|4.8|55.5|
64
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_beamformit_5mics|1640|160400|96.8|1.4|1.9|0.6|3.8|51.6|
65
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_isolated_1ch_track|1640|160400|94.7|2.5|2.9|1.0|6.3|59.7|
66
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_beamformit_2mics|1320|126796|92.8|3.2|4.0|1.2|8.4|64.7|
67
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_beamformit_5mics|1320|126796|94.3|2.4|3.3|1.0|6.6|61.2|
68
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_isolated_1ch_track|1320|126796|91.5|3.8|4.6|1.6|10.0|69.4|
69
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_beamformit_2mics|1320|126812|92.2|3.5|4.2|1.7|9.5|67.5|
70
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_beamformit_5mics|1320|126812|93.7|2.7|3.5|1.4|7.7|64.8|
71
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_isolated_1ch_track|1320|126812|90.3|4.8|4.9|2.2|11.9|69.4|
72
+
73
+ ### TER
74
+
75
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
76
+ |---|---|---|---|---|---|---|---|---|
77
+
78
+ ## EnhS2T config
79
+
80
+ <details><summary>expand</summary>
81
+
82
+ ```
83
+ config: conf/train_enh_asr_convtasnet_fbank_transformer.yaml
84
+ print_config: false
85
+ log_level: INFO
86
+ dry_run: false
87
+ iterator_type: sequence
88
+ output_dir: exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char
89
+ ngpu: 1
90
+ seed: 0
91
+ num_workers: 1
92
+ num_att_plot: 0
93
+ dist_backend: nccl
94
+ dist_init_method: env://
95
+ dist_world_size: null
96
+ dist_rank: null
97
+ local_rank: 0
98
+ dist_master_addr: null
99
+ dist_master_port: null
100
+ dist_launcher: null
101
+ multiprocessing_distributed: false
102
+ unused_parameters: false
103
+ sharded_ddp: false
104
+ cudnn_enabled: true
105
+ cudnn_benchmark: false
106
+ cudnn_deterministic: true
107
+ collect_stats: false
108
+ write_collected_feats: false
109
+ max_epoch: 50
110
+ patience: 5
111
+ val_scheduler_criterion:
112
+ - valid
113
+ - loss
114
+ early_stopping_criterion:
115
+ - valid
116
+ - loss
117
+ - min
118
+ best_model_criterion:
119
+ - - valid
120
+ - acc
121
+ - max
122
+ - - train
123
+ - loss
124
+ - min
125
+ keep_nbest_models: 10
126
+ nbest_averaging_interval: 0
127
+ grad_clip: 5
128
+ grad_clip_type: 2.0
129
+ grad_noise: false
130
+ accum_grad: 2
131
+ no_forward_run: false
132
+ resume: true
133
+ train_dtype: float32
134
+ use_amp: false
135
+ log_interval: null
136
+ use_matplotlib: true
137
+ use_tensorboard: true
138
+ use_wandb: false
139
+ wandb_project: null
140
+ wandb_id: null
141
+ wandb_entity: null
142
+ wandb_name: null
143
+ wandb_model_log_interval: -1
144
+ detect_anomaly: false
145
+ pretrain_path: null
146
+ init_param: []
147
+ ignore_init_mismatch: false
148
+ freeze_param: []
149
+ num_iters_per_epoch: null
150
+ batch_size: 16
151
+ valid_batch_size: null
152
+ batch_bins: 1000000
153
+ valid_batch_bins: null
154
+ train_shape_file:
155
+ - exp/enh_asr_stats_raw_en_char/train/speech_shape
156
+ - exp/enh_asr_stats_raw_en_char/train/speech_ref1_shape
157
+ - exp/enh_asr_stats_raw_en_char/train/text_shape.char
158
+ valid_shape_file:
159
+ - exp/enh_asr_stats_raw_en_char/valid/speech_shape
160
+ - exp/enh_asr_stats_raw_en_char/valid/speech_ref1_shape
161
+ - exp/enh_asr_stats_raw_en_char/valid/text_shape.char
162
+ batch_type: folded
163
+ valid_batch_type: null
164
+ fold_length:
165
+ - 80000
166
+ - 80000
167
+ - 150
168
+ sort_in_batch: descending
169
+ sort_batch: descending
170
+ multiple_iterator: false
171
+ chunk_length: 500
172
+ chunk_shift_ratio: 0.5
173
+ num_cache_chunks: 1024
174
+ train_data_path_and_name_and_type:
175
+ - - dump/raw/tr05_multi_noisy_si284/wav.scp
176
+ - speech
177
+ - sound
178
+ - - dump/raw/tr05_multi_noisy_si284/spk1.scp
179
+ - speech_ref1
180
+ - sound
181
+ - - dump/raw/tr05_multi_noisy_si284/text
182
+ - text
183
+ - text
184
+ valid_data_path_and_name_and_type:
185
+ - - dump/raw/dt05_multi_isolated_1ch_track/wav.scp
186
+ - speech
187
+ - sound
188
+ - - dump/raw/dt05_multi_isolated_1ch_track/spk1.scp
189
+ - speech_ref1
190
+ - sound
191
+ - - dump/raw/dt05_multi_isolated_1ch_track/text
192
+ - text
193
+ - text
194
+ allow_variable_data_keys: false
195
+ max_cache_size: 0.0
196
+ max_cache_fd: 32
197
+ valid_max_cache_size: null
198
+ optim: adam
199
+ optim_conf:
200
+ lr: 0.002
201
+ scheduler: warmuplr
202
+ scheduler_conf:
203
+ warmup_steps: 20000
204
+ token_list: data/en_token_list/char/tokens.txt
205
+ src_token_list: null
206
+ init: xavier_uniform
207
+ input_size: null
208
+ ctc_conf:
209
+ dropout_rate: 0.0
210
+ ctc_type: builtin
211
+ reduce: true
212
+ ignore_nan_grad: true
213
+ enh_criterions:
214
+ - name: si_snr
215
+ conf:
216
+ eps: 1e-7
217
+ wrapper: fixed_order
218
+ wrapper_conf:
219
+ weight: 1.0
220
+ enh_model_conf:
221
+ stft_consistency: false
222
+ loss_type: mask_mse
223
+ mask_type: null
224
+ asr_model_conf:
225
+ ctc_weight: 0.3
226
+ lsm_weight: 0.1
227
+ length_normalized_loss: false
228
+ extract_feats_in_collect_stats: false
229
+ st_model_conf:
230
+ stft_consistency: false
231
+ loss_type: mask_mse
232
+ mask_type: null
233
+ subtask_series:
234
+ - enh
235
+ - asr
236
+ model_conf:
237
+ bypass_enh_prob: 0.0
238
+ use_preprocessor: true
239
+ token_type: char
240
+ bpemodel: null
241
+ src_token_type: bpe
242
+ src_bpemodel: null
243
+ non_linguistic_symbols: data/nlsyms.txt
244
+ cleaner: null
245
+ g2p: null
246
+ enh_encoder: conv
247
+ enh_encoder_conf:
248
+ channel: 256
249
+ kernel_size: 40
250
+ stride: 20
251
+ enh_separator: tcn
252
+ enh_separator_conf:
253
+ num_spk: 1
254
+ layer: 4
255
+ stack: 2
256
+ bottleneck_dim: 256
257
+ hidden_dim: 512
258
+ kernel: 3
259
+ causal: false
260
+ norm_type: gLN
261
+ nonlinear: relu
262
+ enh_decoder: conv
263
+ enh_decoder_conf:
264
+ channel: 256
265
+ kernel_size: 40
266
+ stride: 20
267
+ frontend: default
268
+ frontend_conf:
269
+ fs: 16k
270
+ n_fft: 512
271
+ win_length: 400
272
+ hop_length: 160
273
+ frontend_conf: null
274
+ apply_stft: true
275
+ specaug: specaug
276
+ specaug_conf:
277
+ apply_time_warp: true
278
+ time_warp_window: 5
279
+ time_warp_mode: bicubic
280
+ apply_freq_mask: true
281
+ freq_mask_width_range:
282
+ - 0
283
+ - 30
284
+ num_freq_mask: 2
285
+ apply_time_mask: true
286
+ time_mask_width_range:
287
+ - 0
288
+ - 40
289
+ num_time_mask: 2
290
+ normalize: utterance_mvn
291
+ normalize_conf: {}
292
+ asr_preencoder: null
293
+ asr_preencoder_conf: {}
294
+ asr_encoder: transformer
295
+ asr_encoder_conf:
296
+ output_size: 256
297
+ attention_heads: 4
298
+ linear_units: 2048
299
+ num_blocks: 12
300
+ dropout_rate: 0.1
301
+ attention_dropout_rate: 0.0
302
+ input_layer: conv2d
303
+ normalize_before: true
304
+ asr_postencoder: null
305
+ asr_postencoder_conf: {}
306
+ asr_decoder: transformer
307
+ asr_decoder_conf:
308
+ input_layer: embed
309
+ attention_heads: 4
310
+ linear_units: 2048
311
+ num_blocks: 6
312
+ dropout_rate: 0.1
313
+ positional_dropout_rate: 0.0
314
+ self_attention_dropout_rate: 0.0
315
+ src_attention_dropout_rate: 0.0
316
+ st_preencoder: null
317
+ st_preencoder_conf: {}
318
+ st_encoder: rnn
319
+ st_encoder_conf: {}
320
+ st_postencoder: null
321
+ st_postencoder_conf: {}
322
+ st_decoder: rnn
323
+ st_decoder_conf: {}
324
+ st_extra_asr_decoder: rnn
325
+ st_extra_asr_decoder_conf: {}
326
+ st_extra_mt_decoder: rnn
327
+ st_extra_mt_decoder_conf: {}
328
+ required:
329
+ - output_dir
330
+ - token_list
331
+ version: '202204'
332
+ distributed: false
333
+ ```
334
+
335
+ </details>
336
+
337
+
338
+
339
+ ### Citing ESPnet
340
+
341
+ ```bibtex
342
+ @inproceedings{watanabe2018espnet,
343
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
344
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
345
+ year={2018},
346
+ booktitle={Proceedings of Interspeech},
347
+ pages={2207--2211},
348
+ doi={10.21437/Interspeech.2018-1456},
349
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
350
+ }
351
+
352
+
353
+
354
+
355
+ ```
356
+
357
+ or arXiv:
358
+
359
+ ```bibtex
360
+ @misc{watanabe2018espnet,
361
+ title={ESPnet: End-to-End Speech Processing Toolkit},
362
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
363
+ year={2018},
364
+ eprint={1804.00015},
365
+ archivePrefix={arXiv},
366
+ primaryClass={cs.CL}
367
+ }
368
+ ```
meta.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ espnet: '202204'
2
+ files:
3
+ enh_s2t_model_file: /ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/valid.acc.ave_10best.pth
4
+ lm_file: /ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/12epoch.pth
5
+ python: "3.7.13 (default, Mar 29 2022, 02:18:16) \n[GCC 7.5.0]"
6
+ timestamp: 1651121468.056503
7
+ torch: 1.8.1
8
+ yaml_files:
9
+ enh_s2t_train_config: /ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/config.yaml
10
+ lm_train_config: /ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/config.yaml
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/12epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13dc5af025af5be3560922587402820207a78d258eb78fe1d47553a62b4b5895
3
+ size 202293743
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/config.yaml ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_lm_transformer.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp_real2/lm_train_lm_transformer_en_char
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 30
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ keep_nbest_models: 10
41
+ nbest_averaging_interval: 0
42
+ grad_clip: 5.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 4
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: false
50
+ log_interval: null
51
+ use_matplotlib: true
52
+ use_tensorboard: true
53
+ use_wandb: false
54
+ wandb_project: null
55
+ wandb_id: null
56
+ wandb_entity: null
57
+ wandb_name: null
58
+ wandb_model_log_interval: -1
59
+ detect_anomaly: false
60
+ pretrain_path: null
61
+ init_param: []
62
+ ignore_init_mismatch: false
63
+ freeze_param: []
64
+ num_iters_per_epoch: null
65
+ batch_size: 20
66
+ valid_batch_size: null
67
+ batch_bins: 150000
68
+ valid_batch_bins: null
69
+ train_shape_file:
70
+ - exp_real2/lm_stats_en_char/train/text_shape.char
71
+ valid_shape_file:
72
+ - exp_real2/lm_stats_en_char/valid/text_shape.char
73
+ batch_type: numel
74
+ valid_batch_type: null
75
+ fold_length:
76
+ - 150
77
+ sort_in_batch: descending
78
+ sort_batch: descending
79
+ multiple_iterator: false
80
+ chunk_length: 500
81
+ chunk_shift_ratio: 0.5
82
+ num_cache_chunks: 1024
83
+ train_data_path_and_name_and_type:
84
+ - - dump/raw/lm_train.txt
85
+ - text
86
+ - text
87
+ valid_data_path_and_name_and_type:
88
+ - - dump/raw/dt05_multi_isolated_1ch_track/text
89
+ - text
90
+ - text
91
+ allow_variable_data_keys: false
92
+ max_cache_size: 0.0
93
+ max_cache_fd: 32
94
+ valid_max_cache_size: null
95
+ optim: adam
96
+ optim_conf:
97
+ lr: 0.001
98
+ scheduler: warmuplr
99
+ scheduler_conf:
100
+ warmup_steps: 25000
101
+ token_list:
102
+ - <blank>
103
+ - <unk>
104
+ - <space>
105
+ - E
106
+ - T
107
+ - A
108
+ - N
109
+ - I
110
+ - O
111
+ - S
112
+ - R
113
+ - H
114
+ - L
115
+ - D
116
+ - C
117
+ - U
118
+ - M
119
+ - P
120
+ - F
121
+ - G
122
+ - Y
123
+ - W
124
+ - B
125
+ - V
126
+ - K
127
+ - .
128
+ - X
129
+ - ''''
130
+ - J
131
+ - Q
132
+ - Z
133
+ - ','
134
+ - '-'
135
+ - '"'
136
+ - <NOISE>
137
+ - '*'
138
+ - ':'
139
+ - (
140
+ - )
141
+ - '?'
142
+ - '&'
143
+ - ;
144
+ - '!'
145
+ - /
146
+ - '{'
147
+ - '}'
148
+ - '1'
149
+ - '2'
150
+ - '0'
151
+ - $
152
+ - '8'
153
+ - '9'
154
+ - '6'
155
+ - '3'
156
+ - '5'
157
+ - '7'
158
+ - '4'
159
+ - '~'
160
+ - '`'
161
+ - _
162
+ - <*IN*>
163
+ - <*MR.*>
164
+ - \
165
+ - ^
166
+ - <sos/eos>
167
+ init: null
168
+ model_conf:
169
+ ignore_id: 0
170
+ use_preprocessor: true
171
+ token_type: char
172
+ bpemodel: null
173
+ non_linguistic_symbols: data/nlsyms.txt
174
+ cleaner: null
175
+ g2p: null
176
+ lm: transformer
177
+ lm_conf:
178
+ pos_enc: null
179
+ embed_unit: 128
180
+ att_unit: 512
181
+ head: 8
182
+ unit: 2048
183
+ layer: 16
184
+ dropout_rate: 0.1
185
+ required:
186
+ - output_dir
187
+ - token_list
188
+ version: 0.10.7a1
189
+ distributed: false
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/images/backward_time.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/images/forward_time.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/images/gpu_max_cached_mem_GB.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/images/iter_time.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/images/loss.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/images/optim0_lr0.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/images/optim_step_time.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/images/train_time.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/asr1/exp/lm_train_lm_transformer_en_char/perplexity_test/ppl ADDED
@@ -0,0 +1 @@
 
 
1
+ 1.8016810278173603
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/data/nlsyms.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ <*IN*>
2
+ <*MR.*>
3
+ <NOISE>
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/RESULTS.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Thu Apr 28 00:09:17 EDT 2022`
5
+ - python version: `3.7.11 (default, Jul 27 2021, 14:32:16) [GCC 7.5.0]`
6
+ - espnet version: `espnet 202204`
7
+ - pytorch version: `pytorch 1.8.1`
8
+ - Git hash: ``
9
+ - Commit date: ``
10
+
11
+ ## enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char
12
+ ### WER
13
+
14
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
15
+ |---|---|---|---|---|---|---|---|---|
16
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_beamformit_2mics|1640|27119|93.0|5.2|1.8|0.6|7.7|53.3|
17
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_beamformit_5mics|1640|27119|93.9|4.5|1.6|0.5|6.7|49.9|
18
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_isolated_1ch_track|1640|27119|91.8|6.0|2.2|0.8|9.0|57.7|
19
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_beamformit_2mics|1640|27120|92.2|6.0|1.9|0.7|8.6|55.5|
20
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_beamformit_5mics|1640|27120|93.6|4.9|1.5|0.6|7.1|51.6|
21
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_isolated_1ch_track|1640|27120|89.9|7.6|2.4|1.0|11.1|59.7|
22
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_beamformit_2mics|1320|21409|86.7|9.7|3.5|1.3|14.5|64.7|
23
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_beamformit_5mics|1320|21409|89.2|7.9|2.9|1.0|11.8|61.2|
24
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_isolated_1ch_track|1320|21409|84.6|11.4|4.0|1.5|17.0|69.4|
25
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_beamformit_2mics|1320|21416|86.0|10.5|3.5|1.5|15.5|67.5|
26
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_beamformit_5mics|1320|21416|88.1|8.9|3.1|1.2|13.1|64.8|
27
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_isolated_1ch_track|1320|21416|82.8|13.1|4.1|1.9|19.1|69.4|
28
+
29
+ ### CER
30
+
31
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
32
+ |---|---|---|---|---|---|---|---|---|
33
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_beamformit_2mics|1640|160390|96.6|1.4|2.0|0.6|4.0|53.3|
34
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_beamformit_5mics|1640|160390|97.1|1.1|1.8|0.5|3.4|49.9|
35
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_real_isolated_1ch_track|1640|160390|95.9|1.7|2.3|0.8|4.8|57.7|
36
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_beamformit_2mics|1640|160400|95.9|1.7|2.3|0.7|4.8|55.5|
37
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_beamformit_5mics|1640|160400|96.8|1.4|1.9|0.6|3.8|51.6|
38
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/dt05_simu_isolated_1ch_track|1640|160400|94.7|2.5|2.9|1.0|6.3|59.7|
39
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_beamformit_2mics|1320|126796|92.8|3.2|4.0|1.2|8.4|64.7|
40
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_beamformit_5mics|1320|126796|94.3|2.4|3.3|1.0|6.6|61.2|
41
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_real_isolated_1ch_track|1320|126796|91.5|3.8|4.6|1.6|10.0|69.4|
42
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_beamformit_2mics|1320|126812|92.2|3.5|4.2|1.7|9.5|67.5|
43
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_beamformit_5mics|1320|126812|93.7|2.7|3.5|1.4|7.7|64.8|
44
+ |decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave/et05_simu_isolated_1ch_track|1320|126812|90.3|4.8|4.9|2.2|11.9|69.4|
45
+
46
+ ### TER
47
+
48
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
49
+ |---|---|---|---|---|---|---|---|---|
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/RESULTS_enh.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by ./scripts/utils/show_enh_score.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Thu Apr 28 00:34:37 EDT 2022`
5
+ - python version: `3.7.11 (default, Jul 27 2021, 14:32:16) [GCC 7.5.0]`
6
+ - espnet version: `espnet 0.10.7a1`
7
+ - pytorch version: `pytorch 1.10.1`
8
+ - Git hash: ``
9
+ - Commit date: ``
10
+
11
+
12
+ ## decode_asr_transformer_normalize_output_wavtrue_lm_lm_train_lm_transformer_en_char_valid.loss.ave_enh_asr_model_valid.acc.ave
13
+
14
+
15
+ |dataset|STOI|SAR|SDR|SIR|SI_SNR|
16
+ |---|---|---|---|---|---|
17
+ |dt05_simu_isolated_1ch_track|0.87|7.14|7.14|0.00|4.51|
18
+ |et05_simu_isolated_1ch_track|0.85|7.47|7.47|0.00|3.02|
19
+
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/config.yaml ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_enh_asr_convtasnet_fbank_transformer.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 0
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 50
28
+ patience: 5
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - acc
39
+ - max
40
+ - - train
41
+ - loss
42
+ - min
43
+ keep_nbest_models: 10
44
+ nbest_averaging_interval: 0
45
+ grad_clip: 5
46
+ grad_clip_type: 2.0
47
+ grad_noise: false
48
+ accum_grad: 2
49
+ no_forward_run: false
50
+ resume: true
51
+ train_dtype: float32
52
+ use_amp: false
53
+ log_interval: null
54
+ use_matplotlib: true
55
+ use_tensorboard: true
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ pretrain_path: null
64
+ init_param: []
65
+ ignore_init_mismatch: false
66
+ freeze_param: []
67
+ num_iters_per_epoch: null
68
+ batch_size: 16
69
+ valid_batch_size: null
70
+ batch_bins: 1000000
71
+ valid_batch_bins: null
72
+ train_shape_file:
73
+ - exp/enh_asr_stats_raw_en_char/train/speech_shape
74
+ - exp/enh_asr_stats_raw_en_char/train/speech_ref1_shape
75
+ - exp/enh_asr_stats_raw_en_char/train/text_shape.char
76
+ valid_shape_file:
77
+ - exp/enh_asr_stats_raw_en_char/valid/speech_shape
78
+ - exp/enh_asr_stats_raw_en_char/valid/speech_ref1_shape
79
+ - exp/enh_asr_stats_raw_en_char/valid/text_shape.char
80
+ batch_type: folded
81
+ valid_batch_type: null
82
+ fold_length:
83
+ - 80000
84
+ - 80000
85
+ - 150
86
+ sort_in_batch: descending
87
+ sort_batch: descending
88
+ multiple_iterator: false
89
+ chunk_length: 500
90
+ chunk_shift_ratio: 0.5
91
+ num_cache_chunks: 1024
92
+ train_data_path_and_name_and_type:
93
+ - - dump/raw/tr05_multi_noisy_si284/wav.scp
94
+ - speech
95
+ - sound
96
+ - - dump/raw/tr05_multi_noisy_si284/spk1.scp
97
+ - speech_ref1
98
+ - sound
99
+ - - dump/raw/tr05_multi_noisy_si284/text
100
+ - text
101
+ - text
102
+ valid_data_path_and_name_and_type:
103
+ - - dump/raw/dt05_multi_isolated_1ch_track/wav.scp
104
+ - speech
105
+ - sound
106
+ - - dump/raw/dt05_multi_isolated_1ch_track/spk1.scp
107
+ - speech_ref1
108
+ - sound
109
+ - - dump/raw/dt05_multi_isolated_1ch_track/text
110
+ - text
111
+ - text
112
+ allow_variable_data_keys: false
113
+ max_cache_size: 0.0
114
+ max_cache_fd: 32
115
+ valid_max_cache_size: null
116
+ optim: adam
117
+ optim_conf:
118
+ lr: 0.002
119
+ scheduler: warmuplr
120
+ scheduler_conf:
121
+ warmup_steps: 20000
122
+ token_list: data/en_token_list/char/tokens.txt
123
+ src_token_list: null
124
+ init: xavier_uniform
125
+ input_size: null
126
+ ctc_conf:
127
+ dropout_rate: 0.0
128
+ ctc_type: builtin
129
+ reduce: true
130
+ ignore_nan_grad: true
131
+ enh_criterions:
132
+ - name: si_snr
133
+ conf:
134
+ eps: 1e-7
135
+ wrapper: fixed_order
136
+ wrapper_conf:
137
+ weight: 1.0
138
+ enh_model_conf:
139
+ stft_consistency: false
140
+ loss_type: mask_mse
141
+ mask_type: null
142
+ asr_model_conf:
143
+ ctc_weight: 0.3
144
+ lsm_weight: 0.1
145
+ length_normalized_loss: false
146
+ extract_feats_in_collect_stats: false
147
+ st_model_conf:
148
+ stft_consistency: false
149
+ loss_type: mask_mse
150
+ mask_type: null
151
+ subtask_series:
152
+ - enh
153
+ - asr
154
+ model_conf:
155
+ bypass_enh_prob: 0.0
156
+ use_preprocessor: true
157
+ token_type: char
158
+ bpemodel: null
159
+ src_token_type: bpe
160
+ src_bpemodel: null
161
+ non_linguistic_symbols: data/nlsyms.txt
162
+ cleaner: null
163
+ g2p: null
164
+ enh_encoder: conv
165
+ enh_encoder_conf:
166
+ channel: 256
167
+ kernel_size: 40
168
+ stride: 20
169
+ enh_separator: tcn
170
+ enh_separator_conf:
171
+ num_spk: 1
172
+ layer: 4
173
+ stack: 2
174
+ bottleneck_dim: 256
175
+ hidden_dim: 512
176
+ kernel: 3
177
+ causal: false
178
+ norm_type: gLN
179
+ nonlinear: relu
180
+ enh_decoder: conv
181
+ enh_decoder_conf:
182
+ channel: 256
183
+ kernel_size: 40
184
+ stride: 20
185
+ frontend: default
186
+ frontend_conf:
187
+ fs: 16k
188
+ n_fft: 512
189
+ win_length: 400
190
+ hop_length: 160
191
+ frontend_conf: null
192
+ apply_stft: true
193
+ specaug: specaug
194
+ specaug_conf:
195
+ apply_time_warp: true
196
+ time_warp_window: 5
197
+ time_warp_mode: bicubic
198
+ apply_freq_mask: true
199
+ freq_mask_width_range:
200
+ - 0
201
+ - 30
202
+ num_freq_mask: 2
203
+ apply_time_mask: true
204
+ time_mask_width_range:
205
+ - 0
206
+ - 40
207
+ num_time_mask: 2
208
+ normalize: utterance_mvn
209
+ normalize_conf: {}
210
+ asr_preencoder: null
211
+ asr_preencoder_conf: {}
212
+ asr_encoder: transformer
213
+ asr_encoder_conf:
214
+ output_size: 256
215
+ attention_heads: 4
216
+ linear_units: 2048
217
+ num_blocks: 12
218
+ dropout_rate: 0.1
219
+ attention_dropout_rate: 0.0
220
+ input_layer: conv2d
221
+ normalize_before: true
222
+ asr_postencoder: null
223
+ asr_postencoder_conf: {}
224
+ asr_decoder: transformer
225
+ asr_decoder_conf:
226
+ input_layer: embed
227
+ attention_heads: 4
228
+ linear_units: 2048
229
+ num_blocks: 6
230
+ dropout_rate: 0.1
231
+ positional_dropout_rate: 0.0
232
+ self_attention_dropout_rate: 0.0
233
+ src_attention_dropout_rate: 0.0
234
+ st_preencoder: null
235
+ st_preencoder_conf: {}
236
+ st_encoder: rnn
237
+ st_encoder_conf: {}
238
+ st_postencoder: null
239
+ st_postencoder_conf: {}
240
+ st_decoder: rnn
241
+ st_decoder_conf: {}
242
+ st_extra_asr_decoder: rnn
243
+ st_extra_asr_decoder_conf: {}
244
+ st_extra_mt_decoder: rnn
245
+ st_extra_mt_decoder_conf: {}
246
+ required:
247
+ - output_dir
248
+ - token_list
249
+ version: '202204'
250
+ distributed: false
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/acc.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/backward_time.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/cer.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/cer_ctc.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/forward_time.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/gpu_max_cached_mem_GB.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/iter_time.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/loss.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/loss_att.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/loss_ctc.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/loss_enh.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/optim0_lr0.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/optim_step_time.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/train_time.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/images/wer.png ADDED
ocean/projects/cis210027p/xuankaic/experiments/espnet/egs2/chime4/enh_asr1/exp/enh_asr_train_enh_asr_convtasnet_fbank_transformer_raw_en_char/valid.acc.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de1bd3d03af56ea6ecba96112d955cd34c56ca0137437f07509c5218246495de
3
+ size 117936909