“siddhu001” committed on
Commit
844875d
1 Parent(s): 8423cf9

Update model

Browse files
Files changed (20) hide show
  1. README.md +329 -0
  2. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/RESULTS.md +46 -0
  3. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/config.yaml +209 -0
  4. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/acc.png +0 -0
  5. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/backward_time.png +0 -0
  6. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/cer.png +0 -0
  7. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/clip.png +0 -0
  8. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/forward_time.png +0 -0
  9. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/gpu_max_cached_mem_GB.png +0 -0
  10. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/grad_norm.png +0 -0
  11. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/iter_time.png +0 -0
  12. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/loss.png +0 -0
  13. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/loss_att.png +0 -0
  14. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/loss_scale.png +0 -0
  15. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/optim0_lr0.png +0 -0
  16. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/optim_step_time.png +0 -0
  17. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/train_time.png +0 -0
  18. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/wer.png +0 -0
  19. exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/valid.loss.ave_10best.pth +3 -0
  20. meta.yaml +8 -0
README.md ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - automatic-speech-recognition
6
+ language: en
7
+ datasets:
8
+ - slue-voxceleb
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 SLU model
13
+
14
+ ### `espnet/sluevoxceleb_whisper_lightweight_sa`
15
+
16
+ This model was trained by “siddhu001” using the slue-voxceleb recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout e23ef85f0b3116ad5c60d0833f186da0deec0734
26
+ pip install -e .
27
+ cd egs2/slue-voxceleb/slu1_superb_correct
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model espnet/sluevoxceleb_whisper_lightweight_sa
29
+ ```
30
+
31
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
32
+ # RESULTS
33
+ ## Environments
34
+ - date: `Wed Feb 7 23:07:47 CST 2024`
35
+ - python version: `3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0]`
36
+ - espnet version: `espnet 202310`
37
+ - pytorch version: `pytorch 2.1.0+cu121`
38
+ - Git hash: `21d2105784e4da98397bf487b2550d4c6e16d40d`
39
+ - Commit date: `Wed Jan 31 13:40:37 2024 -0600`
40
+
41
+ ## exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp
42
+ ### WER
43
+
44
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
45
+ |---|---|---|---|---|---|---|---|---|
46
+ |decode_asr_slu_model_valid.loss.ave/devel|1436|1436|79.3|20.7|0.0|0.0|20.7|20.7|
47
+ |decode_asr_slu_model_valid.loss.ave/test|3426|3426|80.1|19.9|0.0|0.0|19.9|19.9|
48
+
49
+ ### CER
50
+
51
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
52
+ |---|---|---|---|---|---|---|---|---|
53
+ |decode_asr_slu_model_valid.loss.ave/devel|1436|10365|81.9|16.1|2.0|0.9|19.0|20.7|
54
+ |decode_asr_slu_model_valid.loss.ave/test|3426|24887|82.8|15.1|2.2|0.5|17.8|19.9|
55
+
56
+ ### TER
57
+
58
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
59
+ |---|---|---|---|---|---|---|---|---|
60
+ ## exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/decode_asr_slu_model_valid.loss.ave
61
+ ### WER
62
+
63
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
64
+ |---|---|---|---|---|---|---|---|---|
65
+ |org/devel|1437|1437|79.3|20.7|0.0|0.0|20.7|20.7|
66
+
67
+ ### CER
68
+
69
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
70
+ |---|---|---|---|---|---|---|---|---|
71
+ |org/devel|1437|10372|81.9|16.1|2.0|0.9|19.0|20.7|
72
+
73
+ ### TER
74
+
75
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
76
+ |---|---|---|---|---|---|---|---|---|
77
+
78
+ ## ASR config
79
+
80
+ <details><summary>expand</summary>
81
+
82
+ ```
83
+ config: conf/train_asr_whisper_weighted_0.01.yaml
84
+ print_config: false
85
+ log_level: INFO
86
+ drop_last_iter: false
87
+ dry_run: false
88
+ iterator_type: sequence
89
+ valid_iterator_type: null
90
+ output_dir: exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp
91
+ ngpu: 1
92
+ seed: 0
93
+ num_workers: 1
94
+ num_att_plot: 3
95
+ dist_backend: nccl
96
+ dist_init_method: env://
97
+ dist_world_size: 4
98
+ dist_rank: 0
99
+ local_rank: 0
100
+ dist_master_addr: localhost
101
+ dist_master_port: 42077
102
+ dist_launcher: null
103
+ multiprocessing_distributed: true
104
+ unused_parameters: true
105
+ sharded_ddp: false
106
+ cudnn_enabled: true
107
+ cudnn_benchmark: false
108
+ cudnn_deterministic: true
109
+ collect_stats: false
110
+ write_collected_feats: false
111
+ max_epoch: 50
112
+ patience: null
113
+ val_scheduler_criterion:
114
+ - valid
115
+ - loss
116
+ early_stopping_criterion:
117
+ - valid
118
+ - loss
119
+ - min
120
+ best_model_criterion:
121
+ - - valid
122
+ - loss
123
+ - min
124
+ keep_nbest_models: 10
125
+ nbest_averaging_interval: 0
126
+ grad_clip: 5.0
127
+ grad_clip_type: 2.0
128
+ grad_noise: false
129
+ accum_grad: 1
130
+ no_forward_run: false
131
+ resume: true
132
+ train_dtype: float32
133
+ use_amp: false
134
+ log_interval: null
135
+ use_matplotlib: true
136
+ use_tensorboard: true
137
+ create_graph_in_tensorboard: false
138
+ use_wandb: false
139
+ wandb_project: null
140
+ wandb_id: null
141
+ wandb_entity: null
142
+ wandb_name: null
143
+ wandb_model_log_interval: -1
144
+ detect_anomaly: false
145
+ use_lora: false
146
+ save_lora_only: true
147
+ lora_conf: {}
148
+ pretrain_path: null
149
+ init_param: []
150
+ ignore_init_mismatch: false
151
+ freeze_param:
152
+ - encoder
153
+ num_iters_per_epoch: null
154
+ batch_size: 320
155
+ valid_batch_size: null
156
+ batch_bins: 1000000
157
+ valid_batch_bins: null
158
+ train_shape_file:
159
+ - exp/slu_stats_raw_en_word_sp/train/speech_shape
160
+ - exp/slu_stats_raw_en_word_sp/train/text_shape.word
161
+ valid_shape_file:
162
+ - exp/slu_stats_raw_en_word_sp/valid/speech_shape
163
+ - exp/slu_stats_raw_en_word_sp/valid/text_shape.word
164
+ batch_type: folded
165
+ valid_batch_type: null
166
+ fold_length:
167
+ - 80000
168
+ - 150
169
+ sort_in_batch: descending
170
+ shuffle_within_batch: false
171
+ sort_batch: descending
172
+ multiple_iterator: false
173
+ chunk_length: 500
174
+ chunk_shift_ratio: 0.5
175
+ num_cache_chunks: 1024
176
+ chunk_excluded_key_prefixes: []
177
+ chunk_default_fs: null
178
+ train_data_path_and_name_and_type:
179
+ - - dump/raw/train_sp/wav.scp
180
+ - speech
181
+ - sound
182
+ - - dump/raw/train_sp/text
183
+ - text
184
+ - text
185
+ valid_data_path_and_name_and_type:
186
+ - - dump/raw/devel/wav.scp
187
+ - speech
188
+ - sound
189
+ - - dump/raw/devel/text
190
+ - text
191
+ - text
192
+ allow_variable_data_keys: false
193
+ max_cache_size: 0.0
194
+ max_cache_fd: 32
195
+ allow_multi_rates: false
196
+ valid_max_cache_size: null
197
+ exclude_weight_decay: false
198
+ exclude_weight_decay_conf: {}
199
+ optim: adam
200
+ optim_conf:
201
+ lr: 0.01
202
+ scheduler: warmuplr
203
+ scheduler_conf:
204
+ warmup_steps: 1000
205
+ token_list:
206
+ - <blank>
207
+ - <unk>
208
+ - Neutral
209
+ - Positive
210
+ - Negative
211
+ - <sos/eos>
212
+ transcript_token_list: null
213
+ two_pass: false
214
+ pre_postencoder_norm: false
215
+ init: null
216
+ input_size: 1
217
+ ctc_conf:
218
+ dropout_rate: 0.0
219
+ ctc_type: builtin
220
+ reduce: true
221
+ ignore_nan_grad: null
222
+ zero_infinity: true
223
+ brctc_risk_strategy: exp
224
+ brctc_group_strategy: end
225
+ brctc_risk_factor: 0.0
226
+ joint_net_conf: null
227
+ use_preprocessor: true
228
+ token_type: word
229
+ bpemodel: null
230
+ non_linguistic_symbols: null
231
+ cleaner: null
232
+ g2p: null
233
+ speech_volume_normalize: null
234
+ rir_scp: null
235
+ rir_apply_prob: 1.0
236
+ noise_scp: null
237
+ noise_apply_prob: 1.0
238
+ noise_db_range: '13_15'
239
+ short_noise_thres: 0.5
240
+ frontend: null
241
+ frontend_conf: {}
242
+ specaug: null
243
+ specaug_conf: {}
244
+ normalize: null
245
+ normalize_conf: {}
246
+ model: espnet
247
+ model_conf:
248
+ ctc_weight: 0.0
249
+ lsm_weight: 0.1
250
+ length_normalized_loss: false
251
+ superb_setup_encoder: true
252
+ num_class: 3
253
+ ssl_input_size: 1024
254
+ weighted_sum: true
255
+ extract_feats_in_collect_stats: false
256
+ preencoder: null
257
+ preencoder_conf: {}
258
+ encoder: whisper
259
+ encoder_conf:
260
+ whisper_model: medium
261
+ dropout_rate: 0.0
262
+ use_specaug: true
263
+ specaug_conf:
264
+ apply_time_warp: true
265
+ time_warp_window: 5
266
+ time_warp_mode: bicubic
267
+ apply_freq_mask: true
268
+ freq_mask_width_range:
269
+ - 0
270
+ - 40
271
+ num_freq_mask: 2
272
+ apply_time_mask: true
273
+ time_mask_width_ratio_range:
274
+ - 0.0
275
+ - 0.12
276
+ num_time_mask: 5
277
+ prepostencoder: null
278
+ prepostencoder_conf: {}
279
+ postencoder: null
280
+ postencoder_conf: {}
281
+ deliberationencoder: null
282
+ deliberationencoder_conf: {}
283
+ decoder: rnn
284
+ decoder_conf: {}
285
+ postdecoder: null
286
+ postdecoder_conf: {}
287
+ required:
288
+ - output_dir
289
+ - token_list
290
+ version: '202310'
291
+ distributed: true
292
+ ```
293
+
294
+ </details>
295
+
296
+
297
+
298
+ ### Citing ESPnet
299
+
300
+ ```bibtex
301
+ @inproceedings{watanabe2018espnet,
302
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
303
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
304
+ year={2018},
305
+ booktitle={Proceedings of Interspeech},
306
+ pages={2207--2211},
307
+ doi={10.21437/Interspeech.2018-1456},
308
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
309
+ }
310
+
311
+
312
+
313
+
314
+
315
+
316
+ ```
317
+
318
+ or arXiv:
319
+
320
+ ```bibtex
321
+ @misc{watanabe2018espnet,
322
+ title={ESPnet: End-to-End Speech Processing Toolkit},
323
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
324
+ year={2018},
325
+ eprint={1804.00015},
326
+ archivePrefix={arXiv},
327
+ primaryClass={cs.CL}
328
+ }
329
+ ```
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/RESULTS.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Wed Feb 7 23:07:47 CST 2024`
5
+ - python version: `3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0]`
6
+ - espnet version: `espnet 202310`
7
+ - pytorch version: `pytorch 2.1.0+cu121`
8
+ - Git hash: `21d2105784e4da98397bf487b2550d4c6e16d40d`
9
+ - Commit date: `Wed Jan 31 13:40:37 2024 -0600`
10
+
11
+ ## exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp
12
+ ### WER
13
+
14
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
15
+ |---|---|---|---|---|---|---|---|---|
16
+ |decode_asr_slu_model_valid.loss.ave/devel|1436|1436|79.3|20.7|0.0|0.0|20.7|20.7|
17
+ |decode_asr_slu_model_valid.loss.ave/test|3426|3426|80.1|19.9|0.0|0.0|19.9|19.9|
18
+
19
+ ### CER
20
+
21
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
22
+ |---|---|---|---|---|---|---|---|---|
23
+ |decode_asr_slu_model_valid.loss.ave/devel|1436|10365|81.9|16.1|2.0|0.9|19.0|20.7|
24
+ |decode_asr_slu_model_valid.loss.ave/test|3426|24887|82.8|15.1|2.2|0.5|17.8|19.9|
25
+
26
+ ### TER
27
+
28
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
29
+ |---|---|---|---|---|---|---|---|---|
30
+ ## exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/decode_asr_slu_model_valid.loss.ave
31
+ ### WER
32
+
33
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
34
+ |---|---|---|---|---|---|---|---|---|
35
+ |org/devel|1437|1437|79.3|20.7|0.0|0.0|20.7|20.7|
36
+
37
+ ### CER
38
+
39
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
40
+ |---|---|---|---|---|---|---|---|---|
41
+ |org/devel|1437|10372|81.9|16.1|2.0|0.9|19.0|20.7|
42
+
43
+ ### TER
44
+
45
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
46
+ |---|---|---|---|---|---|---|---|---|
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/config.yaml ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_asr_whisper_weighted_0.01.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 1
12
+ num_att_plot: 3
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: 4
16
+ dist_rank: 0
17
+ local_rank: 0
18
+ dist_master_addr: localhost
19
+ dist_master_port: 42077
20
+ dist_launcher: null
21
+ multiprocessing_distributed: true
22
+ unused_parameters: true
23
+ sharded_ddp: false
24
+ cudnn_enabled: true
25
+ cudnn_benchmark: false
26
+ cudnn_deterministic: true
27
+ collect_stats: false
28
+ write_collected_feats: false
29
+ max_epoch: 50
30
+ patience: null
31
+ val_scheduler_criterion:
32
+ - valid
33
+ - loss
34
+ early_stopping_criterion:
35
+ - valid
36
+ - loss
37
+ - min
38
+ best_model_criterion:
39
+ - - valid
40
+ - loss
41
+ - min
42
+ keep_nbest_models: 10
43
+ nbest_averaging_interval: 0
44
+ grad_clip: 5.0
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 1
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: false
52
+ log_interval: null
53
+ use_matplotlib: true
54
+ use_tensorboard: true
55
+ create_graph_in_tensorboard: false
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ use_lora: false
64
+ save_lora_only: true
65
+ lora_conf: {}
66
+ pretrain_path: null
67
+ init_param: []
68
+ ignore_init_mismatch: false
69
+ freeze_param:
70
+ - encoder
71
+ num_iters_per_epoch: null
72
+ batch_size: 320
73
+ valid_batch_size: null
74
+ batch_bins: 1000000
75
+ valid_batch_bins: null
76
+ train_shape_file:
77
+ - exp/slu_stats_raw_en_word_sp/train/speech_shape
78
+ - exp/slu_stats_raw_en_word_sp/train/text_shape.word
79
+ valid_shape_file:
80
+ - exp/slu_stats_raw_en_word_sp/valid/speech_shape
81
+ - exp/slu_stats_raw_en_word_sp/valid/text_shape.word
82
+ batch_type: folded
83
+ valid_batch_type: null
84
+ fold_length:
85
+ - 80000
86
+ - 150
87
+ sort_in_batch: descending
88
+ shuffle_within_batch: false
89
+ sort_batch: descending
90
+ multiple_iterator: false
91
+ chunk_length: 500
92
+ chunk_shift_ratio: 0.5
93
+ num_cache_chunks: 1024
94
+ chunk_excluded_key_prefixes: []
95
+ chunk_default_fs: null
96
+ train_data_path_and_name_and_type:
97
+ - - dump/raw/train_sp/wav.scp
98
+ - speech
99
+ - sound
100
+ - - dump/raw/train_sp/text
101
+ - text
102
+ - text
103
+ valid_data_path_and_name_and_type:
104
+ - - dump/raw/devel/wav.scp
105
+ - speech
106
+ - sound
107
+ - - dump/raw/devel/text
108
+ - text
109
+ - text
110
+ allow_variable_data_keys: false
111
+ max_cache_size: 0.0
112
+ max_cache_fd: 32
113
+ allow_multi_rates: false
114
+ valid_max_cache_size: null
115
+ exclude_weight_decay: false
116
+ exclude_weight_decay_conf: {}
117
+ optim: adam
118
+ optim_conf:
119
+ lr: 0.01
120
+ scheduler: warmuplr
121
+ scheduler_conf:
122
+ warmup_steps: 1000
123
+ token_list:
124
+ - <blank>
125
+ - <unk>
126
+ - Neutral
127
+ - Positive
128
+ - Negative
129
+ - <sos/eos>
130
+ transcript_token_list: null
131
+ two_pass: false
132
+ pre_postencoder_norm: false
133
+ init: null
134
+ input_size: 1
135
+ ctc_conf:
136
+ dropout_rate: 0.0
137
+ ctc_type: builtin
138
+ reduce: true
139
+ ignore_nan_grad: null
140
+ zero_infinity: true
141
+ brctc_risk_strategy: exp
142
+ brctc_group_strategy: end
143
+ brctc_risk_factor: 0.0
144
+ joint_net_conf: null
145
+ use_preprocessor: true
146
+ token_type: word
147
+ bpemodel: null
148
+ non_linguistic_symbols: null
149
+ cleaner: null
150
+ g2p: null
151
+ speech_volume_normalize: null
152
+ rir_scp: null
153
+ rir_apply_prob: 1.0
154
+ noise_scp: null
155
+ noise_apply_prob: 1.0
156
+ noise_db_range: '13_15'
157
+ short_noise_thres: 0.5
158
+ frontend: null
159
+ frontend_conf: {}
160
+ specaug: null
161
+ specaug_conf: {}
162
+ normalize: null
163
+ normalize_conf: {}
164
+ model: espnet
165
+ model_conf:
166
+ ctc_weight: 0.0
167
+ lsm_weight: 0.1
168
+ length_normalized_loss: false
169
+ superb_setup_encoder: true
170
+ num_class: 3
171
+ ssl_input_size: 1024
172
+ weighted_sum: true
173
+ extract_feats_in_collect_stats: false
174
+ preencoder: null
175
+ preencoder_conf: {}
176
+ encoder: whisper
177
+ encoder_conf:
178
+ whisper_model: medium
179
+ dropout_rate: 0.0
180
+ use_specaug: true
181
+ specaug_conf:
182
+ apply_time_warp: true
183
+ time_warp_window: 5
184
+ time_warp_mode: bicubic
185
+ apply_freq_mask: true
186
+ freq_mask_width_range:
187
+ - 0
188
+ - 40
189
+ num_freq_mask: 2
190
+ apply_time_mask: true
191
+ time_mask_width_ratio_range:
192
+ - 0.0
193
+ - 0.12
194
+ num_time_mask: 5
195
+ prepostencoder: null
196
+ prepostencoder_conf: {}
197
+ postencoder: null
198
+ postencoder_conf: {}
199
+ deliberationencoder: null
200
+ deliberationencoder_conf: {}
201
+ decoder: rnn
202
+ decoder_conf: {}
203
+ postdecoder: null
204
+ postdecoder_conf: {}
205
+ required:
206
+ - output_dir
207
+ - token_list
208
+ version: '202310'
209
+ distributed: true
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/acc.png ADDED
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/backward_time.png ADDED
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/cer.png ADDED
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/clip.png ADDED
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/forward_time.png ADDED
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/gpu_max_cached_mem_GB.png ADDED
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/grad_norm.png ADDED
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/iter_time.png ADDED
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/loss.png ADDED
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/loss_att.png ADDED
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/loss_scale.png ADDED
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/optim0_lr0.png ADDED
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/optim_step_time.png ADDED
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/train_time.png ADDED
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/images/wer.png ADDED
exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/valid.loss.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08d2887ba3b9b5bbfdcc0d7fafed3565e902f6a97fd70ffa59cf5ab83d3ee78d
3
+ size 1233216410
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202310'
2
+ files:
3
+ slu_model_file: exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/valid.loss.ave_10best.pth
4
+ python: "3.9.13 (main, Aug 25 2022, 23:26:10) \n[GCC 11.2.0]"
5
+ timestamp: 1715350137.042493
6
+ torch: 2.1.0+cu121
7
+ yaml_files:
8
+ slu_train_config: exp/slu_train_asr_whisper_weighted_0.01_raw_en_word_sp/config.yaml