“siddhu001” committed on
Commit
017260d
1 Parent(s): 848fa78

Update model

Browse files
README.md ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - automatic-speech-recognition
6
+ language: en
7
+ datasets:
8
+ - slue-voxceleb
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 ASR model
13
+
14
+ ### `espnet/sluevoxceleb_wavlm_lightweight_sa`
15
+
16
+ This model was trained by siddhu001 using the slue-voxceleb recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout e23ef85f0b3116ad5c60d0833f186da0deec0734
26
+ pip install -e .
27
+ cd egs2/slue-voxceleb/slu1_superb_correct
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model espnet/sluevoxceleb_wavlm_lightweight_sa
29
+ ```
30
+
31
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
32
+ # RESULTS
33
+ ## Environments
34
+ - date: `Wed Feb 7 23:05:29 CST 2024`
35
+ - python version: `3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0]`
36
+ - espnet version: `espnet 202310`
37
+ - pytorch version: `pytorch 2.1.0+cu121`
38
+ - Git hash: `21d2105784e4da98397bf487b2550d4c6e16d40d`
39
+ - Commit date: `Wed Jan 31 13:40:37 2024 -0600`
40
+
41
+ ## exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp
42
+ ### WER
43
+
44
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
45
+ |---|---|---|---|---|---|---|---|---|
46
+ |decode_asr_slu_model_valid.loss.ave/devel|1436|1436|73.2|26.8|0.0|0.0|26.8|26.8|
47
+ |decode_asr_slu_model_valid.loss.ave/test|3426|3426|73.0|27.0|0.0|0.0|27.0|27.0|
48
+
49
+ ### CER
50
+
51
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
52
+ |---|---|---|---|---|---|---|---|---|
53
+ |decode_asr_slu_model_valid.loss.ave/devel|1436|10365|77.0|20.7|2.3|1.3|24.4|26.8|
54
+ |decode_asr_slu_model_valid.loss.ave/test|3426|24887|77.1|20.5|2.4|1.2|24.1|27.0|
55
+
56
+ ### TER
57
+
58
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
59
+ |---|---|---|---|---|---|---|---|---|
60
+ ## exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/decode_asr_slu_model_valid.loss.ave
61
+ ### WER
62
+
63
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
64
+ |---|---|---|---|---|---|---|---|---|
65
+ |org/devel|1437|1437|73.2|26.8|0.0|0.0|26.8|26.8|
66
+
67
+ ### CER
68
+
69
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
70
+ |---|---|---|---|---|---|---|---|---|
71
+ |org/devel|1437|10372|77.0|20.7|2.3|1.3|24.3|26.8|
72
+
73
+ ### TER
74
+
75
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
76
+ |---|---|---|---|---|---|---|---|---|
77
+
78
+ ## ASR config
79
+
80
+ <details><summary>expand</summary>
81
+
82
+ ```
83
+ config: conf/train_asr_wavlm_large_0.01.yaml
84
+ print_config: false
85
+ log_level: INFO
86
+ drop_last_iter: false
87
+ dry_run: false
88
+ iterator_type: sequence
89
+ valid_iterator_type: null
90
+ output_dir: exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp
91
+ ngpu: 1
92
+ seed: 0
93
+ num_workers: 1
94
+ num_att_plot: 3
95
+ dist_backend: nccl
96
+ dist_init_method: env://
97
+ dist_world_size: 4
98
+ dist_rank: 0
99
+ local_rank: 0
100
+ dist_master_addr: localhost
101
+ dist_master_port: 53613
102
+ dist_launcher: null
103
+ multiprocessing_distributed: true
104
+ unused_parameters: true
105
+ sharded_ddp: false
106
+ cudnn_enabled: true
107
+ cudnn_benchmark: false
108
+ cudnn_deterministic: true
109
+ collect_stats: false
110
+ write_collected_feats: false
111
+ max_epoch: 50
112
+ patience: null
113
+ val_scheduler_criterion:
114
+ - valid
115
+ - loss
116
+ early_stopping_criterion:
117
+ - valid
118
+ - loss
119
+ - min
120
+ best_model_criterion:
121
+ - - valid
122
+ - loss
123
+ - min
124
+ keep_nbest_models: 10
125
+ nbest_averaging_interval: 0
126
+ grad_clip: 5.0
127
+ grad_clip_type: 2.0
128
+ grad_noise: false
129
+ accum_grad: 1
130
+ no_forward_run: false
131
+ resume: true
132
+ train_dtype: float32
133
+ use_amp: false
134
+ log_interval: null
135
+ use_matplotlib: true
136
+ use_tensorboard: true
137
+ create_graph_in_tensorboard: false
138
+ use_wandb: false
139
+ wandb_project: null
140
+ wandb_id: null
141
+ wandb_entity: null
142
+ wandb_name: null
143
+ wandb_model_log_interval: -1
144
+ detect_anomaly: false
145
+ use_lora: false
146
+ save_lora_only: true
147
+ lora_conf: {}
148
+ pretrain_path: null
149
+ init_param: []
150
+ ignore_init_mismatch: false
151
+ freeze_param:
152
+ - frontend.upstream
153
+ num_iters_per_epoch: null
154
+ batch_size: 320
155
+ valid_batch_size: null
156
+ batch_bins: 1000000
157
+ valid_batch_bins: null
158
+ train_shape_file:
159
+ - exp/slu_stats_raw_en_word_sp/train/speech_shape
160
+ - exp/slu_stats_raw_en_word_sp/train/text_shape.word
161
+ valid_shape_file:
162
+ - exp/slu_stats_raw_en_word_sp/valid/speech_shape
163
+ - exp/slu_stats_raw_en_word_sp/valid/text_shape.word
164
+ batch_type: folded
165
+ valid_batch_type: null
166
+ fold_length:
167
+ - 80000
168
+ - 150
169
+ sort_in_batch: descending
170
+ shuffle_within_batch: false
171
+ sort_batch: descending
172
+ multiple_iterator: false
173
+ chunk_length: 500
174
+ chunk_shift_ratio: 0.5
175
+ num_cache_chunks: 1024
176
+ chunk_excluded_key_prefixes: []
177
+ chunk_default_fs: null
178
+ train_data_path_and_name_and_type:
179
+ - - dump/raw/train_sp/wav.scp
180
+ - speech
181
+ - sound
182
+ - - dump/raw/train_sp/text
183
+ - text
184
+ - text
185
+ valid_data_path_and_name_and_type:
186
+ - - dump/raw/devel/wav.scp
187
+ - speech
188
+ - sound
189
+ - - dump/raw/devel/text
190
+ - text
191
+ - text
192
+ allow_variable_data_keys: false
193
+ max_cache_size: 0.0
194
+ max_cache_fd: 32
195
+ allow_multi_rates: false
196
+ valid_max_cache_size: null
197
+ exclude_weight_decay: false
198
+ exclude_weight_decay_conf: {}
199
+ optim: adam
200
+ optim_conf:
201
+ lr: 0.01
202
+ scheduler: warmuplr
203
+ scheduler_conf:
204
+ warmup_steps: 1000
205
+ token_list:
206
+ - <blank>
207
+ - <unk>
208
+ - Neutral
209
+ - Positive
210
+ - Negative
211
+ - <sos/eos>
212
+ transcript_token_list: null
213
+ two_pass: false
214
+ pre_postencoder_norm: false
215
+ init: null
216
+ input_size: null
217
+ ctc_conf:
218
+ dropout_rate: 0.0
219
+ ctc_type: builtin
220
+ reduce: true
221
+ ignore_nan_grad: null
222
+ zero_infinity: true
223
+ brctc_risk_strategy: exp
224
+ brctc_group_strategy: end
225
+ brctc_risk_factor: 0.0
226
+ joint_net_conf: null
227
+ use_preprocessor: true
228
+ token_type: word
229
+ bpemodel: null
230
+ non_linguistic_symbols: null
231
+ cleaner: null
232
+ g2p: null
233
+ speech_volume_normalize: null
234
+ rir_scp: null
235
+ rir_apply_prob: 1.0
236
+ noise_scp: null
237
+ noise_apply_prob: 1.0
238
+ noise_db_range: '13_15'
239
+ short_noise_thres: 0.5
240
+ frontend: s3prl
241
+ frontend_conf:
242
+ frontend_conf:
243
+ upstream: wavlm_large
244
+ download_dir: ./hub
245
+ multilayer_feature: true
246
+ fs: 16k
247
+ specaug: specaug
248
+ specaug_conf:
249
+ apply_time_warp: true
250
+ time_warp_window: 5
251
+ time_warp_mode: bicubic
252
+ apply_freq_mask: true
253
+ freq_mask_width_range:
254
+ - 0
255
+ - 30
256
+ num_freq_mask: 2
257
+ apply_time_mask: true
258
+ time_mask_width_range:
259
+ - 0
260
+ - 40
261
+ num_time_mask: 2
262
+ normalize: utterance_mvn
263
+ normalize_conf: {}
264
+ model: espnet
265
+ model_conf:
266
+ ctc_weight: 0.0
267
+ lsm_weight: 0.0
268
+ length_normalized_loss: false
269
+ superb_setup: true
270
+ num_class: 3
271
+ ssl_input_size: 1024
272
+ extract_feats_in_collect_stats: false
273
+ preencoder: null
274
+ preencoder_conf: {}
275
+ encoder: rnn
276
+ encoder_conf: {}
277
+ postencoder: null
278
+ postencoder_conf: {}
279
+ deliberationencoder: null
280
+ deliberationencoder_conf: {}
281
+ decoder: rnn
282
+ decoder_conf: {}
283
+ postdecoder: null
284
+ postdecoder_conf: {}
285
+ required:
286
+ - output_dir
287
+ - token_list
288
+ version: '202310'
289
+ distributed: true
290
+ ```
291
+
292
+ </details>
293
+
294
+
295
+
296
+ ### Citing ESPnet
297
+
298
+ ```bibtex
299
+ @inproceedings{watanabe2018espnet,
300
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
301
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
302
+ year={2018},
303
+ booktitle={Proceedings of Interspeech},
304
+ pages={2207--2211},
305
+ doi={10.21437/Interspeech.2018-1456},
306
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
307
+ }
308
+
309
+
310
+
311
+
312
+
313
+
314
+ ```
315
+
316
+ or arXiv:
317
+
318
+ ```bibtex
319
+ @misc{watanabe2018espnet,
320
+ title={ESPnet: End-to-End Speech Processing Toolkit},
321
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
322
+ year={2018},
323
+ eprint={1804.00015},
324
+ archivePrefix={arXiv},
325
+ primaryClass={cs.CL}
326
+ }
327
+ ```
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/RESULTS.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Wed Feb 7 23:05:29 CST 2024`
5
+ - python version: `3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0]`
6
+ - espnet version: `espnet 202310`
7
+ - pytorch version: `pytorch 2.1.0+cu121`
8
+ - Git hash: `21d2105784e4da98397bf487b2550d4c6e16d40d`
9
+ - Commit date: `Wed Jan 31 13:40:37 2024 -0600`
10
+
11
+ ## exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp
12
+ ### WER
13
+
14
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
15
+ |---|---|---|---|---|---|---|---|---|
16
+ |decode_asr_slu_model_valid.loss.ave/devel|1436|1436|73.2|26.8|0.0|0.0|26.8|26.8|
17
+ |decode_asr_slu_model_valid.loss.ave/test|3426|3426|73.0|27.0|0.0|0.0|27.0|27.0|
18
+
19
+ ### CER
20
+
21
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
22
+ |---|---|---|---|---|---|---|---|---|
23
+ |decode_asr_slu_model_valid.loss.ave/devel|1436|10365|77.0|20.7|2.3|1.3|24.4|26.8|
24
+ |decode_asr_slu_model_valid.loss.ave/test|3426|24887|77.1|20.5|2.4|1.2|24.1|27.0|
25
+
26
+ ### TER
27
+
28
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
29
+ |---|---|---|---|---|---|---|---|---|
30
+ ## exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/decode_asr_slu_model_valid.loss.ave
31
+ ### WER
32
+
33
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
34
+ |---|---|---|---|---|---|---|---|---|
35
+ |org/devel|1437|1437|73.2|26.8|0.0|0.0|26.8|26.8|
36
+
37
+ ### CER
38
+
39
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
40
+ |---|---|---|---|---|---|---|---|---|
41
+ |org/devel|1437|10372|77.0|20.7|2.3|1.3|24.3|26.8|
42
+
43
+ ### TER
44
+
45
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
46
+ |---|---|---|---|---|---|---|---|---|
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/config.yaml ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_asr_wavlm_large_0.01.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 1
12
+ num_att_plot: 3
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: 4
16
+ dist_rank: 0
17
+ local_rank: 0
18
+ dist_master_addr: localhost
19
+ dist_master_port: 53613
20
+ dist_launcher: null
21
+ multiprocessing_distributed: true
22
+ unused_parameters: true
23
+ sharded_ddp: false
24
+ cudnn_enabled: true
25
+ cudnn_benchmark: false
26
+ cudnn_deterministic: true
27
+ collect_stats: false
28
+ write_collected_feats: false
29
+ max_epoch: 50
30
+ patience: null
31
+ val_scheduler_criterion:
32
+ - valid
33
+ - loss
34
+ early_stopping_criterion:
35
+ - valid
36
+ - loss
37
+ - min
38
+ best_model_criterion:
39
+ - - valid
40
+ - loss
41
+ - min
42
+ keep_nbest_models: 10
43
+ nbest_averaging_interval: 0
44
+ grad_clip: 5.0
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 1
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: false
52
+ log_interval: null
53
+ use_matplotlib: true
54
+ use_tensorboard: true
55
+ create_graph_in_tensorboard: false
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ use_lora: false
64
+ save_lora_only: true
65
+ lora_conf: {}
66
+ pretrain_path: null
67
+ init_param: []
68
+ ignore_init_mismatch: false
69
+ freeze_param:
70
+ - frontend.upstream
71
+ num_iters_per_epoch: null
72
+ batch_size: 320
73
+ valid_batch_size: null
74
+ batch_bins: 1000000
75
+ valid_batch_bins: null
76
+ train_shape_file:
77
+ - exp/slu_stats_raw_en_word_sp/train/speech_shape
78
+ - exp/slu_stats_raw_en_word_sp/train/text_shape.word
79
+ valid_shape_file:
80
+ - exp/slu_stats_raw_en_word_sp/valid/speech_shape
81
+ - exp/slu_stats_raw_en_word_sp/valid/text_shape.word
82
+ batch_type: folded
83
+ valid_batch_type: null
84
+ fold_length:
85
+ - 80000
86
+ - 150
87
+ sort_in_batch: descending
88
+ shuffle_within_batch: false
89
+ sort_batch: descending
90
+ multiple_iterator: false
91
+ chunk_length: 500
92
+ chunk_shift_ratio: 0.5
93
+ num_cache_chunks: 1024
94
+ chunk_excluded_key_prefixes: []
95
+ chunk_default_fs: null
96
+ train_data_path_and_name_and_type:
97
+ - - dump/raw/train_sp/wav.scp
98
+ - speech
99
+ - sound
100
+ - - dump/raw/train_sp/text
101
+ - text
102
+ - text
103
+ valid_data_path_and_name_and_type:
104
+ - - dump/raw/devel/wav.scp
105
+ - speech
106
+ - sound
107
+ - - dump/raw/devel/text
108
+ - text
109
+ - text
110
+ allow_variable_data_keys: false
111
+ max_cache_size: 0.0
112
+ max_cache_fd: 32
113
+ allow_multi_rates: false
114
+ valid_max_cache_size: null
115
+ exclude_weight_decay: false
116
+ exclude_weight_decay_conf: {}
117
+ optim: adam
118
+ optim_conf:
119
+ lr: 0.01
120
+ scheduler: warmuplr
121
+ scheduler_conf:
122
+ warmup_steps: 1000
123
+ token_list:
124
+ - <blank>
125
+ - <unk>
126
+ - Neutral
127
+ - Positive
128
+ - Negative
129
+ - <sos/eos>
130
+ transcript_token_list: null
131
+ two_pass: false
132
+ pre_postencoder_norm: false
133
+ init: null
134
+ input_size: null
135
+ ctc_conf:
136
+ dropout_rate: 0.0
137
+ ctc_type: builtin
138
+ reduce: true
139
+ ignore_nan_grad: null
140
+ zero_infinity: true
141
+ brctc_risk_strategy: exp
142
+ brctc_group_strategy: end
143
+ brctc_risk_factor: 0.0
144
+ joint_net_conf: null
145
+ use_preprocessor: true
146
+ token_type: word
147
+ bpemodel: null
148
+ non_linguistic_symbols: null
149
+ cleaner: null
150
+ g2p: null
151
+ speech_volume_normalize: null
152
+ rir_scp: null
153
+ rir_apply_prob: 1.0
154
+ noise_scp: null
155
+ noise_apply_prob: 1.0
156
+ noise_db_range: '13_15'
157
+ short_noise_thres: 0.5
158
+ frontend: s3prl
159
+ frontend_conf:
160
+ frontend_conf:
161
+ upstream: wavlm_large
162
+ download_dir: ./hub
163
+ multilayer_feature: true
164
+ fs: 16k
165
+ specaug: specaug
166
+ specaug_conf:
167
+ apply_time_warp: true
168
+ time_warp_window: 5
169
+ time_warp_mode: bicubic
170
+ apply_freq_mask: true
171
+ freq_mask_width_range:
172
+ - 0
173
+ - 30
174
+ num_freq_mask: 2
175
+ apply_time_mask: true
176
+ time_mask_width_range:
177
+ - 0
178
+ - 40
179
+ num_time_mask: 2
180
+ normalize: utterance_mvn
181
+ normalize_conf: {}
182
+ model: espnet
183
+ model_conf:
184
+ ctc_weight: 0.0
185
+ lsm_weight: 0.0
186
+ length_normalized_loss: false
187
+ superb_setup: true
188
+ num_class: 3
189
+ ssl_input_size: 1024
190
+ extract_feats_in_collect_stats: false
191
+ preencoder: null
192
+ preencoder_conf: {}
193
+ encoder: rnn
194
+ encoder_conf: {}
195
+ postencoder: null
196
+ postencoder_conf: {}
197
+ deliberationencoder: null
198
+ deliberationencoder_conf: {}
199
+ decoder: rnn
200
+ decoder_conf: {}
201
+ postdecoder: null
202
+ postdecoder_conf: {}
203
+ required:
204
+ - output_dir
205
+ - token_list
206
+ version: '202310'
207
+ distributed: true
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/images/acc.png ADDED
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/images/backward_time.png ADDED
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/images/cer.png ADDED
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/images/clip.png ADDED
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/images/forward_time.png ADDED
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/images/gpu_max_cached_mem_GB.png ADDED
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/images/grad_norm.png ADDED
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/images/iter_time.png ADDED
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/images/loss.png ADDED
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/images/loss_att.png ADDED
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/images/loss_scale.png ADDED
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/images/optim0_lr0.png ADDED
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/images/optim_step_time.png ADDED
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/images/train_time.png ADDED
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/images/wer.png ADDED
exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/valid.loss.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:320cbb998d96bf023a9d3b04b4cd844a8ce5dc4cf3d9fb3dfed478a16d12a0f4
3
+ size 1266238662
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202310'
2
+ files:
3
+ slu_model_file: exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/valid.loss.ave_10best.pth
4
+ python: "3.9.13 (main, Aug 25 2022, 23:26:10) \n[GCC 11.2.0]"
5
+ timestamp: 1715349140.023715
6
+ torch: 2.1.0+cu121
7
+ yaml_files:
8
+ slu_train_config: exp/slu_train_asr_wavlm_large_0.01_raw_en_word_sp/config.yaml