siuze committed on
Commit
3c38b0b
1 Parent(s): e91aca8

Update model

Browse files
Files changed (1) hide show
  1. README.md +248 -1
README.md CHANGED
@@ -28,14 +28,261 @@ cd egs2/foc-can/foc
28
  ./run.sh --skip_data_prep false --skip_train true --download_model siuze/FOC-yngping
29
  ```
30
 
 
 
 
 
 
 
 
 
 
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  ## ASR config
34
 
35
  <details><summary>expand</summary>
36
 
37
  ```
38
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  ```
40
 
41
  </details>
 
28
  ./run.sh --skip_data_prep false --skip_train true --download_model siuze/FOC-yngping
29
  ```
30
 
31
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
32
+ # RESULTS
33
+ ## Environments
34
+ - date: `Sun Apr 23 18:36:51 CST 2023`
35
+ - python version: `3.8.16 | packaged by conda-forge | (default, Feb 1 2023, 16:01:55) [GCC 11.3.0]`
36
+ - espnet version: `espnet 202301`
37
+ - pytorch version: `pytorch 1.10.0`
38
+ - Git hash: `52160d6ed337e9dec74dd59695fec1548042e0b2`
39
+ - Commit date: `Thu Mar 16 21:37:39 2023 +0000`
40
 
41
+ ## exp/asr_train_asr_transformer_raw_foc_char
42
+ ### WER
43
+
44
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
45
+ |---|---|---|---|---|---|---|---|---|
46
+ |inference_asr_model_valid.acc.ave/test|51|91|51.6|47.3|1.1|1.1|49.5|68.6|
47
+ |inference_asr_model_valid.acc.ave标准测试/test|500|1083|72.7|26.9|0.5|0.6|27.9|45.2|
48
+
49
+ ### CER
50
+
51
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
52
+ |---|---|---|---|---|---|---|---|---|
53
+ |inference_asr_model_valid.acc.ave/test|51|549|86.2|9.3|4.6|2.7|16.6|68.6|
54
+ |inference_asr_model_valid.acc.ave标准测试/test|500|6377|93.4|4.7|1.9|2.2|8.8|45.2|
55
+
56
+ ### TER
57
+
58
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
59
+ |---|---|---|---|---|---|---|---|---|
60
 
61
  ## ASR config
62
 
63
  <details><summary>expand</summary>
64
 
65
  ```
66
+ config: conf/train_asr_transformer.yaml
67
+ print_config: false
68
+ log_level: INFO
69
+ dry_run: false
70
+ iterator_type: sequence
71
+ output_dir: exp/asr_train_asr_transformer_raw_foc_char
72
+ ngpu: 1
73
+ seed: 0
74
+ num_workers: 1
75
+ num_att_plot: 3
76
+ dist_backend: nccl
77
+ dist_init_method: env://
78
+ dist_world_size: null
79
+ dist_rank: null
80
+ local_rank: 0
81
+ dist_master_addr: null
82
+ dist_master_port: null
83
+ dist_launcher: null
84
+ multiprocessing_distributed: false
85
+ unused_parameters: false
86
+ sharded_ddp: false
87
+ cudnn_enabled: true
88
+ cudnn_benchmark: false
89
+ cudnn_deterministic: true
90
+ collect_stats: false
91
+ write_collected_feats: false
92
+ max_epoch: 60
93
+ patience: 5
94
+ val_scheduler_criterion:
95
+ - valid
96
+ - loss
97
+ early_stopping_criterion:
98
+ - valid
99
+ - loss
100
+ - min
101
+ best_model_criterion:
102
+ - - valid
103
+ - acc
104
+ - max
105
+ keep_nbest_models: 10
106
+ nbest_averaging_interval: 0
107
+ grad_clip: 5.0
108
+ grad_clip_type: 2.0
109
+ grad_noise: false
110
+ accum_grad: 8
111
+ no_forward_run: false
112
+ resume: true
113
+ train_dtype: float32
114
+ use_amp: false
115
+ log_interval: null
116
+ use_matplotlib: true
117
+ use_tensorboard: true
118
+ create_graph_in_tensorboard: false
119
+ use_wandb: false
120
+ wandb_project: null
121
+ wandb_id: null
122
+ wandb_entity: null
123
+ wandb_name: null
124
+ wandb_model_log_interval: -1
125
+ detect_anomaly: false
126
+ pretrain_path: null
127
+ init_param:
128
+ - /home/pro-c/yewei/espnet/egs2/mini_an4/asr1/exp/asr_train_asr_transformer_raw_can_char/valid.acc.ave_10best.pth
129
+ ignore_init_mismatch: true
130
+ freeze_param: []
131
+ num_iters_per_epoch: null
132
+ batch_size: 16
133
+ att_r2l_infer_weight: 0.5
134
+ rescore_r2l_max: 5
135
+ valid_batch_size: null
136
+ batch_bins: 1000000
137
+ valid_batch_bins: null
138
+ train_shape_file:
139
+ - exp/asr_stats_raw_foc_char/train/speech_shape
140
+ - exp/asr_stats_raw_foc_char/train/text_shape.char
141
+ valid_shape_file:
142
+ - exp/asr_stats_raw_foc_char/valid/speech_shape
143
+ - exp/asr_stats_raw_foc_char/valid/text_shape.char
144
+ batch_type: folded
145
+ valid_batch_type: null
146
+ fold_length:
147
+ - 80000
148
+ - 150
149
+ sort_in_batch: descending
150
+ sort_batch: descending
151
+ multiple_iterator: false
152
+ chunk_length: 500
153
+ chunk_shift_ratio: 0.5
154
+ num_cache_chunks: 1024
155
+ chunk_excluded_key_prefixes: []
156
+ train_data_path_and_name_and_type:
157
+ - - dump/raw/train/wav.scp
158
+ - speech
159
+ - sound
160
+ - - dump/raw/train/text
161
+ - text
162
+ - text
163
+ valid_data_path_and_name_and_type:
164
+ - - dump/raw/dev/wav.scp
165
+ - speech
166
+ - sound
167
+ - - dump/raw/dev/text
168
+ - text
169
+ - text
170
+ allow_variable_data_keys: false
171
+ max_cache_size: 0.0
172
+ max_cache_fd: 32
173
+ valid_max_cache_size: null
174
+ exclude_weight_decay: false
175
+ exclude_weight_decay_conf: {}
176
+ optim: adam
177
+ optim_conf:
178
+ lr: 0.005
179
+ scheduler: warmuplr
180
+ scheduler_conf:
181
+ warmup_steps: 30000
182
+ token_list:
183
+ - <blank>
184
+ - <unk>
185
+ - <space>
186
+ - '3'
187
+ - '2'
188
+ - '5'
189
+ - g
190
+ - o
191
+ - a
192
+ - n
193
+ - i
194
+ - '4'
195
+ - u
196
+ - e
197
+ - k
198
+ - '1'
199
+ - j
200
+ - y
201
+ - z
202
+ - s
203
+ - h
204
+ - d
205
+ - m
206
+ - l
207
+ - c
208
+ - b
209
+ - f
210
+ - t
211
+ - w
212
+ - p
213
+ - r
214
+ - x
215
+ - v
216
+ - q
217
+ - <sos/eos>
218
+ init: xavier_uniform
219
+ input_size: null
220
+ ctc_conf:
221
+ dropout_rate: 0.0
222
+ ctc_type: builtin
223
+ reduce: true
224
+ ignore_nan_grad: null
225
+ zero_infinity: true
226
+ joint_net_conf: null
227
+ use_preprocessor: true
228
+ token_type: char
229
+ bpemodel: null
230
+ non_linguistic_symbols: null
231
+ cleaner: null
232
+ g2p: null
233
+ speech_volume_normalize: null
234
+ rir_scp: null
235
+ rir_apply_prob: 1.0
236
+ noise_scp: null
237
+ noise_apply_prob: 1.0
238
+ noise_db_range: '13_15'
239
+ short_noise_thres: 0.5
240
+ aux_ctc_tasks: []
241
+ frontend: default
242
+ frontend_conf:
243
+ fs: 16k
244
+ specaug: null
245
+ specaug_conf: {}
246
+ normalize: global_mvn
247
+ normalize_conf:
248
+ stats_file: exp/asr_stats_raw_foc_char/train/feats_stats.npz
249
+ model: espnet
250
+ model_conf:
251
+ ctc_weight: 0.3
252
+ lsm_weight: 0.1
253
+ att_r2l_weight: 0.5
254
+ length_normalized_loss: false
255
+ preencoder: null
256
+ preencoder_conf: {}
257
+ encoder: transformer
258
+ encoder_conf:
259
+ output_size: 256
260
+ attention_heads: 4
261
+ linear_units: 2048
262
+ num_blocks: 12
263
+ dropout_rate: 0.1
264
+ positional_dropout_rate: 0.1
265
+ attention_dropout_rate: 0.0
266
+ input_layer: conv2d
267
+ normalize_before: true
268
+ postencoder: null
269
+ postencoder_conf: {}
270
+ decoder: transformer
271
+ decoder_conf:
272
+ attention_heads: 4
273
+ linear_units: 2048
274
+ num_blocks: 6
275
+ dropout_rate: 0.1
276
+ positional_dropout_rate: 0.1
277
+ self_attention_dropout_rate: 0.0
278
+ src_attention_dropout_rate: 0.0
279
+ preprocessor: default
280
+ preprocessor_conf: {}
281
+ required:
282
+ - output_dir
283
+ - token_list
284
+ version: '202301'
285
+ distributed: false
286
  ```
287
 
288
  </details>