pyf98 committed on
Commit
ec23449
1 Parent(s): 3dbe59f

add model files

Files changed (21)
  1. README.md +797 -0
  2. data/en_token_list/bpe_unigram500/bpe.model +3 -0
  3. exp/asr_stats_raw_en_bpe500_sp/train/feats_stats.npz +3 -0
  4. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/RESULTS.md +32 -0
  5. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/config.yaml +694 -0
  6. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/acc.png +0 -0
  7. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/backward_time.png +0 -0
  8. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/cer.png +0 -0
  9. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/cer_ctc.png +0 -0
  10. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/forward_time.png +0 -0
  11. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/gpu_max_cached_mem_GB.png +0 -0
  12. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/iter_time.png +0 -0
  13. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/loss.png +0 -0
  14. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/loss_att.png +0 -0
  15. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/loss_ctc.png +0 -0
  16. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/optim0_lr0.png +0 -0
  17. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/optim_step_time.png +0 -0
  18. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/train_time.png +0 -0
  19. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/wer.png +0 -0
  20. exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/valid.cer_ctc.ave_10best.pth +3 -0
  21. meta.yaml +8 -0
README.md ADDED
@@ -0,0 +1,797 @@
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - automatic-speech-recognition
6
+ language: en
7
+ datasets:
8
+ - tedlium2
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 ASR model
13
+
14
+ ### `pyf98/tedlium2_ctc_conformer_e15_linear1024`
15
+
16
+ This model was trained by Yifan Peng using the tedlium2 recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout e62de171f1d11015cb856f83780c61bd5ca7fa8f
26
+ pip install -e .
27
+ cd egs2/tedlium2/asr1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model pyf98/tedlium2_ctc_conformer_e15_linear1024
29
+ ```
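+
+ Alternatively, the model can be loaded directly for inference from Python. The following is a minimal sketch rather than part of the recipe: it assumes `espnet_model_zoo` and `soundfile` are installed, and `speech.wav` stands in for any 16 kHz mono recording. Passing `ctc_weight=1.0` mirrors the CTC-only training setup shown in the config below (an assumption about the intended decoding configuration).
+
+ ```python
+ import soundfile
+ from espnet2.bin.asr_inference import Speech2Text
+
+ # Download the model from the Hugging Face Hub and build the inference wrapper.
+ speech2text = Speech2Text.from_pretrained(
+     "pyf98/tedlium2_ctc_conformer_e15_linear1024",
+     ctc_weight=1.0,  # assumption: decode with CTC only, matching training
+ )
+
+ # "speech.wav" is a placeholder path; the frontend expects 16 kHz audio.
+ speech, rate = soundfile.read("speech.wav")
+ nbests = speech2text(speech)
+ text, *_ = nbests[0]
+ print(text)
+ ```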
30
+
31
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
32
+ # RESULTS
33
+ ## Environments
34
+ - date: `Fri Dec 30 08:37:09 CST 2022`
35
+ - python version: `3.9.15 (main, Nov 24 2022, 14:31:59) [GCC 11.2.0]`
36
+ - espnet version: `espnet 202211`
37
+ - pytorch version: `pytorch 1.12.1`
38
+ - Git hash: `e62de171f1d11015cb856f83780c61bd5ca7fa8f`
39
+ - Commit date: `Thu Dec 29 14:18:44 2022 -0500`
40
+
41
+ ## asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp
42
+ ### WER
43
+
44
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
45
+ |---|---|---|---|---|---|---|---|---|
46
+ |decode_asr_ctc_asr_model_valid.cer_ctc.ave/dev|466|14671|92.2|5.6|2.2|1.2|9.1|75.3|
47
+ |decode_asr_ctc_asr_model_valid.cer_ctc.ave/test|1155|27500|92.1|5.4|2.5|1.1|9.0|72.8|
48
+
49
+ ### CER
50
+
51
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
52
+ |---|---|---|---|---|---|---|---|---|
53
+ |decode_asr_ctc_asr_model_valid.cer_ctc.ave/dev|466|78259|97.0|0.9|2.1|1.2|4.2|75.3|
54
+ |decode_asr_ctc_asr_model_valid.cer_ctc.ave/test|1155|145066|96.9|0.9|2.2|1.2|4.3|72.8|
55
+
56
+ ### TER
57
+
58
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
59
+ |---|---|---|---|---|---|---|---|---|
60
+ |decode_asr_ctc_asr_model_valid.cer_ctc.ave/dev|466|28296|94.5|3.1|2.4|1.2|6.7|75.3|
61
+ |decode_asr_ctc_asr_model_valid.cer_ctc.ave/test|1155|52113|94.6|2.9|2.5|1.2|6.5|72.8|
62
+
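+ In these tables, Corr, Sub, Del, and Ins are percentages of the reference units counted in the Wrd column, Err is the overall error rate, and S.Err is the fraction of sentences containing at least one error. A quick sanity check on the dev WER row (illustrative only; the displayed figures are rounded to one decimal, so the sum can differ from Err by 0.1):
+
+ ```python
+ # Dev WER row: Corr=92.2, Sub=5.6, Del=2.2, Ins=1.2
+ corr, sub, dele, ins = 92.2, 5.6, 2.2, 1.2
+ print(corr + sub + dele)  # 100.0: correct + substituted + deleted reference words
+ print(sub + dele + ins)   # 9.0, reported as Err=9.1 because of rounding
+ ```
+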
63
+ ## ASR config
64
+
65
+ <details><summary>expand</summary>
66
+
67
+ ```
68
+ config: conf/tuning/train_asr_ctc_conformer_e15_linear1024.yaml
69
+ print_config: false
70
+ log_level: INFO
71
+ dry_run: false
72
+ iterator_type: sequence
73
+ output_dir: exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp
74
+ ngpu: 1
75
+ seed: 2022
76
+ num_workers: 4
77
+ num_att_plot: 3
78
+ dist_backend: nccl
79
+ dist_init_method: env://
80
+ dist_world_size: 2
81
+ dist_rank: 0
82
+ local_rank: 0
83
+ dist_master_addr: localhost
84
+ dist_master_port: 53439
85
+ dist_launcher: null
86
+ multiprocessing_distributed: true
87
+ unused_parameters: false
88
+ sharded_ddp: false
89
+ cudnn_enabled: true
90
+ cudnn_benchmark: false
91
+ cudnn_deterministic: true
92
+ collect_stats: false
93
+ write_collected_feats: false
94
+ max_epoch: 50
95
+ patience: null
96
+ val_scheduler_criterion:
97
+ - valid
98
+ - loss
99
+ early_stopping_criterion:
100
+ - valid
101
+ - loss
102
+ - min
103
+ best_model_criterion:
104
+ - - valid
105
+ - cer_ctc
106
+ - min
107
+ keep_nbest_models: 10
108
+ nbest_averaging_interval: 0
109
+ grad_clip: 5.0
110
+ grad_clip_type: 2.0
111
+ grad_noise: false
112
+ accum_grad: 1
113
+ no_forward_run: false
114
+ resume: true
115
+ train_dtype: float32
116
+ use_amp: true
117
+ log_interval: null
118
+ use_matplotlib: true
119
+ use_tensorboard: true
120
+ create_graph_in_tensorboard: false
121
+ use_wandb: false
122
+ wandb_project: null
123
+ wandb_id: null
124
+ wandb_entity: null
125
+ wandb_name: null
126
+ wandb_model_log_interval: -1
127
+ detect_anomaly: false
128
+ pretrain_path: null
129
+ init_param: []
130
+ ignore_init_mismatch: false
131
+ freeze_param: []
132
+ num_iters_per_epoch: null
133
+ batch_size: 20
134
+ valid_batch_size: null
135
+ batch_bins: 50000000
136
+ valid_batch_bins: null
137
+ train_shape_file:
138
+ - exp/asr_stats_raw_en_bpe500_sp/train/speech_shape
139
+ - exp/asr_stats_raw_en_bpe500_sp/train/text_shape.bpe
140
+ valid_shape_file:
141
+ - exp/asr_stats_raw_en_bpe500_sp/valid/speech_shape
142
+ - exp/asr_stats_raw_en_bpe500_sp/valid/text_shape.bpe
143
+ batch_type: numel
144
+ valid_batch_type: null
145
+ fold_length:
146
+ - 80000
147
+ - 150
148
+ sort_in_batch: descending
149
+ sort_batch: descending
150
+ multiple_iterator: false
151
+ chunk_length: 500
152
+ chunk_shift_ratio: 0.5
153
+ num_cache_chunks: 1024
154
+ train_data_path_and_name_and_type:
155
+ - - dump/raw/train_sp/wav.scp
156
+ - speech
157
+ - kaldi_ark
158
+ - - dump/raw/train_sp/text
159
+ - text
160
+ - text
161
+ valid_data_path_and_name_and_type:
162
+ - - dump/raw/dev/wav.scp
163
+ - speech
164
+ - kaldi_ark
165
+ - - dump/raw/dev/text
166
+ - text
167
+ - text
168
+ allow_variable_data_keys: false
169
+ max_cache_size: 0.0
170
+ max_cache_fd: 32
171
+ valid_max_cache_size: null
172
+ optim: adam
173
+ optim_conf:
174
+ lr: 0.002
175
+ weight_decay: 1.0e-06
176
+ scheduler: warmuplr
177
+ scheduler_conf:
178
+ warmup_steps: 15000
179
+ token_list:
180
+ - <blank>
181
+ - <unk>
182
+ - s
183
+ - ▁the
184
+ - t
185
+ - ▁a
186
+ - ▁and
187
+ - ▁to
188
+ - d
189
+ - e
190
+ - ▁of
191
+ - ''''
192
+ - n
193
+ - ing
194
+ - ▁in
195
+ - ▁i
196
+ - ▁that
197
+ - i
198
+ - a
199
+ - l
200
+ - p
201
+ - m
202
+ - y
203
+ - o
204
+ - ▁it
205
+ - ▁we
206
+ - c
207
+ - u
208
+ - ▁you
209
+ - ed
210
+ - ▁
211
+ - r
212
+ - ▁is
213
+ - re
214
+ - ▁this
215
+ - ar
216
+ - g
217
+ - ▁so
218
+ - al
219
+ - b
220
+ - ▁s
221
+ - or
222
+ - ▁f
223
+ - ▁c
224
+ - in
225
+ - k
226
+ - f
227
+ - ▁for
228
+ - ic
229
+ - er
230
+ - le
231
+ - ▁be
232
+ - ▁do
233
+ - ▁re
234
+ - ve
235
+ - ▁e
236
+ - ▁w
237
+ - ▁was
238
+ - es
239
+ - ▁they
240
+ - ly
241
+ - h
242
+ - ▁on
243
+ - v
244
+ - ▁are
245
+ - ri
246
+ - ▁have
247
+ - an
248
+ - ▁what
249
+ - ▁with
250
+ - ▁t
251
+ - w
252
+ - ur
253
+ - it
254
+ - ent
255
+ - ▁can
256
+ - ▁he
257
+ - ▁but
258
+ - ra
259
+ - ce
260
+ - ▁me
261
+ - ▁b
262
+ - ▁ma
263
+ - ▁p
264
+ - ll
265
+ - ▁st
266
+ - ▁one
267
+ - 'on'
268
+ - ▁about
269
+ - th
270
+ - ▁de
271
+ - en
272
+ - ▁all
273
+ - ▁not
274
+ - il
275
+ - ▁g
276
+ - ch
277
+ - at
278
+ - ▁there
279
+ - ▁mo
280
+ - ter
281
+ - ation
282
+ - tion
283
+ - ▁at
284
+ - ▁my
285
+ - ro
286
+ - ▁as
287
+ - te
288
+ - ▁le
289
+ - ▁con
290
+ - ▁like
291
+ - ▁people
292
+ - ▁or
293
+ - ▁an
294
+ - el
295
+ - ▁if
296
+ - ▁from
297
+ - ver
298
+ - ▁su
299
+ - ▁co
300
+ - ate
301
+ - ▁these
302
+ - ol
303
+ - ci
304
+ - ▁now
305
+ - ▁see
306
+ - ▁out
307
+ - ▁our
308
+ - ion
309
+ - ▁know
310
+ - ect
311
+ - ▁just
312
+ - as
313
+ - ▁ex
314
+ - ▁ch
315
+ - ▁d
316
+ - ▁when
317
+ - ▁very
318
+ - ▁think
319
+ - ▁who
320
+ - ▁because
321
+ - ▁go
322
+ - ▁up
323
+ - ▁us
324
+ - ▁pa
325
+ - ▁no
326
+ - ies
327
+ - ▁di
328
+ - ▁ho
329
+ - om
330
+ - ive
331
+ - ▁get
332
+ - id
333
+ - ▁o
334
+ - ▁hi
335
+ - un
336
+ - ▁how
337
+ - ▁by
338
+ - ir
339
+ - et
340
+ - ck
341
+ - ity
342
+ - ▁po
343
+ - ul
344
+ - ▁which
345
+ - ▁mi
346
+ - ▁some
347
+ - z
348
+ - ▁sp
349
+ - ▁un
350
+ - ▁going
351
+ - ▁pro
352
+ - ist
353
+ - ▁se
354
+ - ▁look
355
+ - ▁time
356
+ - ment
357
+ - de
358
+ - ▁more
359
+ - ▁had
360
+ - ng
361
+ - ▁would
362
+ - ge
363
+ - la
364
+ - ▁here
365
+ - ▁really
366
+ - x
367
+ - ▁your
368
+ - ▁them
369
+ - us
370
+ - me
371
+ - ▁en
372
+ - ▁two
373
+ - ▁k
374
+ - ▁li
375
+ - ▁world
376
+ - ne
377
+ - ow
378
+ - ▁way
379
+ - ▁want
380
+ - ▁work
381
+ - ▁don
382
+ - ▁lo
383
+ - ▁fa
384
+ - ▁were
385
+ - ▁their
386
+ - age
387
+ - vi
388
+ - ▁ha
389
+ - ac
390
+ - der
391
+ - est
392
+ - ▁bo
393
+ - am
394
+ - ▁other
395
+ - able
396
+ - ▁actually
397
+ - ▁sh
398
+ - ▁make
399
+ - ▁ba
400
+ - ▁la
401
+ - ine
402
+ - ▁into
403
+ - ▁where
404
+ - ▁could
405
+ - ▁comp
406
+ - ting
407
+ - ▁has
408
+ - ▁will
409
+ - ▁ne
410
+ - j
411
+ - ical
412
+ - ally
413
+ - ▁vi
414
+ - ▁things
415
+ - ▁te
416
+ - igh
417
+ - ▁say
418
+ - ▁years
419
+ - ers
420
+ - ▁ra
421
+ - ther
422
+ - ▁than
423
+ - ru
424
+ - ▁ro
425
+ - op
426
+ - ▁did
427
+ - ▁any
428
+ - ▁new
429
+ - ound
430
+ - ig
431
+ - ▁well
432
+ - mo
433
+ - ▁she
434
+ - ▁na
435
+ - ▁been
436
+ - he
437
+ - ▁thousand
438
+ - ▁car
439
+ - ▁take
440
+ - ▁right
441
+ - ▁then
442
+ - ▁need
443
+ - ▁start
444
+ - ▁hundred
445
+ - ▁something
446
+ - ▁over
447
+ - ▁com
448
+ - ia
449
+ - ▁kind
450
+ - um
451
+ - if
452
+ - ▁those
453
+ - ▁first
454
+ - ▁pre
455
+ - ta
456
+ - ▁said
457
+ - ize
458
+ - end
459
+ - ▁even
460
+ - ▁thing
461
+ - one
462
+ - ▁back
463
+ - ite
464
+ - ▁every
465
+ - ▁little
466
+ - ry
467
+ - ▁life
468
+ - ▁much
469
+ - ke
470
+ - ▁also
471
+ - ▁most
472
+ - ant
473
+ - per
474
+ - ▁three
475
+ - ▁come
476
+ - ▁lot
477
+ - ance
478
+ - ▁got
479
+ - ▁talk
480
+ - ▁per
481
+ - ▁inter
482
+ - ▁sa
483
+ - ▁use
484
+ - ▁mu
485
+ - ▁part
486
+ - ish
487
+ - ence
488
+ - ▁happen
489
+ - ▁bi
490
+ - ▁mean
491
+ - ough
492
+ - ▁qu
493
+ - ▁bu
494
+ - ▁day
495
+ - ▁ga
496
+ - ▁only
497
+ - ▁many
498
+ - ▁different
499
+ - ▁dr
500
+ - ▁th
501
+ - ▁show
502
+ - ful
503
+ - ▁down
504
+ - ated
505
+ - ▁good
506
+ - ▁tra
507
+ - ▁around
508
+ - ▁idea
509
+ - ▁human
510
+ - ous
511
+ - ▁put
512
+ - ▁through
513
+ - ▁five
514
+ - ▁why
515
+ - ▁change
516
+ - ▁real
517
+ - ff
518
+ - ible
519
+ - ▁fact
520
+ - ▁same
521
+ - ▁jo
522
+ - ▁live
523
+ - ▁year
524
+ - ▁problem
525
+ - ▁ph
526
+ - ▁four
527
+ - ▁give
528
+ - ▁big
529
+ - ▁tell
530
+ - ▁great
531
+ - ▁try
532
+ - ▁va
533
+ - ▁ru
534
+ - ▁system
535
+ - ▁six
536
+ - ▁plan
537
+ - ▁place
538
+ - ▁build
539
+ - ▁called
540
+ - ▁again
541
+ - ▁point
542
+ - ▁twenty
543
+ - ▁percent
544
+ - ▁nine
545
+ - ▁find
546
+ - ▁app
547
+ - ▁after
548
+ - ▁long
549
+ - ▁eight
550
+ - ▁imp
551
+ - ▁gene
552
+ - ▁design
553
+ - ▁today
554
+ - ▁should
555
+ - ▁made
556
+ - ious
557
+ - ▁came
558
+ - ▁learn
559
+ - ▁last
560
+ - ▁own
561
+ - way
562
+ - ▁turn
563
+ - ▁seven
564
+ - ▁high
565
+ - ▁question
566
+ - ▁person
567
+ - ▁brain
568
+ - ▁important
569
+ - ▁another
570
+ - ▁thought
571
+ - ▁trans
572
+ - ▁create
573
+ - ness
574
+ - ▁hu
575
+ - ▁power
576
+ - ▁act
577
+ - land
578
+ - ▁play
579
+ - ▁sort
580
+ - ▁old
581
+ - ▁before
582
+ - ▁course
583
+ - ▁understand
584
+ - ▁feel
585
+ - ▁might
586
+ - ▁each
587
+ - ▁million
588
+ - ▁better
589
+ - ▁together
590
+ - ▁ago
591
+ - ▁example
592
+ - ▁help
593
+ - ▁story
594
+ - ▁next
595
+ - ▁hand
596
+ - ▁school
597
+ - ▁water
598
+ - ▁develop
599
+ - ▁technology
600
+ - que
601
+ - ▁second
602
+ - ▁grow
603
+ - ▁still
604
+ - ▁cell
605
+ - ▁believe
606
+ - ▁number
607
+ - ▁small
608
+ - ▁between
609
+ - qui
610
+ - ▁data
611
+ - ▁become
612
+ - ▁america
613
+ - ▁maybe
614
+ - ▁space
615
+ - ▁project
616
+ - ▁organ
617
+ - ▁vo
618
+ - ▁children
619
+ - ▁book
620
+ - graph
621
+ - ▁open
622
+ - ▁fifty
623
+ - ▁picture
624
+ - ▁health
625
+ - ▁thirty
626
+ - ▁africa
627
+ - ▁reason
628
+ - ▁large
629
+ - ▁hard
630
+ - ▁computer
631
+ - ▁always
632
+ - ▁sense
633
+ - ▁money
634
+ - ▁women
635
+ - ▁everything
636
+ - ▁information
637
+ - ▁country
638
+ - ▁teach
639
+ - ▁energy
640
+ - ▁experience
641
+ - ▁food
642
+ - ▁process
643
+ - qua
644
+ - ▁interesting
645
+ - ▁future
646
+ - ▁science
647
+ - q
648
+ - '0'
649
+ - '5'
650
+ - '6'
651
+ - '9'
652
+ - '3'
653
+ - '8'
654
+ - '4'
655
+ - N
656
+ - A
657
+ - '7'
658
+ - S
659
+ - G
660
+ - F
661
+ - R
662
+ - L
663
+ - U
664
+ - E
665
+ - T
666
+ - H
667
+ - _
668
+ - B
669
+ - D
670
+ - J
671
+ - M
672
+ - ă
673
+ - ō
674
+ - ť
675
+ - '2'
676
+ - '-'
677
+ - '1'
678
+ - C
679
+ - <sos/eos>
680
+ init: null
681
+ input_size: null
682
+ ctc_conf:
683
+ dropout_rate: 0.0
684
+ ctc_type: builtin
685
+ reduce: true
686
+ ignore_nan_grad: null
687
+ zero_infinity: true
688
+ joint_net_conf: null
689
+ use_preprocessor: true
690
+ token_type: bpe
691
+ bpemodel: data/en_token_list/bpe_unigram500/bpe.model
692
+ non_linguistic_symbols: null
693
+ cleaner: null
694
+ g2p: null
695
+ speech_volume_normalize: null
696
+ rir_scp: null
697
+ rir_apply_prob: 1.0
698
+ noise_scp: null
699
+ noise_apply_prob: 1.0
700
+ noise_db_range: '13_15'
701
+ short_noise_thres: 0.5
702
+ frontend: default
703
+ frontend_conf:
704
+ n_fft: 512
705
+ win_length: 400
706
+ hop_length: 160
707
+ fs: 16k
708
+ specaug: specaug
709
+ specaug_conf:
710
+ apply_time_warp: true
711
+ time_warp_window: 5
712
+ time_warp_mode: bicubic
713
+ apply_freq_mask: true
714
+ freq_mask_width_range:
715
+ - 0
716
+ - 27
717
+ num_freq_mask: 2
718
+ apply_time_mask: true
719
+ time_mask_width_ratio_range:
720
+ - 0.0
721
+ - 0.05
722
+ num_time_mask: 5
723
+ normalize: global_mvn
724
+ normalize_conf:
725
+ stats_file: exp/asr_stats_raw_en_bpe500_sp/train/feats_stats.npz
726
+ model: espnet
727
+ model_conf:
728
+ ctc_weight: 1.0
729
+ lsm_weight: 0.1
730
+ length_normalized_loss: false
731
+ preencoder: null
732
+ preencoder_conf: {}
733
+ encoder: conformer
734
+ encoder_conf:
735
+ output_size: 256
736
+ attention_heads: 4
737
+ linear_units: 1024
738
+ num_blocks: 15
739
+ dropout_rate: 0.1
740
+ positional_dropout_rate: 0.1
741
+ attention_dropout_rate: 0.1
742
+ input_layer: conv2d
743
+ normalize_before: true
744
+ macaron_style: true
745
+ rel_pos_type: latest
746
+ pos_enc_layer_type: rel_pos
747
+ selfattention_layer_type: rel_selfattn
748
+ activation_type: swish
749
+ use_cnn_module: true
750
+ cnn_module_kernel: 31
751
+ postencoder: null
752
+ postencoder_conf: {}
753
+ decoder: rnn
754
+ decoder_conf: {}
755
+ preprocessor: default
756
+ preprocessor_conf: {}
757
+ required:
758
+ - output_dir
759
+ - token_list
760
+ version: '202211'
761
+ distributed: true
762
+ ```
763
+
764
+ </details>
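+
+ Note that `ctc_weight: 1.0` together with the placeholder `rnn` decoder means this model is trained with the CTC objective only. For illustration only (this is not the ESPnet decoding code), greedy CTC decoding then amounts to taking the most likely token per frame, collapsing repeats, and dropping the `<blank>` symbol, which is index 0 in the token list above:
+
+ ```python
+ import numpy as np
+
+ def ctc_greedy_decode(logits: np.ndarray, blank_id: int = 0) -> list:
+     """Greedy CTC decoding: frame-wise argmax, collapse repeats, drop blanks.
+
+     logits: (num_frames, vocab_size) scores from the encoder + CTC head.
+     """
+     best = logits.argmax(axis=-1)
+     out, prev = [], None
+     for t in best:
+         if t != blank_id and t != prev:
+             out.append(int(t))
+         prev = t
+     return out
+
+ # Toy example with a 4-symbol vocabulary (index 0 is <blank>).
+ toy = np.array([[5, 0, 0, 0], [0, 9, 0, 0], [0, 9, 0, 0], [5, 0, 0, 0], [0, 0, 0, 7]])
+ print(ctc_greedy_decode(toy))  # [1, 3]
+ ```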
765
+
766
+
767
+
768
+ ### Citing ESPnet
769
+
770
+ ```BibTex
771
+ @inproceedings{watanabe2018espnet,
772
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
773
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
774
+ year={2018},
775
+ booktitle={Proceedings of Interspeech},
776
+ pages={2207--2211},
777
+ doi={10.21437/Interspeech.2018-1456},
778
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
779
+ }
780
+
781
+
782
+
783
+
784
+ ```
785
+
786
+ or arXiv:
787
+
788
+ ```bibtex
789
+ @misc{watanabe2018espnet,
790
+ title={ESPnet: End-to-End Speech Processing Toolkit},
791
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
792
+ year={2018},
793
+ eprint={1804.00015},
794
+ archivePrefix={arXiv},
795
+ primaryClass={cs.CL}
796
+ }
797
+ ```
data/en_token_list/bpe_unigram500/bpe.model ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca848c3a0b756847776bc5c8e8ae797ad73381cb4fe9db9109b3131e9416b5f6
3
+ size 244853
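
The BPE model is stored as a Git LFS pointer; once the actual file has been pulled, it can be inspected with the `sentencepiece` library. A minimal sketch, assuming `sentencepiece` is installed and the LFS object is available locally:

```python
import sentencepiece as spm

# Load the subword model referenced by `bpemodel` in the training config.
sp = spm.SentencePieceProcessor(model_file="data/en_token_list/bpe_unigram500/bpe.model")

print(sp.get_piece_size())                     # size of the subword vocabulary
print(sp.encode("hello world", out_type=str))  # subword pieces (output shown would be illustrative)
```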
exp/asr_stats_raw_en_bpe500_sp/train/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9aa2bdc65662202e277008f62275fef28e17e564fbcf6b759a4a169cdcfdbbd
3
+ size 1402
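
These statistics feed the `global_mvn` normalization layer configured above (`stats_file: exp/asr_stats_raw_en_bpe500_sp/train/feats_stats.npz`). The sketch below shows how such sufficient statistics could be turned into mean/variance normalization; the key names `count`, `sum`, and `sum_square` are an assumption and have not been checked against the actual archive:

```python
import numpy as np

stats = np.load("exp/asr_stats_raw_en_bpe500_sp/train/feats_stats.npz")
count = stats["count"]                       # assumed key: number of accumulated frames
mean = stats["sum"] / count                  # assumed key: per-dimension feature sum
var = stats["sum_square"] / count - mean**2  # assumed key: per-dimension sum of squares
std = np.sqrt(np.maximum(var, 1.0e-20))      # floor the variance for numerical safety

def normalize(feats: np.ndarray) -> np.ndarray:
    """Apply global mean-variance normalization to a (frames, dims) feature matrix."""
    return (feats - mean) / std
```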
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/RESULTS.md ADDED
@@ -0,0 +1,32 @@
1
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Fri Dec 30 08:37:09 CST 2022`
5
+ - python version: `3.9.15 (main, Nov 24 2022, 14:31:59) [GCC 11.2.0]`
6
+ - espnet version: `espnet 202211`
7
+ - pytorch version: `pytorch 1.12.1`
8
+ - Git hash: `e62de171f1d11015cb856f83780c61bd5ca7fa8f`
9
+ - Commit date: `Thu Dec 29 14:18:44 2022 -0500`
10
+
11
+ ## asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp
12
+ ### WER
13
+
14
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
15
+ |---|---|---|---|---|---|---|---|---|
16
+ |decode_asr_ctc_asr_model_valid.cer_ctc.ave/dev|466|14671|92.2|5.6|2.2|1.2|9.1|75.3|
17
+ |decode_asr_ctc_asr_model_valid.cer_ctc.ave/test|1155|27500|92.1|5.4|2.5|1.1|9.0|72.8|
18
+
19
+ ### CER
20
+
21
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
22
+ |---|---|---|---|---|---|---|---|---|
23
+ |decode_asr_ctc_asr_model_valid.cer_ctc.ave/dev|466|78259|97.0|0.9|2.1|1.2|4.2|75.3|
24
+ |decode_asr_ctc_asr_model_valid.cer_ctc.ave/test|1155|145066|96.9|0.9|2.2|1.2|4.3|72.8|
25
+
26
+ ### TER
27
+
28
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
29
+ |---|---|---|---|---|---|---|---|---|
30
+ |decode_asr_ctc_asr_model_valid.cer_ctc.ave/dev|466|28296|94.5|3.1|2.4|1.2|6.7|75.3|
31
+ |decode_asr_ctc_asr_model_valid.cer_ctc.ave/test|1155|52113|94.6|2.9|2.5|1.2|6.5|72.8|
32
+
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/config.yaml ADDED
@@ -0,0 +1,694 @@
1
+ config: conf/tuning/train_asr_ctc_conformer_e15_linear1024.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp
7
+ ngpu: 1
8
+ seed: 2022
9
+ num_workers: 4
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: 2
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: localhost
17
+ dist_master_port: 53439
18
+ dist_launcher: null
19
+ multiprocessing_distributed: true
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 50
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - cer_ctc
39
+ - min
40
+ keep_nbest_models: 10
41
+ nbest_averaging_interval: 0
42
+ grad_clip: 5.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 1
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: true
50
+ log_interval: null
51
+ use_matplotlib: true
52
+ use_tensorboard: true
53
+ create_graph_in_tensorboard: false
54
+ use_wandb: false
55
+ wandb_project: null
56
+ wandb_id: null
57
+ wandb_entity: null
58
+ wandb_name: null
59
+ wandb_model_log_interval: -1
60
+ detect_anomaly: false
61
+ pretrain_path: null
62
+ init_param: []
63
+ ignore_init_mismatch: false
64
+ freeze_param: []
65
+ num_iters_per_epoch: null
66
+ batch_size: 20
67
+ valid_batch_size: null
68
+ batch_bins: 50000000
69
+ valid_batch_bins: null
70
+ train_shape_file:
71
+ - exp/asr_stats_raw_en_bpe500_sp/train/speech_shape
72
+ - exp/asr_stats_raw_en_bpe500_sp/train/text_shape.bpe
73
+ valid_shape_file:
74
+ - exp/asr_stats_raw_en_bpe500_sp/valid/speech_shape
75
+ - exp/asr_stats_raw_en_bpe500_sp/valid/text_shape.bpe
76
+ batch_type: numel
77
+ valid_batch_type: null
78
+ fold_length:
79
+ - 80000
80
+ - 150
81
+ sort_in_batch: descending
82
+ sort_batch: descending
83
+ multiple_iterator: false
84
+ chunk_length: 500
85
+ chunk_shift_ratio: 0.5
86
+ num_cache_chunks: 1024
87
+ train_data_path_and_name_and_type:
88
+ - - dump/raw/train_sp/wav.scp
89
+ - speech
90
+ - kaldi_ark
91
+ - - dump/raw/train_sp/text
92
+ - text
93
+ - text
94
+ valid_data_path_and_name_and_type:
95
+ - - dump/raw/dev/wav.scp
96
+ - speech
97
+ - kaldi_ark
98
+ - - dump/raw/dev/text
99
+ - text
100
+ - text
101
+ allow_variable_data_keys: false
102
+ max_cache_size: 0.0
103
+ max_cache_fd: 32
104
+ valid_max_cache_size: null
105
+ optim: adam
106
+ optim_conf:
107
+ lr: 0.002
108
+ weight_decay: 1.0e-06
109
+ scheduler: warmuplr
110
+ scheduler_conf:
111
+ warmup_steps: 15000
112
+ token_list:
113
+ - <blank>
114
+ - <unk>
115
+ - s
116
+ - ▁the
117
+ - t
118
+ - ▁a
119
+ - ▁and
120
+ - ▁to
121
+ - d
122
+ - e
123
+ - ▁of
124
+ - ''''
125
+ - n
126
+ - ing
127
+ - ▁in
128
+ - ▁i
129
+ - ▁that
130
+ - i
131
+ - a
132
+ - l
133
+ - p
134
+ - m
135
+ - y
136
+ - o
137
+ - ▁it
138
+ - ▁we
139
+ - c
140
+ - u
141
+ - ▁you
142
+ - ed
143
+ - ▁
144
+ - r
145
+ - ▁is
146
+ - re
147
+ - ▁this
148
+ - ar
149
+ - g
150
+ - ▁so
151
+ - al
152
+ - b
153
+ - ▁s
154
+ - or
155
+ - ▁f
156
+ - ▁c
157
+ - in
158
+ - k
159
+ - f
160
+ - ▁for
161
+ - ic
162
+ - er
163
+ - le
164
+ - ▁be
165
+ - ▁do
166
+ - ▁re
167
+ - ve
168
+ - ▁e
169
+ - ▁w
170
+ - ▁was
171
+ - es
172
+ - ▁they
173
+ - ly
174
+ - h
175
+ - ▁on
176
+ - v
177
+ - ▁are
178
+ - ri
179
+ - ▁have
180
+ - an
181
+ - ▁what
182
+ - ▁with
183
+ - ▁t
184
+ - w
185
+ - ur
186
+ - it
187
+ - ent
188
+ - ▁can
189
+ - ▁he
190
+ - ▁but
191
+ - ra
192
+ - ce
193
+ - ▁me
194
+ - ▁b
195
+ - ▁ma
196
+ - ▁p
197
+ - ll
198
+ - ▁st
199
+ - ▁one
200
+ - 'on'
201
+ - ▁about
202
+ - th
203
+ - ▁de
204
+ - en
205
+ - ▁all
206
+ - ▁not
207
+ - il
208
+ - ▁g
209
+ - ch
210
+ - at
211
+ - ▁there
212
+ - ▁mo
213
+ - ter
214
+ - ation
215
+ - tion
216
+ - ▁at
217
+ - ▁my
218
+ - ro
219
+ - ▁as
220
+ - te
221
+ - ▁le
222
+ - ▁con
223
+ - ▁like
224
+ - ▁people
225
+ - ▁or
226
+ - ▁an
227
+ - el
228
+ - ▁if
229
+ - ▁from
230
+ - ver
231
+ - ▁su
232
+ - ▁co
233
+ - ate
234
+ - ▁these
235
+ - ol
236
+ - ci
237
+ - ▁now
238
+ - ▁see
239
+ - ▁out
240
+ - ▁our
241
+ - ion
242
+ - ▁know
243
+ - ect
244
+ - ▁just
245
+ - as
246
+ - ▁ex
247
+ - ▁ch
248
+ - ▁d
249
+ - ▁when
250
+ - ▁very
251
+ - ▁think
252
+ - ▁who
253
+ - ▁because
254
+ - ▁go
255
+ - ▁up
256
+ - ▁us
257
+ - ▁pa
258
+ - ▁no
259
+ - ies
260
+ - ▁di
261
+ - ▁ho
262
+ - om
263
+ - ive
264
+ - ▁get
265
+ - id
266
+ - ▁o
267
+ - ▁hi
268
+ - un
269
+ - ▁how
270
+ - ▁by
271
+ - ir
272
+ - et
273
+ - ck
274
+ - ity
275
+ - ▁po
276
+ - ul
277
+ - ▁which
278
+ - ▁mi
279
+ - ▁some
280
+ - z
281
+ - ▁sp
282
+ - ▁un
283
+ - ▁going
284
+ - ▁pro
285
+ - ist
286
+ - ▁se
287
+ - ▁look
288
+ - ▁time
289
+ - ment
290
+ - de
291
+ - ▁more
292
+ - ▁had
293
+ - ng
294
+ - ▁would
295
+ - ge
296
+ - la
297
+ - ▁here
298
+ - ▁really
299
+ - x
300
+ - ▁your
301
+ - ▁them
302
+ - us
303
+ - me
304
+ - ▁en
305
+ - ▁two
306
+ - ▁k
307
+ - ▁li
308
+ - ▁world
309
+ - ne
310
+ - ow
311
+ - ▁way
312
+ - ▁want
313
+ - ▁work
314
+ - ▁don
315
+ - ▁lo
316
+ - ▁fa
317
+ - ▁were
318
+ - ▁their
319
+ - age
320
+ - vi
321
+ - ▁ha
322
+ - ac
323
+ - der
324
+ - est
325
+ - ▁bo
326
+ - am
327
+ - ▁other
328
+ - able
329
+ - ▁actually
330
+ - ▁sh
331
+ - ▁make
332
+ - ▁ba
333
+ - ▁la
334
+ - ine
335
+ - ▁into
336
+ - ▁where
337
+ - ▁could
338
+ - ▁comp
339
+ - ting
340
+ - ▁has
341
+ - ▁will
342
+ - ▁ne
343
+ - j
344
+ - ical
345
+ - ally
346
+ - ▁vi
347
+ - ▁things
348
+ - ▁te
349
+ - igh
350
+ - ▁say
351
+ - ▁years
352
+ - ers
353
+ - ▁ra
354
+ - ther
355
+ - ▁than
356
+ - ru
357
+ - ▁ro
358
+ - op
359
+ - ▁did
360
+ - ▁any
361
+ - ▁new
362
+ - ound
363
+ - ig
364
+ - ▁well
365
+ - mo
366
+ - ▁she
367
+ - ▁na
368
+ - ▁been
369
+ - he
370
+ - ▁thousand
371
+ - ▁car
372
+ - ▁take
373
+ - ▁right
374
+ - ▁then
375
+ - ▁need
376
+ - ▁start
377
+ - ▁hundred
378
+ - ▁something
379
+ - ▁over
380
+ - ▁com
381
+ - ia
382
+ - ▁kind
383
+ - um
384
+ - if
385
+ - ▁those
386
+ - ▁first
387
+ - ▁pre
388
+ - ta
389
+ - ▁said
390
+ - ize
391
+ - end
392
+ - ▁even
393
+ - ▁thing
394
+ - one
395
+ - ▁back
396
+ - ite
397
+ - ▁every
398
+ - ▁little
399
+ - ry
400
+ - ▁life
401
+ - ▁much
402
+ - ke
403
+ - ▁also
404
+ - ▁most
405
+ - ant
406
+ - per
407
+ - ▁three
408
+ - ▁come
409
+ - ▁lot
410
+ - ance
411
+ - ▁got
412
+ - ▁talk
413
+ - ▁per
414
+ - ▁inter
415
+ - ▁sa
416
+ - ▁use
417
+ - ▁mu
418
+ - ▁part
419
+ - ish
420
+ - ence
421
+ - ▁happen
422
+ - ▁bi
423
+ - ▁mean
424
+ - ough
425
+ - ▁qu
426
+ - ▁bu
427
+ - ▁day
428
+ - ▁ga
429
+ - ▁only
430
+ - ▁many
431
+ - ▁different
432
+ - ▁dr
433
+ - ▁th
434
+ - ▁show
435
+ - ful
436
+ - ▁down
437
+ - ated
438
+ - ▁good
439
+ - ▁tra
440
+ - ▁around
441
+ - ▁idea
442
+ - ▁human
443
+ - ous
444
+ - ▁put
445
+ - ▁through
446
+ - ▁five
447
+ - ▁why
448
+ - ▁change
449
+ - ▁real
450
+ - ff
451
+ - ible
452
+ - ▁fact
453
+ - ▁same
454
+ - ▁jo
455
+ - ▁live
456
+ - ▁year
457
+ - ▁problem
458
+ - ▁ph
459
+ - ▁four
460
+ - ▁give
461
+ - ▁big
462
+ - ▁tell
463
+ - ▁great
464
+ - ▁try
465
+ - ▁va
466
+ - ▁ru
467
+ - ▁system
468
+ - ▁six
469
+ - ▁plan
470
+ - ▁place
471
+ - ▁build
472
+ - ▁called
473
+ - ▁again
474
+ - ▁point
475
+ - ▁twenty
476
+ - ▁percent
477
+ - ▁nine
478
+ - ▁find
479
+ - ▁app
480
+ - ▁after
481
+ - ▁long
482
+ - ▁eight
483
+ - ▁imp
484
+ - ▁gene
485
+ - ▁design
486
+ - ▁today
487
+ - ▁should
488
+ - ▁made
489
+ - ious
490
+ - ▁came
491
+ - ▁learn
492
+ - ▁last
493
+ - ▁own
494
+ - way
495
+ - ▁turn
496
+ - ▁seven
497
+ - ▁high
498
+ - ▁question
499
+ - ▁person
500
+ - ▁brain
501
+ - ▁important
502
+ - ▁another
503
+ - ▁thought
504
+ - ▁trans
505
+ - ▁create
506
+ - ness
507
+ - ▁hu
508
+ - ▁power
509
+ - ▁act
510
+ - land
511
+ - ▁play
512
+ - ▁sort
513
+ - ▁old
514
+ - ▁before
515
+ - ▁course
516
+ - ▁understand
517
+ - ▁feel
518
+ - ▁might
519
+ - ▁each
520
+ - ▁million
521
+ - ▁better
522
+ - ▁together
523
+ - ▁ago
524
+ - ▁example
525
+ - ▁help
526
+ - ▁story
527
+ - ▁next
528
+ - ▁hand
529
+ - ▁school
530
+ - ▁water
531
+ - ▁develop
532
+ - ▁technology
533
+ - que
534
+ - ▁second
535
+ - ▁grow
536
+ - ▁still
537
+ - ▁cell
538
+ - ▁believe
539
+ - ▁number
540
+ - ▁small
541
+ - ▁between
542
+ - qui
543
+ - ▁data
544
+ - ▁become
545
+ - ▁america
546
+ - ▁maybe
547
+ - ▁space
548
+ - ▁project
549
+ - ▁organ
550
+ - ▁vo
551
+ - ▁children
552
+ - ▁book
553
+ - graph
554
+ - ▁open
555
+ - ▁fifty
556
+ - ▁picture
557
+ - ▁health
558
+ - ▁thirty
559
+ - ▁africa
560
+ - ▁reason
561
+ - ▁large
562
+ - ▁hard
563
+ - ▁computer
564
+ - ▁always
565
+ - ▁sense
566
+ - ▁money
567
+ - ▁women
568
+ - ▁everything
569
+ - ▁information
570
+ - ▁country
571
+ - ▁teach
572
+ - ▁energy
573
+ - ▁experience
574
+ - ▁food
575
+ - ▁process
576
+ - qua
577
+ - ▁interesting
578
+ - ▁future
579
+ - ▁science
580
+ - q
581
+ - '0'
582
+ - '5'
583
+ - '6'
584
+ - '9'
585
+ - '3'
586
+ - '8'
587
+ - '4'
588
+ - N
589
+ - A
590
+ - '7'
591
+ - S
592
+ - G
593
+ - F
594
+ - R
595
+ - L
596
+ - U
597
+ - E
598
+ - T
599
+ - H
600
+ - _
601
+ - B
602
+ - D
603
+ - J
604
+ - M
605
+ - ă
606
+ - ō
607
+ - ť
608
+ - '2'
609
+ - '-'
610
+ - '1'
611
+ - C
612
+ - <sos/eos>
613
+ init: null
614
+ input_size: null
615
+ ctc_conf:
616
+ dropout_rate: 0.0
617
+ ctc_type: builtin
618
+ reduce: true
619
+ ignore_nan_grad: null
620
+ zero_infinity: true
621
+ joint_net_conf: null
622
+ use_preprocessor: true
623
+ token_type: bpe
624
+ bpemodel: data/en_token_list/bpe_unigram500/bpe.model
625
+ non_linguistic_symbols: null
626
+ cleaner: null
627
+ g2p: null
628
+ speech_volume_normalize: null
629
+ rir_scp: null
630
+ rir_apply_prob: 1.0
631
+ noise_scp: null
632
+ noise_apply_prob: 1.0
633
+ noise_db_range: '13_15'
634
+ short_noise_thres: 0.5
635
+ frontend: default
636
+ frontend_conf:
637
+ n_fft: 512
638
+ win_length: 400
639
+ hop_length: 160
640
+ fs: 16k
641
+ specaug: specaug
642
+ specaug_conf:
643
+ apply_time_warp: true
644
+ time_warp_window: 5
645
+ time_warp_mode: bicubic
646
+ apply_freq_mask: true
647
+ freq_mask_width_range:
648
+ - 0
649
+ - 27
650
+ num_freq_mask: 2
651
+ apply_time_mask: true
652
+ time_mask_width_ratio_range:
653
+ - 0.0
654
+ - 0.05
655
+ num_time_mask: 5
656
+ normalize: global_mvn
657
+ normalize_conf:
658
+ stats_file: exp/asr_stats_raw_en_bpe500_sp/train/feats_stats.npz
659
+ model: espnet
660
+ model_conf:
661
+ ctc_weight: 1.0
662
+ lsm_weight: 0.1
663
+ length_normalized_loss: false
664
+ preencoder: null
665
+ preencoder_conf: {}
666
+ encoder: conformer
667
+ encoder_conf:
668
+ output_size: 256
669
+ attention_heads: 4
670
+ linear_units: 1024
671
+ num_blocks: 15
672
+ dropout_rate: 0.1
673
+ positional_dropout_rate: 0.1
674
+ attention_dropout_rate: 0.1
675
+ input_layer: conv2d
676
+ normalize_before: true
677
+ macaron_style: true
678
+ rel_pos_type: latest
679
+ pos_enc_layer_type: rel_pos
680
+ selfattention_layer_type: rel_selfattn
681
+ activation_type: swish
682
+ use_cnn_module: true
683
+ cnn_module_kernel: 31
684
+ postencoder: null
685
+ postencoder_conf: {}
686
+ decoder: rnn
687
+ decoder_conf: {}
688
+ preprocessor: default
689
+ preprocessor_conf: {}
690
+ required:
691
+ - output_dir
692
+ - token_list
693
+ version: '202211'
694
+ distributed: true
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/acc.png ADDED
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/backward_time.png ADDED
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/cer.png ADDED
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/cer_ctc.png ADDED
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/forward_time.png ADDED
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/gpu_max_cached_mem_GB.png ADDED
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/iter_time.png ADDED
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/loss.png ADDED
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/loss_att.png ADDED
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/loss_ctc.png ADDED
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/optim0_lr0.png ADDED
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/optim_step_time.png ADDED
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/train_time.png ADDED
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/images/wer.png ADDED
exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/valid.cer_ctc.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d810af642d17518b61b483ae5a1a2b474628b12b2673f34412d44b2ff89f73b
3
+ size 103544631
meta.yaml ADDED
@@ -0,0 +1,8 @@
1
+ espnet: '202211'
2
+ files:
3
+ asr_model_file: exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/valid.cer_ctc.ave_10best.pth
4
+ python: "3.9.15 (main, Nov 24 2022, 14:31:59) \n[GCC 11.2.0]"
5
+ timestamp: 1672411092.447774
6
+ torch: 1.12.1
7
+ yaml_files:
8
+ asr_train_config: exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/config.yaml
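
`meta.yaml` ties together the training config and the averaged checkpoint listed above. If the repository is checked out locally with the LFS objects pulled, these two paths can be handed straight to the inference wrapper instead of the Hub tag used in the demo section; a sketch under that assumption:

```python
from espnet2.bin.asr_inference import Speech2Text

# Paths taken from meta.yaml above.
speech2text = Speech2Text(
    asr_train_config="exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/config.yaml",
    asr_model_file="exp/asr_train_asr_ctc_conformer_e15_linear1024_raw_en_bpe500_sp/valid.cer_ctc.ave_10best.pth",
    ctc_weight=1.0,  # assumption: CTC-only decoding, matching the training setup
)
```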