“siddhu001” committed on
Commit
02017ac
1 Parent(s): 05f2fdd

Update model

Browse files
Files changed (21) hide show
  1. README.md +800 -0
  2. data/en_token_list/bpe_unigram500/bpe.model +3 -0
  3. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/RESULTS.md +2 -0
  4. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/config.yaml +727 -0
  5. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/acc.png +0 -0
  6. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/backward_time.png +0 -0
  7. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/cer.png +0 -0
  8. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/clip.png +0 -0
  9. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/forward_time.png +0 -0
  10. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/gpu_max_cached_mem_GB.png +0 -0
  11. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/grad_norm.png +0 -0
  12. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/iter_time.png +0 -0
  13. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/loss.png +0 -0
  14. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/loss_att.png +0 -0
  15. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/loss_scale.png +0 -0
  16. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/optim0_lr0.png +0 -0
  17. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/optim_step_time.png +0 -0
  18. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/train_time.png +0 -0
  19. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/wer.png +0 -0
  20. exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/valid.acc.ave_10best.pth +3 -0
  21. meta.yaml +8 -0
README.md CHANGED
@@ -1,3 +1,803 @@
1
  ---
 
 
 
 
 
 
 
2
  license: cc-by-4.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - automatic-speech-recognition
6
+ language: en
7
+ datasets:
8
+ - slue-ted
9
  license: cc-by-4.0
10
  ---
11
+
12
+ ## ESPnet2 ASR model
13
+
14
+ ### `espnet/slueted_whisper_summ`
15
+
16
+ This model was trained by “siddhu001” using the slue-ted recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout e23ef85f0b3116ad5c60d0833f186da0deec0734
26
+ pip install -e .
27
+ cd egs2/slue-ted/slu1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model espnet/slueted_whisper_summ
29
+ ```
30
+
31
+ {'rouge1': 0.2255418629519756, 'rouge2': 0.0485061537185737, 'rougeL': 0.1596465851004139, 'rougeLsum': 0.15968116069467322, 'meteor': 0.2129616261465529}
32
+ RESULT 22.55418629519756 3.799127541421444e-132 15.96465851004139 21.29616261465529 83.78519008627457
33
+
34
+ ## ASR config
35
+
36
+ <details><summary>expand</summary>
37
+
38
+ ```
39
+ config: conf//train_asr_whisper_weighted_conv2d2.yaml
40
+ print_config: false
41
+ log_level: INFO
42
+ drop_last_iter: false
43
+ dry_run: false
44
+ iterator_type: sequence
45
+ valid_iterator_type: null
46
+ output_dir: exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp
47
+ ngpu: 1
48
+ seed: 2022
49
+ num_workers: 2
50
+ num_att_plot: 3
51
+ dist_backend: nccl
52
+ dist_init_method: env://
53
+ dist_world_size: 4
54
+ dist_rank: 0
55
+ local_rank: 0
56
+ dist_master_addr: localhost
57
+ dist_master_port: 42635
58
+ dist_launcher: null
59
+ multiprocessing_distributed: true
60
+ unused_parameters: false
61
+ sharded_ddp: false
62
+ cudnn_enabled: true
63
+ cudnn_benchmark: false
64
+ cudnn_deterministic: true
65
+ collect_stats: false
66
+ write_collected_feats: false
67
+ max_epoch: 25
68
+ patience: null
69
+ val_scheduler_criterion:
70
+ - valid
71
+ - loss
72
+ early_stopping_criterion:
73
+ - valid
74
+ - loss
75
+ - min
76
+ best_model_criterion:
77
+ - - valid
78
+ - acc
79
+ - max
80
+ keep_nbest_models: 10
81
+ nbest_averaging_interval: 0
82
+ grad_clip: 5.0
83
+ grad_clip_type: 2.0
84
+ grad_noise: false
85
+ accum_grad: 1
86
+ no_forward_run: false
87
+ resume: true
88
+ train_dtype: float32
89
+ use_amp: false
90
+ log_interval: 100
91
+ use_matplotlib: true
92
+ use_tensorboard: true
93
+ create_graph_in_tensorboard: false
94
+ use_wandb: false
95
+ wandb_project: null
96
+ wandb_id: null
97
+ wandb_entity: null
98
+ wandb_name: null
99
+ wandb_model_log_interval: -1
100
+ detect_anomaly: false
101
+ use_lora: false
102
+ save_lora_only: true
103
+ lora_conf: {}
104
+ pretrain_path: null
105
+ init_param:
106
+ - /scratch/bbjs/arora1/espnet_slue_PR/espnet/egs2/tedlium3/asr1/exp/asr_train_asr_whisper_weighted_conv2d2_raw_en_bpe500/valid.acc.ave_10best.pth:::ctc
107
+ ignore_init_mismatch: false
108
+ freeze_param:
109
+ - encoder
110
+ num_iters_per_epoch: null
111
+ batch_size: 20
112
+ valid_batch_size: null
113
+ batch_bins: 12000000
114
+ valid_batch_bins: null
115
+ train_shape_file:
116
+ - exp/slu_stats_raw_en_bpe500_sp/train/speech_shape
117
+ - exp/slu_stats_raw_en_bpe500_sp/train/text_shape.bpe
118
+ valid_shape_file:
119
+ - exp/slu_stats_raw_en_bpe500_sp/valid/speech_shape
120
+ - exp/slu_stats_raw_en_bpe500_sp/valid/text_shape.bpe
121
+ batch_type: numel
122
+ valid_batch_type: null
123
+ fold_length:
124
+ - 80000
125
+ - 150
126
+ sort_in_batch: descending
127
+ shuffle_within_batch: false
128
+ sort_batch: descending
129
+ multiple_iterator: false
130
+ chunk_length: 500
131
+ chunk_shift_ratio: 0.5
132
+ num_cache_chunks: 1024
133
+ chunk_excluded_key_prefixes: []
134
+ chunk_default_fs: null
135
+ train_data_path_and_name_and_type:
136
+ - - dump/raw/train_sp/wav.scp
137
+ - speech
138
+ - kaldi_ark
139
+ - - dump/raw/train_sp/text
140
+ - text
141
+ - text
142
+ valid_data_path_and_name_and_type:
143
+ - - dump/raw/devel/wav.scp
144
+ - speech
145
+ - kaldi_ark
146
+ - - dump/raw/devel/text
147
+ - text
148
+ - text
149
+ allow_variable_data_keys: false
150
+ max_cache_size: 0.0
151
+ max_cache_fd: 32
152
+ allow_multi_rates: false
153
+ valid_max_cache_size: null
154
+ exclude_weight_decay: false
155
+ exclude_weight_decay_conf: {}
156
+ optim: adam
157
+ optim_conf:
158
+ lr: 0.002
159
+ weight_decay: 1.0e-06
160
+ scheduler: warmuplr
161
+ scheduler_conf:
162
+ warmup_steps: 5000
163
+ token_list:
164
+ - <blank>
165
+ - <unk>
166
+ - '[sep]'
167
+ - '&quot;'
168
+ - s
169
+ - ▁
170
+ - ▁the
171
+ - ','
172
+ - t
173
+ - d
174
+ - ▁a
175
+ - .
176
+ - ing
177
+ - o
178
+ - e
179
+ - ▁to
180
+ - a
181
+ - ▁and
182
+ - y
183
+ - n
184
+ - ▁of
185
+ - r
186
+ - ▁in
187
+ - u
188
+ - i
189
+ - m
190
+ - p
191
+ - c
192
+ - er
193
+ - g
194
+ - l
195
+ - al
196
+ - re
197
+ - ed
198
+ - b
199
+ - ''''
200
+ - ar
201
+ - k
202
+ - in
203
+ - f
204
+ - ▁"
205
+ - le
206
+ - 'on'
207
+ - v
208
+ - or
209
+ - th
210
+ - '-'
211
+ - ▁c
212
+ - en
213
+ - ▁f
214
+ - ▁--
215
+ - ▁we
216
+ - ▁for
217
+ - ▁how
218
+ - ly
219
+ - ▁re
220
+ - se
221
+ - ▁that
222
+ - es
223
+ - w
224
+ - ic
225
+ - st
226
+ - ▁w
227
+ - ▁be
228
+ - ri
229
+ - an
230
+ - ra
231
+ - ve
232
+ - ce
233
+ - ur
234
+ - ▁by
235
+ - ▁it
236
+ - li
237
+ - ▁de
238
+ - '?'
239
+ - it
240
+ - ch
241
+ - ent
242
+ - ▁is
243
+ - ter
244
+ - el
245
+ - ▁on
246
+ - ▁e
247
+ - ▁he
248
+ - ▁co
249
+ - ▁an
250
+ - ▁ma
251
+ - ▁st
252
+ - ll
253
+ - ▁with
254
+ - ▁can
255
+ - il
256
+ - ▁you
257
+ - ▁us
258
+ - ation
259
+ - te
260
+ - ▁this
261
+ - ▁b
262
+ - ▁do
263
+ - ▁g
264
+ - me
265
+ - ▁what
266
+ - ck
267
+ - ▁from
268
+ - ate
269
+ - ▁p
270
+ - z
271
+ - la
272
+ - ▁mo
273
+ - ▁di
274
+ - ive
275
+ - mp
276
+ - ▁talk
277
+ - ity
278
+ - vi
279
+ - ta
280
+ - at
281
+ - ge
282
+ - ▁tr
283
+ - ▁she
284
+ - ▁our
285
+ - ▁pa
286
+ - ci
287
+ - et
288
+ - h
289
+ - ▁su
290
+ - ver
291
+ - ▁world
292
+ - pe
293
+ - ▁about
294
+ - ▁me
295
+ - ▁so
296
+ - and
297
+ - ▁con
298
+ - tion
299
+ - de
300
+ - ir
301
+ - ▁her
302
+ - im
303
+ - ':'
304
+ - ▁his
305
+ - ies
306
+ - ▁po
307
+ - ▁are
308
+ - ect
309
+ - lo
310
+ - ▁your
311
+ - un
312
+ - ist
313
+ - hi
314
+ - ▁mi
315
+ - x
316
+ - id
317
+ - ment
318
+ - ol
319
+ - ul
320
+ - ti
321
+ - ne
322
+ - qu
323
+ - ▁but
324
+ - ▁ca
325
+ - ▁fa
326
+ - ▁as
327
+ - ▁un
328
+ - ers
329
+ - ight
330
+ - ▁says
331
+ - '0'
332
+ - ng
333
+ - op
334
+ - '1'
335
+ - ▁k
336
+ - ad
337
+ - j
338
+ - ma
339
+ - ▁pro
340
+ - ▁work
341
+ - ▁ba
342
+ - ▁share
343
+ - ▁new
344
+ - ▁more
345
+ - ▁vi
346
+ - ▁sa
347
+ - ▁at
348
+ - ▁la
349
+ - ut
350
+ - bi
351
+ - sion
352
+ - ▁ho
353
+ - na
354
+ - act
355
+ - age
356
+ - ke
357
+ - if
358
+ - ▁bo
359
+ - ▁br
360
+ - ▁ha
361
+ - ▁no
362
+ - co
363
+ - ▁lo
364
+ - mi
365
+ - ▁make
366
+ - ▁people
367
+ - ▁why
368
+ - ant
369
+ - ▁their
370
+ - ▁i
371
+ - ▁life
372
+ - ▁all
373
+ - ting
374
+ - ▁human
375
+ - ▁have
376
+ - om
377
+ - )
378
+ - ▁(
379
+ - ▁help
380
+ - ▁ted
381
+ - wa
382
+ - sh
383
+ - ▁da
384
+ - ▁le
385
+ - ▁out
386
+ - ph
387
+ - ical
388
+ - ▁way
389
+ - ff
390
+ - ▁ro
391
+ - able
392
+ - ▁some
393
+ - est
394
+ - ure
395
+ - em
396
+ - ho
397
+ - ▁ex
398
+ - gen
399
+ - ha
400
+ - ia
401
+ - ine
402
+ - ▁into
403
+ - ca
404
+ - ▁was
405
+ - ▁who
406
+ - ther
407
+ - ▁they
408
+ - ow
409
+ - he
410
+ - ▁one
411
+ - ▁when
412
+ - form
413
+ - ▁pre
414
+ - ni
415
+ - ▁could
416
+ - ▁like
417
+ - ▁per
418
+ - ▁up
419
+ - ance
420
+ - com
421
+ - ▁go
422
+ - ion
423
+ - tor
424
+ - ▁fe
425
+ - ▁ra
426
+ - ▁or
427
+ - ▁en
428
+ - ▁change
429
+ - tic
430
+ - ▁every
431
+ - ▁jo
432
+ - ence
433
+ - ▁not
434
+ - ▁art
435
+ - one
436
+ - use
437
+ - ous
438
+ - ▁plan
439
+ - ▁music
440
+ - ▁exp
441
+ - und
442
+ - ▁ne
443
+ - um
444
+ - ative
445
+ - pp
446
+ - ▁need
447
+ - tro
448
+ - directed
449
+ - ▁learn
450
+ - ▁narrate
451
+ - ▁has
452
+ - lar
453
+ - '].'
454
+ - man
455
+ - ▁car
456
+ - ▁future
457
+ - ▁real
458
+ - ▁time
459
+ - ize
460
+ - ▁live
461
+ - ber
462
+ - ▁mar
463
+ - ▁ga
464
+ - ▁take
465
+ - ▁dr
466
+ - ful
467
+ - ▁get
468
+ - ▁shows
469
+ - day
470
+ - ▁cha
471
+ - ▁than
472
+ - ▁know
473
+ - ian
474
+ - ▁see
475
+ - ▁just
476
+ - '2'
477
+ - ▁other
478
+ - old
479
+ - ▁design
480
+ - ▁chi
481
+ - ▁build
482
+ - ious
483
+ - ▁most
484
+ - ▁si
485
+ - ▁will
486
+ - ▁power
487
+ - ▁think
488
+ - port
489
+ - ▁over
490
+ - ▁ja
491
+ - ish
492
+ - ▁climate
493
+ - ▁sha
494
+ - ▁through
495
+ - less
496
+ - '3'
497
+ - ▁my
498
+ - ▁where
499
+ - ▁global
500
+ - ▁health
501
+ - ▁pri
502
+ - ▁20
503
+ - ▁story
504
+ - gu
505
+ - ugh
506
+ - ▁create
507
+ - ▁look
508
+ - ▁trans
509
+ - ▁har
510
+ - ▁even
511
+ - ▁part
512
+ - ▁years
513
+ - ▁lead
514
+ - side
515
+ - low
516
+ - long
517
+ - ▁technolog
518
+ - ness
519
+ - '5'
520
+ - ▁call
521
+ - ▁sc
522
+ - ▁system
523
+ - '9'
524
+ - line
525
+ - ▁brain
526
+ - ▁data
527
+ - ▁own
528
+ - ition
529
+ - ▁explains
530
+ - ▁tell
531
+ - ▁explore
532
+ - ▁start
533
+ - ▁ru
534
+ - ▁which
535
+ - ▁anderson
536
+ - ▁find
537
+ - ▁hu
538
+ - ▁women
539
+ - ▁better
540
+ - ▁idea
541
+ - ▁history
542
+ - ▁research
543
+ - ▁science
544
+ - ism
545
+ - ▁first
546
+ - ▁grow
547
+ - ▁right
548
+ - clu
549
+ - ▁space
550
+ - ▁develop
551
+ - ▁problem
552
+ - ▁two
553
+ - ▁earth
554
+ - ologist
555
+ - ▁many
556
+ - ▁should
557
+ - ▁three
558
+ - ▁fellow
559
+ - ▁social
560
+ - ▁africa
561
+ - ▁...
562
+ - '4'
563
+ - ▁addis
564
+ - ▁powerful
565
+ - ▁found
566
+ - ▁under
567
+ - ▁understand
568
+ - ▁after
569
+ - ▁stories
570
+ - ▁around
571
+ - ▁personal
572
+ - ▁project
573
+ - ▁between
574
+ - ▁question
575
+ - ▁play
576
+ - ▁scientist
577
+ - ▁happen
578
+ - ▁good
579
+ - ▁produc
580
+ - ▁experience
581
+ - ▁step
582
+ - ▁america
583
+ - '8'
584
+ - ▁great
585
+ - ▁down
586
+ - ▁high
587
+ - ▁would
588
+ - ▁turn
589
+ - ▁surprising
590
+ - ▁imagin
591
+ - ▁teach
592
+ - cross
593
+ - ▁place
594
+ - ▁medic
595
+ - ▁million
596
+ - ▁things
597
+ - '7'
598
+ - ▁reveal
599
+ - ▁without
600
+ - ▁challenge
601
+ - ▁next
602
+ - ▁each
603
+ - ▁studio
604
+ - organ
605
+ - '6'
606
+ - ▁business
607
+ - ▁much
608
+ - ▁show
609
+ - ▁conversation
610
+ - ▁energy
611
+ - ▁school
612
+ - ▁ocean
613
+ - ▁while
614
+ - source
615
+ - ization
616
+ - ▁break
617
+ - ▁robot
618
+ - ▁disease
619
+ - ▁behind
620
+ - ability
621
+ - ▁team
622
+ - ▁chris
623
+ - ▁become
624
+ - ▁solution
625
+ - ▁protect
626
+ - ▁collect
627
+ - ▁different
628
+ - ▁those
629
+ - ▁connect
630
+ - ▁architect
631
+ - ▁language
632
+ - ▁simple
633
+ - ▁solve
634
+ - ▁before
635
+ - ▁community
636
+ - ▁country
637
+ - ▁secret
638
+ - ▁keep
639
+ - ▁food
640
+ - ▁thought
641
+ - ▁discover
642
+ - ▁environment
643
+ - ▁government
644
+ - ▁public
645
+ - ;
646
+ - '!'
647
+ - /
648
+ - q
649
+ - '%'
650
+ - '@'
651
+ - ']'
652
+ - +
653
+ - '&'
654
+ - '|'
655
+ - _
656
+ - (
657
+ - '"'
658
+ - $
659
+ - '*'
660
+ - '='
661
+ - '['
662
+ - '`'
663
+ - <sos/eos>
664
+ transcript_token_list: null
665
+ two_pass: false
666
+ pre_postencoder_norm: false
667
+ init: null
668
+ input_size: 1
669
+ ctc_conf:
670
+ dropout_rate: 0.0
671
+ ctc_type: builtin
672
+ reduce: true
673
+ ignore_nan_grad: null
674
+ zero_infinity: true
675
+ brctc_risk_strategy: exp
676
+ brctc_group_strategy: end
677
+ brctc_risk_factor: 0.0
678
+ joint_net_conf: null
679
+ use_preprocessor: true
680
+ token_type: bpe
681
+ bpemodel: data/en_token_list/bpe_unigram500/bpe.model
682
+ non_linguistic_symbols: null
683
+ cleaner: null
684
+ g2p: null
685
+ speech_volume_normalize: null
686
+ rir_scp: null
687
+ rir_apply_prob: 1.0
688
+ noise_scp: null
689
+ noise_apply_prob: 1.0
690
+ noise_db_range: '13_15'
691
+ short_noise_thres: 0.5
692
+ frontend: null
693
+ frontend_conf: {}
694
+ specaug: null
695
+ specaug_conf: {}
696
+ normalize: null
697
+ normalize_conf: {}
698
+ model: espnet
699
+ model_conf:
700
+ ctc_weight: 0.0
701
+ lsm_weight: 0.1
702
+ length_normalized_loss: false
703
+ weighted_sum: true
704
+ extract_feats_in_collect_stats: false
705
+ preencoder: null
706
+ preencoder_conf: {}
707
+ encoder: whisper
708
+ encoder_conf:
709
+ whisper_model: medium
710
+ dropout_rate: 0.0
711
+ use_specaug: true
712
+ specaug_conf:
713
+ apply_time_warp: true
714
+ time_warp_window: 5
715
+ time_warp_mode: bicubic
716
+ apply_freq_mask: true
717
+ freq_mask_width_range:
718
+ - 0
719
+ - 40
720
+ num_freq_mask: 2
721
+ apply_time_mask: true
722
+ time_mask_width_ratio_range:
723
+ - 0.0
724
+ - 0.12
725
+ num_time_mask: 5
726
+ prepostencoder: linear
727
+ prepostencoder_conf:
728
+ input_size: 1024
729
+ output_size: 80
730
+ postencoder: conformer_full
731
+ postencoder_conf:
732
+ output_size: 256
733
+ attention_heads: 4
734
+ linear_units: 1024
735
+ num_blocks: 12
736
+ dropout_rate: 0.1
737
+ positional_dropout_rate: 0.1
738
+ attention_dropout_rate: 0.1
739
+ input_layer: conv2d2
740
+ normalize_before: true
741
+ macaron_style: true
742
+ rel_pos_type: latest
743
+ pos_enc_layer_type: rel_pos
744
+ selfattention_layer_type: rel_selfattn
745
+ activation_type: swish
746
+ use_cnn_module: true
747
+ cnn_module_kernel: 31
748
+ deliberationencoder: null
749
+ deliberationencoder_conf: {}
750
+ decoder: transformer
751
+ decoder_conf:
752
+ attention_heads: 4
753
+ linear_units: 2048
754
+ num_blocks: 6
755
+ dropout_rate: 0.1
756
+ positional_dropout_rate: 0.1
757
+ self_attention_dropout_rate: 0.1
758
+ src_attention_dropout_rate: 0.1
759
+ postdecoder: null
760
+ postdecoder_conf: {}
761
+ required:
762
+ - output_dir
763
+ - token_list
764
+ version: '202310'
765
+ distributed: true
766
+ ```
767
+
768
+ </details>
769
+
770
+
771
+
772
+ ### Citing ESPnet
773
+
774
+ ```bibtex
775
+ @inproceedings{watanabe2018espnet,
776
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
777
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
778
+ year={2018},
779
+ booktitle={Proceedings of Interspeech},
780
+ pages={2207--2211},
781
+ doi={10.21437/Interspeech.2018-1456},
782
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
783
+ }
784
+
785
+
786
+
787
+
788
+
789
+
790
+ ```
791
+
792
+ or arXiv:
793
+
794
+ ```bibtex
795
+ @misc{watanabe2018espnet,
796
+ title={ESPnet: End-to-End Speech Processing Toolkit},
797
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
798
+ year={2018},
799
+ eprint={1804.00015},
800
+ archivePrefix={arXiv},
801
+ primaryClass={cs.CL}
802
+ }
803
+ ```
data/en_token_list/bpe_unigram500/bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9070b5fe2b321a13162f2aa8159c6f637402b46fb032510dbb1dabfcf0afa24f
3
+ size 244766
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/RESULTS.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {'rouge1': 0.2255418629519756, 'rouge2': 0.0485061537185737, 'rougeL': 0.1596465851004139, 'rougeLsum': 0.15968116069467322, 'meteor': 0.2129616261465529}
2
+ RESULT 22.55418629519756 3.799127541421444e-132 15.96465851004139 21.29616261465529 83.78519008627457
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/config.yaml ADDED
@@ -0,0 +1,727 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf//train_asr_whisper_weighted_conv2d2.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp
9
+ ngpu: 1
10
+ seed: 2022
11
+ num_workers: 2
12
+ num_att_plot: 3
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: 4
16
+ dist_rank: 0
17
+ local_rank: 0
18
+ dist_master_addr: localhost
19
+ dist_master_port: 42635
20
+ dist_launcher: null
21
+ multiprocessing_distributed: true
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ cudnn_enabled: true
25
+ cudnn_benchmark: false
26
+ cudnn_deterministic: true
27
+ collect_stats: false
28
+ write_collected_feats: false
29
+ max_epoch: 25
30
+ patience: null
31
+ val_scheduler_criterion:
32
+ - valid
33
+ - loss
34
+ early_stopping_criterion:
35
+ - valid
36
+ - loss
37
+ - min
38
+ best_model_criterion:
39
+ - - valid
40
+ - acc
41
+ - max
42
+ keep_nbest_models: 10
43
+ nbest_averaging_interval: 0
44
+ grad_clip: 5.0
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 1
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: false
52
+ log_interval: 100
53
+ use_matplotlib: true
54
+ use_tensorboard: true
55
+ create_graph_in_tensorboard: false
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ use_lora: false
64
+ save_lora_only: true
65
+ lora_conf: {}
66
+ pretrain_path: null
67
+ init_param:
68
+ - /scratch/bbjs/arora1/espnet_slue_PR/espnet/egs2/tedlium3/asr1/exp/asr_train_asr_whisper_weighted_conv2d2_raw_en_bpe500/valid.acc.ave_10best.pth:::ctc
69
+ ignore_init_mismatch: false
70
+ freeze_param:
71
+ - encoder
72
+ num_iters_per_epoch: null
73
+ batch_size: 20
74
+ valid_batch_size: null
75
+ batch_bins: 12000000
76
+ valid_batch_bins: null
77
+ train_shape_file:
78
+ - exp/slu_stats_raw_en_bpe500_sp/train/speech_shape
79
+ - exp/slu_stats_raw_en_bpe500_sp/train/text_shape.bpe
80
+ valid_shape_file:
81
+ - exp/slu_stats_raw_en_bpe500_sp/valid/speech_shape
82
+ - exp/slu_stats_raw_en_bpe500_sp/valid/text_shape.bpe
83
+ batch_type: numel
84
+ valid_batch_type: null
85
+ fold_length:
86
+ - 80000
87
+ - 150
88
+ sort_in_batch: descending
89
+ shuffle_within_batch: false
90
+ sort_batch: descending
91
+ multiple_iterator: false
92
+ chunk_length: 500
93
+ chunk_shift_ratio: 0.5
94
+ num_cache_chunks: 1024
95
+ chunk_excluded_key_prefixes: []
96
+ chunk_default_fs: null
97
+ train_data_path_and_name_and_type:
98
+ - - dump/raw/train_sp/wav.scp
99
+ - speech
100
+ - kaldi_ark
101
+ - - dump/raw/train_sp/text
102
+ - text
103
+ - text
104
+ valid_data_path_and_name_and_type:
105
+ - - dump/raw/devel/wav.scp
106
+ - speech
107
+ - kaldi_ark
108
+ - - dump/raw/devel/text
109
+ - text
110
+ - text
111
+ allow_variable_data_keys: false
112
+ max_cache_size: 0.0
113
+ max_cache_fd: 32
114
+ allow_multi_rates: false
115
+ valid_max_cache_size: null
116
+ exclude_weight_decay: false
117
+ exclude_weight_decay_conf: {}
118
+ optim: adam
119
+ optim_conf:
120
+ lr: 0.002
121
+ weight_decay: 1.0e-06
122
+ scheduler: warmuplr
123
+ scheduler_conf:
124
+ warmup_steps: 5000
125
+ token_list:
126
+ - <blank>
127
+ - <unk>
128
+ - '[sep]'
129
+ - '&quot;'
130
+ - s
131
+ - ▁
132
+ - ▁the
133
+ - ','
134
+ - t
135
+ - d
136
+ - ▁a
137
+ - .
138
+ - ing
139
+ - o
140
+ - e
141
+ - ▁to
142
+ - a
143
+ - ▁and
144
+ - y
145
+ - n
146
+ - ▁of
147
+ - r
148
+ - ▁in
149
+ - u
150
+ - i
151
+ - m
152
+ - p
153
+ - c
154
+ - er
155
+ - g
156
+ - l
157
+ - al
158
+ - re
159
+ - ed
160
+ - b
161
+ - ''''
162
+ - ar
163
+ - k
164
+ - in
165
+ - f
166
+ - ▁"
167
+ - le
168
+ - 'on'
169
+ - v
170
+ - or
171
+ - th
172
+ - '-'
173
+ - ▁c
174
+ - en
175
+ - ▁f
176
+ - ▁--
177
+ - ▁we
178
+ - ▁for
179
+ - ▁how
180
+ - ly
181
+ - ▁re
182
+ - se
183
+ - ▁that
184
+ - es
185
+ - w
186
+ - ic
187
+ - st
188
+ - ▁w
189
+ - ▁be
190
+ - ri
191
+ - an
192
+ - ra
193
+ - ve
194
+ - ce
195
+ - ur
196
+ - ▁by
197
+ - ▁it
198
+ - li
199
+ - ▁de
200
+ - '?'
201
+ - it
202
+ - ch
203
+ - ent
204
+ - ▁is
205
+ - ter
206
+ - el
207
+ - ▁on
208
+ - ▁e
209
+ - ▁he
210
+ - ▁co
211
+ - ▁an
212
+ - ▁ma
213
+ - ▁st
214
+ - ll
215
+ - ▁with
216
+ - ▁can
217
+ - il
218
+ - ▁you
219
+ - ▁us
220
+ - ation
221
+ - te
222
+ - ▁this
223
+ - ▁b
224
+ - ▁do
225
+ - ▁g
226
+ - me
227
+ - ▁what
228
+ - ck
229
+ - ▁from
230
+ - ate
231
+ - ▁p
232
+ - z
233
+ - la
234
+ - ▁mo
235
+ - ▁di
236
+ - ive
237
+ - mp
238
+ - ▁talk
239
+ - ity
240
+ - vi
241
+ - ta
242
+ - at
243
+ - ge
244
+ - ▁tr
245
+ - ▁she
246
+ - ▁our
247
+ - ▁pa
248
+ - ci
249
+ - et
250
+ - h
251
+ - ▁su
252
+ - ver
253
+ - ▁world
254
+ - pe
255
+ - ▁about
256
+ - ▁me
257
+ - ▁so
258
+ - and
259
+ - ▁con
260
+ - tion
261
+ - de
262
+ - ir
263
+ - ▁her
264
+ - im
265
+ - ':'
266
+ - ▁his
267
+ - ies
268
+ - ▁po
269
+ - ▁are
270
+ - ect
271
+ - lo
272
+ - ▁your
273
+ - un
274
+ - ist
275
+ - hi
276
+ - ▁mi
277
+ - x
278
+ - id
279
+ - ment
280
+ - ol
281
+ - ul
282
+ - ti
283
+ - ne
284
+ - qu
285
+ - ▁but
286
+ - ▁ca
287
+ - ▁fa
288
+ - ▁as
289
+ - ▁un
290
+ - ers
291
+ - ight
292
+ - ▁says
293
+ - '0'
294
+ - ng
295
+ - op
296
+ - '1'
297
+ - ▁k
298
+ - ad
299
+ - j
300
+ - ma
301
+ - ▁pro
302
+ - ▁work
303
+ - ▁ba
304
+ - ▁share
305
+ - ▁new
306
+ - ▁more
307
+ - ▁vi
308
+ - ▁sa
309
+ - ▁at
310
+ - ▁la
311
+ - ut
312
+ - bi
313
+ - sion
314
+ - ▁ho
315
+ - na
316
+ - act
317
+ - age
318
+ - ke
319
+ - if
320
+ - ▁bo
321
+ - ▁br
322
+ - ▁ha
323
+ - ▁no
324
+ - co
325
+ - ▁lo
326
+ - mi
327
+ - ▁make
328
+ - ▁people
329
+ - ▁why
330
+ - ant
331
+ - ▁their
332
+ - ▁i
333
+ - ▁life
334
+ - ▁all
335
+ - ting
336
+ - ▁human
337
+ - ▁have
338
+ - om
339
+ - )
340
+ - ▁(
341
+ - ▁help
342
+ - ▁ted
343
+ - wa
344
+ - sh
345
+ - ▁da
346
+ - ▁le
347
+ - ▁out
348
+ - ph
349
+ - ical
350
+ - ▁way
351
+ - ff
352
+ - ▁ro
353
+ - able
354
+ - ▁some
355
+ - est
356
+ - ure
357
+ - em
358
+ - ho
359
+ - ▁ex
360
+ - gen
361
+ - ha
362
+ - ia
363
+ - ine
364
+ - ▁into
365
+ - ca
366
+ - ▁was
367
+ - ▁who
368
+ - ther
369
+ - ▁they
370
+ - ow
371
+ - he
372
+ - ▁one
373
+ - ▁when
374
+ - form
375
+ - ▁pre
376
+ - ni
377
+ - ▁could
378
+ - ▁like
379
+ - ▁per
380
+ - ▁up
381
+ - ance
382
+ - com
383
+ - ▁go
384
+ - ion
385
+ - tor
386
+ - ▁fe
387
+ - ▁ra
388
+ - ▁or
389
+ - ▁en
390
+ - ▁change
391
+ - tic
392
+ - ▁every
393
+ - ▁jo
394
+ - ence
395
+ - ▁not
396
+ - ▁art
397
+ - one
398
+ - use
399
+ - ous
400
+ - ▁plan
401
+ - ▁music
402
+ - ▁exp
403
+ - und
404
+ - ▁ne
405
+ - um
406
+ - ative
407
+ - pp
408
+ - ▁need
409
+ - tro
410
+ - directed
411
+ - ▁learn
412
+ - ▁narrate
413
+ - ▁has
414
+ - lar
415
+ - '].'
416
+ - man
417
+ - ▁car
418
+ - ▁future
419
+ - ▁real
420
+ - ▁time
421
+ - ize
422
+ - ▁live
423
+ - ber
424
+ - ▁mar
425
+ - ▁ga
426
+ - ▁take
427
+ - ▁dr
428
+ - ful
429
+ - ▁get
430
+ - ▁shows
431
+ - day
432
+ - ▁cha
433
+ - ▁than
434
+ - ▁know
435
+ - ian
436
+ - ▁see
437
+ - ▁just
438
+ - '2'
439
+ - ▁other
440
+ - old
441
+ - ▁design
442
+ - ▁chi
443
+ - ▁build
444
+ - ious
445
+ - ▁most
446
+ - ▁si
447
+ - ▁will
448
+ - ▁power
449
+ - ▁think
450
+ - port
451
+ - ▁over
452
+ - ▁ja
453
+ - ish
454
+ - ▁climate
455
+ - ▁sha
456
+ - ▁through
457
+ - less
458
+ - '3'
459
+ - ▁my
460
+ - ▁where
461
+ - ▁global
462
+ - ▁health
463
+ - ▁pri
464
+ - ▁20
465
+ - ▁story
466
+ - gu
467
+ - ugh
468
+ - ▁create
469
+ - ▁look
470
+ - ▁trans
471
+ - ▁har
472
+ - ▁even
473
+ - ▁part
474
+ - ▁years
475
+ - ▁lead
476
+ - side
477
+ - low
478
+ - long
479
+ - ▁technolog
480
+ - ness
481
+ - '5'
482
+ - ▁call
483
+ - ▁sc
484
+ - ▁system
485
+ - '9'
486
+ - line
487
+ - ▁brain
488
+ - ▁data
489
+ - ▁own
490
+ - ition
491
+ - ▁explains
492
+ - ▁tell
493
+ - ▁explore
494
+ - ▁start
495
+ - ▁ru
496
+ - ▁which
497
+ - ▁anderson
498
+ - ▁find
499
+ - ▁hu
500
+ - ▁women
501
+ - ▁better
502
+ - ▁idea
503
+ - ▁history
504
+ - ▁research
505
+ - ▁science
506
+ - ism
507
+ - ▁first
508
+ - ▁grow
509
+ - ▁right
510
+ - clu
511
+ - ▁space
512
+ - ▁develop
513
+ - ▁problem
514
+ - ▁two
515
+ - ▁earth
516
+ - ologist
517
+ - ▁many
518
+ - ▁should
519
+ - ▁three
520
+ - ▁fellow
521
+ - ▁social
522
+ - ▁africa
523
+ - ▁...
524
+ - '4'
525
+ - ▁addis
526
+ - ▁powerful
527
+ - ▁found
528
+ - ▁under
529
+ - ▁understand
530
+ - ▁after
531
+ - ▁stories
532
+ - ▁around
533
+ - ▁personal
534
+ - ▁project
535
+ - ▁between
536
+ - ▁question
537
+ - ▁play
538
+ - ▁scientist
539
+ - ▁happen
540
+ - ▁good
541
+ - ▁produc
542
+ - ▁experience
543
+ - ▁step
544
+ - ▁america
545
+ - '8'
546
+ - ▁great
547
+ - ▁down
548
+ - ▁high
549
+ - ▁would
550
+ - ▁turn
551
+ - ▁surprising
552
+ - ▁imagin
553
+ - ▁teach
554
+ - cross
555
+ - ▁place
556
+ - ▁medic
557
+ - ▁million
558
+ - ▁things
559
+ - '7'
560
+ - ▁reveal
561
+ - ▁without
562
+ - ▁challenge
563
+ - ▁next
564
+ - ▁each
565
+ - ▁studio
566
+ - organ
567
+ - '6'
568
+ - ▁business
569
+ - ▁much
570
+ - ▁show
571
+ - ▁conversation
572
+ - ▁energy
573
+ - ▁school
574
+ - ▁ocean
575
+ - ▁while
576
+ - source
577
+ - ization
578
+ - ▁break
579
+ - ▁robot
580
+ - ▁disease
581
+ - ▁behind
582
+ - ability
583
+ - ▁team
584
+ - ▁chris
585
+ - ▁become
586
+ - ▁solution
587
+ - ▁protect
588
+ - ▁collect
589
+ - ▁different
590
+ - ▁those
591
+ - ▁connect
592
+ - ▁architect
593
+ - ▁language
594
+ - ▁simple
595
+ - ▁solve
596
+ - ▁before
597
+ - ▁community
598
+ - ▁country
599
+ - ▁secret
600
+ - ▁keep
601
+ - ▁food
602
+ - ▁thought
603
+ - ▁discover
604
+ - ▁environment
605
+ - ▁government
606
+ - ▁public
607
+ - ;
608
+ - '!'
609
+ - /
610
+ - q
611
+ - '%'
612
+ - '@'
613
+ - ']'
614
+ - +
615
+ - '&'
616
+ - '|'
617
+ - _
618
+ - (
619
+ - '"'
620
+ - $
621
+ - '*'
622
+ - '='
623
+ - '['
624
+ - '`'
625
+ - <sos/eos>
626
+ transcript_token_list: null
627
+ two_pass: false
628
+ pre_postencoder_norm: false
629
+ init: null
630
+ input_size: 1
631
+ ctc_conf:
632
+ dropout_rate: 0.0
633
+ ctc_type: builtin
634
+ reduce: true
635
+ ignore_nan_grad: null
636
+ zero_infinity: true
637
+ brctc_risk_strategy: exp
638
+ brctc_group_strategy: end
639
+ brctc_risk_factor: 0.0
640
+ joint_net_conf: null
641
+ use_preprocessor: true
642
+ token_type: bpe
643
+ bpemodel: data/en_token_list/bpe_unigram500/bpe.model
644
+ non_linguistic_symbols: null
645
+ cleaner: null
646
+ g2p: null
647
+ speech_volume_normalize: null
648
+ rir_scp: null
649
+ rir_apply_prob: 1.0
650
+ noise_scp: null
651
+ noise_apply_prob: 1.0
652
+ noise_db_range: '13_15'
653
+ short_noise_thres: 0.5
654
+ frontend: null
655
+ frontend_conf: {}
656
+ specaug: null
657
+ specaug_conf: {}
658
+ normalize: null
659
+ normalize_conf: {}
660
+ model: espnet
661
+ model_conf:
662
+ ctc_weight: 0.0
663
+ lsm_weight: 0.1
664
+ length_normalized_loss: false
665
+ weighted_sum: true
666
+ extract_feats_in_collect_stats: false
667
+ preencoder: null
668
+ preencoder_conf: {}
669
+ encoder: whisper
670
+ encoder_conf:
671
+ whisper_model: medium
672
+ dropout_rate: 0.0
673
+ use_specaug: true
674
+ specaug_conf:
675
+ apply_time_warp: true
676
+ time_warp_window: 5
677
+ time_warp_mode: bicubic
678
+ apply_freq_mask: true
679
+ freq_mask_width_range:
680
+ - 0
681
+ - 40
682
+ num_freq_mask: 2
683
+ apply_time_mask: true
684
+ time_mask_width_ratio_range:
685
+ - 0.0
686
+ - 0.12
687
+ num_time_mask: 5
688
+ prepostencoder: linear
689
+ prepostencoder_conf:
690
+ input_size: 1024
691
+ output_size: 80
692
+ postencoder: conformer_full
693
+ postencoder_conf:
694
+ output_size: 256
695
+ attention_heads: 4
696
+ linear_units: 1024
697
+ num_blocks: 12
698
+ dropout_rate: 0.1
699
+ positional_dropout_rate: 0.1
700
+ attention_dropout_rate: 0.1
701
+ input_layer: conv2d2
702
+ normalize_before: true
703
+ macaron_style: true
704
+ rel_pos_type: latest
705
+ pos_enc_layer_type: rel_pos
706
+ selfattention_layer_type: rel_selfattn
707
+ activation_type: swish
708
+ use_cnn_module: true
709
+ cnn_module_kernel: 31
710
+ deliberationencoder: null
711
+ deliberationencoder_conf: {}
712
+ decoder: transformer
713
+ decoder_conf:
714
+ attention_heads: 4
715
+ linear_units: 2048
716
+ num_blocks: 6
717
+ dropout_rate: 0.1
718
+ positional_dropout_rate: 0.1
719
+ self_attention_dropout_rate: 0.1
720
+ src_attention_dropout_rate: 0.1
721
+ postdecoder: null
722
+ postdecoder_conf: {}
723
+ required:
724
+ - output_dir
725
+ - token_list
726
+ version: '202310'
727
+ distributed: true
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/acc.png ADDED
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/backward_time.png ADDED
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/cer.png ADDED
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/clip.png ADDED
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/forward_time.png ADDED
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/gpu_max_cached_mem_GB.png ADDED
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/grad_norm.png ADDED
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/iter_time.png ADDED
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/loss.png ADDED
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/loss_att.png ADDED
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/loss_scale.png ADDED
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/optim0_lr0.png ADDED
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/optim_step_time.png ADDED
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/train_time.png ADDED
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/images/wer.png ADDED
exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/valid.acc.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13655138975981e18b5f787b6283d89672273b66055945dbe43e52ab21090f01
3
+ size 1356868802
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202310'
2
+ files:
3
+ slu_model_file: exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/valid.acc.ave_10best.pth
4
+ python: "3.9.13 (main, Aug 25 2022, 23:26:10) \n[GCC 11.2.0]"
5
+ timestamp: 1711761248.000196
6
+ torch: 2.1.0+cu121
7
+ yaml_files:
8
+ slu_train_config: exp/slu_train_asr_whisper_weighted_conv2d2_raw_en_bpe500_sp/config.yaml