ESPnet
English
audio
classification
shikhar7ssu committed on
Commit
ecea259
·
verified ·
1 Parent(s): b83b4bd

Upload 19 files

Browse files
Files changed (19) hide show
  1. README.md +799 -0
  2. as20k_fulltrain/data/token_list +528 -0
  3. as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/120epoch.pth +3 -0
  4. as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/RESULTS.md +19 -0
  5. as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/config.yaml +707 -0
  6. as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/acc.png +0 -0
  7. as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/backward_time.png +0 -0
  8. as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/clip.png +0 -0
  9. as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/forward_time.png +0 -0
  10. as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/gpu_max_cached_mem_GB.png +0 -0
  11. as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/grad_norm.png +0 -0
  12. as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/iter_time.png +0 -0
  13. as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/loss.png +0 -0
  14. as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/loss_scale.png +0 -0
  15. as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/mAP.png +0 -0
  16. as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/optim0_lr0.png +0 -0
  17. as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/optim_step_time.png +0 -0
  18. as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/train_time.png +0 -0
  19. meta.yaml +8 -0
README.md ADDED
@@ -0,0 +1,799 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - classification
6
+ language: en
7
+ datasets:
8
+ - as20k
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 CLS model
13
+
14
+ ### `shikhar7ssu/BEATs-AS20K`
15
+
16
+ This model was trained by Shikhar Bharadwaj using the as20k recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout 9634114cd3c35e230f4a9dda752e982512517653
26
+ pip install -e .
27
+ cd egs2/as20k/cls1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model shikhar7ssu/BEATs-AS20K
29
+ ```
30
+
31
+ <!-- Generated by scripts/utils/show_cls_result.sh -->
32
+ # RESULTS
33
+ ## Environments
34
+ - date: `Fri Jan 3 23:25:40 EST 2025`
35
+ - python version: `3.9.20 (main, Oct 3 2024, 07:27:41) [GCC 11.2.0]`
36
+ - espnet version: `espnet 202412`
37
+ - pytorch version: `pytorch 2.4.0`
38
+ - Git hash: `635b3add116ae68c056f7aa67f64591c9ba7eb3e`
39
+ - Commit date: `Thu Jan 2 11:46:32 2025 -0500`
40
+
41
+ ## cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644
42
+ |Dataset|Metric|Value|
43
+ |---|---|---|
44
+ ./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|mean_acc|47.73
45
+ ./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|mAP|37.46
46
+ ./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|mean_auc|96.58
47
+ ./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|n_labels|527.00
48
+ ./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|n_instances|20123.00
49
+
50
+ ## CLS config
51
+
52
+ <details><summary>expand</summary>
53
+
54
+ ```
55
+ config: conf/beats_cls.yaml
56
+ print_config: false
57
+ log_level: INFO
58
+ drop_last_iter: false
59
+ dry_run: false
60
+ iterator_type: sequence
61
+ valid_iterator_type: null
62
+ output_dir: ./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644
63
+ ngpu: 1
64
+ seed: 0
65
+ num_workers: 2
66
+ num_att_plot: 0
67
+ dist_backend: nccl
68
+ dist_init_method: env://
69
+ dist_world_size: null
70
+ dist_rank: null
71
+ local_rank: 0
72
+ dist_master_addr: null
73
+ dist_master_port: null
74
+ dist_launcher: null
75
+ multiprocessing_distributed: false
76
+ unused_parameters: true
77
+ sharded_ddp: false
78
+ use_deepspeed: false
79
+ deepspeed_config: null
80
+ cudnn_enabled: true
81
+ cudnn_benchmark: false
82
+ cudnn_deterministic: true
83
+ use_tf32: false
84
+ collect_stats: false
85
+ write_collected_feats: false
86
+ max_epoch: 160
87
+ patience: null
88
+ val_scheduler_criterion:
89
+ - valid
90
+ - loss
91
+ early_stopping_criterion:
92
+ - valid
93
+ - loss
94
+ - min
95
+ best_model_criterion:
96
+ - - valid
97
+ - mAP
98
+ - max
99
+ keep_nbest_models: 1
100
+ nbest_averaging_interval: 0
101
+ grad_clip: 1
102
+ grad_clip_type: 2.0
103
+ grad_noise: false
104
+ accum_grad: 1
105
+ no_forward_run: false
106
+ resume: true
107
+ train_dtype: float32
108
+ use_amp: false
109
+ log_interval: null
110
+ use_matplotlib: true
111
+ use_tensorboard: true
112
+ create_graph_in_tensorboard: false
113
+ use_wandb: false
114
+ wandb_project: null
115
+ wandb_id: null
116
+ wandb_entity: null
117
+ wandb_name: null
118
+ wandb_model_log_interval: -1
119
+ detect_anomaly: false
120
+ use_adapter: false
121
+ adapter: lora
122
+ save_strategy: all
123
+ adapter_conf: {}
124
+ pretrain_path: null
125
+ init_param: []
126
+ ignore_init_mismatch: false
127
+ freeze_param: []
128
+ num_iters_per_epoch: null
129
+ batch_size: 80
130
+ valid_batch_size: 1200
131
+ batch_bins: 1000000
132
+ valid_batch_bins: null
133
+ category_sample_size: 10
134
+ train_shape_file:
135
+ - ./beats_runs/as20k_fulltrain/exp/cls_stats_16k/train/speech_shape
136
+ - ./beats_runs/as20k_fulltrain/exp/cls_stats_16k/train/label_shape
137
+ valid_shape_file:
138
+ - ./beats_runs/as20k_fulltrain/exp/cls_stats_16k/valid/speech_shape
139
+ - ./beats_runs/as20k_fulltrain/exp/cls_stats_16k/valid/label_shape
140
+ batch_type: folded
141
+ valid_batch_type: null
142
+ fold_length:
143
+ - 160000
144
+ - 600
145
+ sort_in_batch: descending
146
+ shuffle_within_batch: false
147
+ sort_batch: descending
148
+ multiple_iterator: false
149
+ chunk_length: 500
150
+ chunk_shift_ratio: 0.5
151
+ num_cache_chunks: 1024
152
+ chunk_excluded_key_prefixes: []
153
+ chunk_default_fs: null
154
+ chunk_max_abs_length: null
155
+ chunk_discard_short_samples: true
156
+ train_data_path_and_name_and_type:
157
+ - - ./beats_runs/as20k_fulltrain/dump/train/wav.scp
158
+ - speech
159
+ - sound
160
+ - - ./beats_runs/as20k_fulltrain/dump/train/text
161
+ - label
162
+ - text
163
+ valid_data_path_and_name_and_type:
164
+ - - ./beats_runs/as20k_fulltrain/dump/val/wav.scp
165
+ - speech
166
+ - sound
167
+ - - ./beats_runs/as20k_fulltrain/dump/val/text
168
+ - label
169
+ - text
170
+ multi_task_dataset: false
171
+ allow_variable_data_keys: false
172
+ max_cache_size: 0.0
173
+ max_cache_fd: 32
174
+ allow_multi_rates: false
175
+ valid_max_cache_size: null
176
+ exclude_weight_decay: false
177
+ exclude_weight_decay_conf: {}
178
+ optim: adamw
179
+ optim_conf:
180
+ lr: 3.0e-05
181
+ weight_decay: 0.01
182
+ betas:
183
+ - 0.9
184
+ - 0.98
185
+ scheduler: cosineannealingwarmuprestarts
186
+ scheduler_conf:
187
+ first_cycle_steps: 95000
188
+ warmup_steps: 8000
189
+ max_lr: 3.0e-05
190
+ min_lr: 5.0e-06
191
+ token_list:
192
+ - Music
193
+ - Speech
194
+ - Vehicle
195
+ - Inside,_small_room
196
+ - Animal
197
+ - Musical_instrument
198
+ - Singing
199
+ - Domestic_animals,_pets
200
+ - Guitar
201
+ - Plucked_string_instrument
202
+ - Water
203
+ - Car
204
+ - Dog
205
+ - Percussion
206
+ - Wind_instrument,_woodwind_instrument
207
+ - Outside,_urban_or_manmade
208
+ - Outside,_rural_or_natural
209
+ - Boat,_Water_vehicle
210
+ - Brass_instrument
211
+ - Fowl
212
+ - Drum
213
+ - Siren
214
+ - Engine
215
+ - Bird
216
+ - Insect
217
+ - Gunshot,_gunfire
218
+ - Wood
219
+ - Rail_transport
220
+ - Train
221
+ - Wind
222
+ - Inside,_large_room_or_hall
223
+ - Railroad_car,_train_wagon
224
+ - Child_speech,_kid_speaking
225
+ - Crowd
226
+ - Rub
227
+ - Keyboard_(musical)
228
+ - Wind_noise_(microphone)
229
+ - Pizzicato
230
+ - Emergency_vehicle
231
+ - Bird_vocalization,_bird_call,_bird_song
232
+ - Livestock,_farm_animals,_working_animals
233
+ - Cat
234
+ - Organ
235
+ - Fly,_housefly
236
+ - Mechanisms
237
+ - Bowed_string_instrument
238
+ - Rain
239
+ - Laughter
240
+ - Aircraft
241
+ - Electronic_music
242
+ - Effects_unit
243
+ - Hum
244
+ - Tools
245
+ - Drum_kit
246
+ - Snare_drum
247
+ - Hiss
248
+ - Piano
249
+ - Water_tap,_faucet
250
+ - Rimshot
251
+ - Bass_drum
252
+ - Chicken,_rooster
253
+ - Marimba,_xylophone
254
+ - Horse
255
+ - Song
256
+ - Quack
257
+ - Power_tool
258
+ - Heart_sounds,_heartbeat
259
+ - Goose
260
+ - Hammond_organ
261
+ - Rock_music
262
+ - Ocean
263
+ - Mains_hum
264
+ - Thunder
265
+ - Chime
266
+ - Electronic_dance_music
267
+ - Typing
268
+ - Sink_(filling_or_washing)
269
+ - Raindrop
270
+ - Cello
271
+ - Electric_guitar
272
+ - Cheering
273
+ - Church_bell
274
+ - Christian_music
275
+ - Drum_roll
276
+ - Trombone
277
+ - Glockenspiel
278
+ - Trumpet
279
+ - Cymbal
280
+ - Tabla
281
+ - Clickety-clack
282
+ - Cricket
283
+ - Steam_whistle
284
+ - Explosion
285
+ - Saxophone
286
+ - Thunderstorm
287
+ - Pop_music
288
+ - Zither
289
+ - Applause
290
+ - Choir
291
+ - Whack,_thwack
292
+ - Clarinet
293
+ - Camera
294
+ - Electric_piano
295
+ - Independent_music
296
+ - Fire
297
+ - Frog
298
+ - Jet_engine
299
+ - Music_of_Asia
300
+ - Ding
301
+ - Waves,_surf
302
+ - Cattle,_bovinae
303
+ - Turkey
304
+ - Television
305
+ - Coo
306
+ - Scratching_(performance_technique)
307
+ - Flute
308
+ - Liquid
309
+ - Harp
310
+ - Progressive_rock
311
+ - Happy_music
312
+ - Steel_guitar,_slide_guitar
313
+ - Whoosh,_swoosh,_swish
314
+ - Boom
315
+ - Breathing
316
+ - Electronic_organ
317
+ - Environmental_noise
318
+ - Distortion
319
+ - Alarm_clock
320
+ - Fixed-wing_aircraft,_airplane
321
+ - Violin,_fiddle
322
+ - Whistling
323
+ - Accordion
324
+ - Disco
325
+ - Pump_(liquid)
326
+ - Waterfall
327
+ - Beep,_bleep
328
+ - Blues
329
+ - Grunge
330
+ - Hip_hop_music
331
+ - Whistle
332
+ - Fusillade
333
+ - Splash,_splatter
334
+ - Gush
335
+ - Toothbrush
336
+ - Knock
337
+ - Gargling
338
+ - Snoring
339
+ - Hammer
340
+ - Gobble
341
+ - Walk,_footsteps
342
+ - Jackhammer
343
+ - Filing_(rasp)
344
+ - Snort
345
+ - Narration,_monologue
346
+ - Tire_squeal
347
+ - Fire_alarm
348
+ - Squeal
349
+ - Meow
350
+ - Caterwaul
351
+ - Cutlery,_silverware
352
+ - Mantra
353
+ - Opera
354
+ - Classical_music
355
+ - Theremin
356
+ - Burst,_pop
357
+ - Drip
358
+ - Tick
359
+ - Children_shouting
360
+ - Creak
361
+ - Hiccup
362
+ - Pigeon,_dove
363
+ - Bicycle_bell
364
+ - Baby_cry,_infant_cry
365
+ - Duck
366
+ - Fireworks
367
+ - Tambourine
368
+ - Rodents,_rats,_mice
369
+ - Buzzer
370
+ - Splinter
371
+ - Writing
372
+ - Goat
373
+ - Sheep
374
+ - Heavy_metal
375
+ - Ska
376
+ - Neigh,_whinny
377
+ - Sizzle
378
+ - Rowboat,_canoe,_kayak
379
+ - Wood_block
380
+ - Clang
381
+ - Door
382
+ - Female_singing
383
+ - Stream
384
+ - Chant
385
+ - Vocal_music
386
+ - Yodeling
387
+ - Bee,_wasp,_etc.
388
+ - Air_brake
389
+ - Whir
390
+ - Bird_flight,_flapping_wings
391
+ - French_horn
392
+ - Telephone_dialing,_DTMF
393
+ - Squeak
394
+ - Sitar
395
+ - Smoke_detector,_smoke_alarm
396
+ - Tick-tock
397
+ - Gurgling
398
+ - Bellow
399
+ - Harmonic
400
+ - Male_singing
401
+ - Giggle
402
+ - Bark
403
+ - Vibration
404
+ - Drill
405
+ - Skidding
406
+ - Scratch
407
+ - Drawer_open_or_close
408
+ - Chop
409
+ - Drum_machine
410
+ - Squish
411
+ - Toilet_flush
412
+ - Fart
413
+ - Basketball_bounce
414
+ - Electronic_tuner
415
+ - Singing_bowl
416
+ - Squawk
417
+ - Conversation
418
+ - Reggae
419
+ - Funny_music
420
+ - Scrape
421
+ - Sewing_machine
422
+ - Tender_music
423
+ - Swing_music
424
+ - Dishes,_pots,_and_pans
425
+ - Sampler
426
+ - Synthesizer
427
+ - Clapping
428
+ - Hubbub,_speech_noise,_speech_babble
429
+ - Engine_knocking
430
+ - Canidae,_dogs,_wolves
431
+ - Chainsaw
432
+ - Pour
433
+ - Croak
434
+ - Chewing,_mastication
435
+ - Cowbell
436
+ - Propeller,_airscrew
437
+ - Didgeridoo
438
+ - Ringtone
439
+ - Rattle_(instrument)
440
+ - Artillery_fire
441
+ - Cash_register
442
+ - Crack
443
+ - Growling
444
+ - Mosquito
445
+ - Carnatic_music
446
+ - Honk
447
+ - Howl
448
+ - Cacophony
449
+ - Gospel_music
450
+ - Firecracker
451
+ - Strum
452
+ - Motorboat,_speedboat
453
+ - Clock
454
+ - Dance_music
455
+ - Microwave_oven
456
+ - Country
457
+ - Bluegrass
458
+ - Rattle
459
+ - Mallet_percussion
460
+ - Computer_keyboard
461
+ - Bass_guitar
462
+ - Electric_shaver,_electric_razor
463
+ - Sawing
464
+ - Owl
465
+ - Whip
466
+ - White_noise
467
+ - Chirp_tone
468
+ - Boiling
469
+ - Ship
470
+ - Mouse
471
+ - Breaking
472
+ - Silence
473
+ - Throat_clearing
474
+ - Bleat
475
+ - Salsa_music
476
+ - Patter
477
+ - Vibraphone
478
+ - Flap
479
+ - Typewriter
480
+ - Change_ringing_(campanology)
481
+ - Trickle,_dribble
482
+ - Video_game_music
483
+ - Glass
484
+ - Dial_tone
485
+ - Radio
486
+ - Bell
487
+ - Moo
488
+ - Heart_murmur
489
+ - Clatter
490
+ - Sniff
491
+ - Double_bass
492
+ - Background_music
493
+ - Lawn_mower
494
+ - Printer
495
+ - House_music
496
+ - Tearing
497
+ - Angry_music
498
+ - Male_speech,_man_speaking
499
+ - Wild_animals
500
+ - Cupboard_open_or_close
501
+ - Harpsichord
502
+ - Light_engine_(high_frequency)
503
+ - Child_singing
504
+ - Zipper_(clothing)
505
+ - Jazz
506
+ - Belly_laugh
507
+ - Roar
508
+ - Motor_vehicle_(road)
509
+ - Crowing,_cock-a-doodle-doo
510
+ - Cluck
511
+ - Sad_music
512
+ - Hi-hat
513
+ - Cough
514
+ - Stomach_rumble
515
+ - Alarm
516
+ - String_section
517
+ - Sonar
518
+ - Keys_jangling
519
+ - Synthetic_singing
520
+ - Rapping
521
+ - Sidetone
522
+ - Orchestra
523
+ - Throbbing
524
+ - Whale_vocalization
525
+ - Thunk
526
+ - Children_playing
527
+ - Snake
528
+ - Chink,_clink
529
+ - Chirp,_tweet
530
+ - Boing
531
+ - Shuffle
532
+ - Pulse
533
+ - Punk_rock
534
+ - Crow
535
+ - Caw
536
+ - Static
537
+ - Clicking
538
+ - Snicker
539
+ - Whispering
540
+ - Pink_noise
541
+ - Crushing
542
+ - Wedding_music
543
+ - Crumpling,_crinkling
544
+ - Crackle
545
+ - Whoop
546
+ - Electric_toothbrush
547
+ - Train_wheels_squealing
548
+ - Yell
549
+ - Wind_chime
550
+ - Frying_(food)
551
+ - Christmas_music
552
+ - Fill_(with_liquid)
553
+ - Reverberation
554
+ - Beatboxing
555
+ - Harmonica
556
+ - Banjo
557
+ - Sliding_door
558
+ - Groan
559
+ - Bagpipes
560
+ - Spray
561
+ - Stir
562
+ - Acoustic_guitar
563
+ - Tap
564
+ - Chorus_effect
565
+ - Noise
566
+ - Crunch
567
+ - Biting
568
+ - Aircraft_engine
569
+ - Busy_signal
570
+ - Bang
571
+ - Techno
572
+ - Tuning_fork
573
+ - Tapping_(guitar_technique)
574
+ - Pig
575
+ - Maraca
576
+ - Vacuum_cleaner
577
+ - Mandolin
578
+ - Electronica
579
+ - Theme_music
580
+ - Yip
581
+ - A_capella
582
+ - Rustle
583
+ - Chatter
584
+ - Traditional_music
585
+ - Soul_music
586
+ - Rustling_leaves
587
+ - Afrobeat
588
+ - Hoot
589
+ - Slosh
590
+ - Roaring_cats_(lions,_tigers)
591
+ - Chopping_(food)
592
+ - Heavy_engine_(low_frequency)
593
+ - Sine_wave
594
+ - Speech_synthesizer
595
+ - Middle_Eastern_music
596
+ - Music_of_Latin_America
597
+ - Arrow
598
+ - Timpani
599
+ - Eruption
600
+ - Shofar
601
+ - Jingle_bell
602
+ - Humming
603
+ - Sanding
604
+ - Female_speech,_woman_speaking
605
+ - Gong
606
+ - Rain_on_surface
607
+ - Pant
608
+ - Dubstep
609
+ - Clip-clop
610
+ - Finger_snapping
611
+ - Blender
612
+ - Drum_and_bass
613
+ - Bouncing
614
+ - Vehicle_horn,_car_horn,_honking
615
+ - Slam
616
+ - Idling
617
+ - Rhythm_and_blues
618
+ - Race_car,_auto_racing
619
+ - Single-lens_reflex_camera
620
+ - Smash,_crash
621
+ - Purr
622
+ - Shatter
623
+ - Steelpan
624
+ - Whimper_(dog)
625
+ - Power_windows,_electric_windows
626
+ - Battle_cry
627
+ - Scary_music
628
+ - Hands
629
+ - Echo
630
+ - Truck
631
+ - Buzz
632
+ - Mechanical_fan
633
+ - Plop
634
+ - Run
635
+ - Gasp
636
+ - Psychedelic_rock
637
+ - Grunt
638
+ - Helicopter
639
+ - Dental_drill,_dentist's_drill
640
+ - Babbling
641
+ - Zing
642
+ - Oink
643
+ - Soundtrack_music
644
+ - Ambulance_(siren)
645
+ - Exciting_music
646
+ - Telephone
647
+ - Jingle_(music)
648
+ - Tubular_bells
649
+ - Burping,_eructation
650
+ - Baby_laughter
651
+ - Ping
652
+ - Bow-wow
653
+ - Foghorn
654
+ - Machine_gun
655
+ - Ukulele
656
+ - Telephone_bell_ringing
657
+ - Pulleys
658
+ - Gears
659
+ - Sigh
660
+ - Coin_(dropping)
661
+ - Music_of_Africa
662
+ - Scissors
663
+ - Inside,_public_space
664
+ - Trance_music
665
+ - Roll
666
+ - Thump,_thud
667
+ - Air_conditioning
668
+ - Ding-dong
669
+ - Ratchet,_pawl
670
+ - Hair_dryer
671
+ - Shout
672
+ - Ambient_music
673
+ - Music_for_children
674
+ - Toot
675
+ - Bathtub_(filling_or_washing)
676
+ - Slap,_smack
677
+ - Chuckle,_chortle
678
+ - Traffic_noise,_roadway_noise
679
+ - Bicycle
680
+ - Whimper
681
+ - Doorbell
682
+ - Wheeze
683
+ - Sailboat,_sailing_ship
684
+ - Cap_gun
685
+ - Wail,_moan
686
+ - Rock_and_roll
687
+ - Jingle,_tinkle
688
+ - Fire_engine,_fire_truck_(siren)
689
+ - Funk
690
+ - Lullaby
691
+ - Field_recording
692
+ - Skateboard
693
+ - Steam
694
+ - Rumble
695
+ - Medium_engine_(mid_frequency)
696
+ - Sound_effect
697
+ - Flamenco
698
+ - Shuffling_cards
699
+ - Subway,_metro,_underground
700
+ - Police_car_(siren)
701
+ - Folk_music
702
+ - Crying,_sobbing
703
+ - New-age_music
704
+ - Ice_cream_truck,_ice_cream_van
705
+ - Music_of_Bollywood
706
+ - Accelerating,_revving,_vroom
707
+ - Screaming
708
+ - Motorcycle
709
+ - Engine_starting
710
+ - Train_whistle
711
+ - Car_passing_by
712
+ - Bus
713
+ - Sneeze
714
+ - Train_horn
715
+ - Air_horn,_truck_horn
716
+ - Civil_defense_siren
717
+ - Car_alarm
718
+ - Reversing_beeps
719
+ - <unk>
720
+ token_type: word
721
+ init: xavier_normal
722
+ input_size: 1
723
+ use_preprocessor: true
724
+ frontend: null
725
+ frontend_conf: {}
726
+ specaug: null
727
+ specaug_conf: {}
728
+ normalize: null
729
+ normalize_conf: {}
730
+ preencoder: null
731
+ preencoder_conf: {}
732
+ encoder: beats
733
+ encoder_conf:
734
+ beats_ckpt_path: /compute/babel-13-33/sbharad2/models/BEATs/BEATs_iter3_plus_AS20K.pt
735
+ beats_config:
736
+ layer_wise_gradient_decay_ratio: 0.3
737
+ encoder_layerdrop: 0.1
738
+ dropout: 0.0
739
+ use_weighted_representation: false
740
+ specaug_config:
741
+ apply_time_warp: true
742
+ apply_freq_mask: false
743
+ apply_time_mask: true
744
+ time_mask_width_ratio_range:
745
+ - 0
746
+ - 0.06
747
+ num_time_mask: 1
748
+ roll_augment: true
749
+ roll_interval: 1
750
+ decoder: linear
751
+ decoder_conf: {}
752
+ model: espnet
753
+ model_conf:
754
+ classification_type: multi-label
755
+ mixup_augmentation: true
756
+ lsm_weight: 0.0
757
+ required:
758
+ - output_dir
759
+ - token_list
760
+ version: '202412'
761
+ distributed: false
762
+ ```
763
+
764
+ </details>
765
+
766
+
767
+
768
+ ### Citing ESPnet
769
+
770
+ ```bibtex
771
+ @inproceedings{watanabe2018espnet,
772
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
773
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
774
+ year={2018},
775
+ booktitle={Proceedings of Interspeech},
776
+ pages={2207--2211},
777
+ doi={10.21437/Interspeech.2018-1456},
778
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
779
+ }
780
+
781
+
782
+
783
+
784
+
785
+
786
+ ```
787
+
788
+ or arXiv:
789
+
790
+ ```bibtex
791
+ @misc{watanabe2018espnet,
792
+ title={ESPnet: End-to-End Speech Processing Toolkit},
793
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
794
+ year={2018},
795
+ eprint={1804.00015},
796
+ archivePrefix={arXiv},
797
+ primaryClass={cs.CL}
798
+ }
799
+ ```
as20k_fulltrain/data/token_list ADDED
@@ -0,0 +1,528 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Music
2
+ Speech
3
+ Vehicle
4
+ Inside,_small_room
5
+ Animal
6
+ Musical_instrument
7
+ Singing
8
+ Domestic_animals,_pets
9
+ Guitar
10
+ Plucked_string_instrument
11
+ Water
12
+ Car
13
+ Dog
14
+ Percussion
15
+ Wind_instrument,_woodwind_instrument
16
+ Outside,_urban_or_manmade
17
+ Outside,_rural_or_natural
18
+ Boat,_Water_vehicle
19
+ Brass_instrument
20
+ Fowl
21
+ Drum
22
+ Siren
23
+ Engine
24
+ Bird
25
+ Insect
26
+ Gunshot,_gunfire
27
+ Wood
28
+ Rail_transport
29
+ Train
30
+ Wind
31
+ Inside,_large_room_or_hall
32
+ Railroad_car,_train_wagon
33
+ Child_speech,_kid_speaking
34
+ Crowd
35
+ Rub
36
+ Keyboard_(musical)
37
+ Wind_noise_(microphone)
38
+ Pizzicato
39
+ Emergency_vehicle
40
+ Bird_vocalization,_bird_call,_bird_song
41
+ Livestock,_farm_animals,_working_animals
42
+ Cat
43
+ Organ
44
+ Fly,_housefly
45
+ Mechanisms
46
+ Bowed_string_instrument
47
+ Rain
48
+ Laughter
49
+ Aircraft
50
+ Electronic_music
51
+ Effects_unit
52
+ Hum
53
+ Tools
54
+ Drum_kit
55
+ Snare_drum
56
+ Hiss
57
+ Piano
58
+ Water_tap,_faucet
59
+ Rimshot
60
+ Bass_drum
61
+ Chicken,_rooster
62
+ Marimba,_xylophone
63
+ Horse
64
+ Song
65
+ Quack
66
+ Power_tool
67
+ Heart_sounds,_heartbeat
68
+ Goose
69
+ Hammond_organ
70
+ Rock_music
71
+ Ocean
72
+ Mains_hum
73
+ Thunder
74
+ Chime
75
+ Electronic_dance_music
76
+ Typing
77
+ Sink_(filling_or_washing)
78
+ Raindrop
79
+ Cello
80
+ Electric_guitar
81
+ Cheering
82
+ Church_bell
83
+ Christian_music
84
+ Drum_roll
85
+ Trombone
86
+ Glockenspiel
87
+ Trumpet
88
+ Cymbal
89
+ Tabla
90
+ Clickety-clack
91
+ Cricket
92
+ Steam_whistle
93
+ Explosion
94
+ Saxophone
95
+ Thunderstorm
96
+ Pop_music
97
+ Zither
98
+ Applause
99
+ Choir
100
+ Whack,_thwack
101
+ Clarinet
102
+ Camera
103
+ Electric_piano
104
+ Independent_music
105
+ Fire
106
+ Frog
107
+ Jet_engine
108
+ Music_of_Asia
109
+ Ding
110
+ Waves,_surf
111
+ Cattle,_bovinae
112
+ Turkey
113
+ Television
114
+ Coo
115
+ Scratching_(performance_technique)
116
+ Flute
117
+ Liquid
118
+ Harp
119
+ Progressive_rock
120
+ Happy_music
121
+ Steel_guitar,_slide_guitar
122
+ Whoosh,_swoosh,_swish
123
+ Boom
124
+ Breathing
125
+ Electronic_organ
126
+ Environmental_noise
127
+ Distortion
128
+ Alarm_clock
129
+ Fixed-wing_aircraft,_airplane
130
+ Violin,_fiddle
131
+ Whistling
132
+ Accordion
133
+ Disco
134
+ Pump_(liquid)
135
+ Waterfall
136
+ Beep,_bleep
137
+ Blues
138
+ Grunge
139
+ Hip_hop_music
140
+ Whistle
141
+ Fusillade
142
+ Splash,_splatter
143
+ Gush
144
+ Toothbrush
145
+ Knock
146
+ Gargling
147
+ Snoring
148
+ Hammer
149
+ Gobble
150
+ Walk,_footsteps
151
+ Jackhammer
152
+ Filing_(rasp)
153
+ Snort
154
+ Narration,_monologue
155
+ Tire_squeal
156
+ Fire_alarm
157
+ Squeal
158
+ Meow
159
+ Caterwaul
160
+ Cutlery,_silverware
161
+ Mantra
162
+ Opera
163
+ Classical_music
164
+ Theremin
165
+ Burst,_pop
166
+ Drip
167
+ Tick
168
+ Children_shouting
169
+ Creak
170
+ Hiccup
171
+ Pigeon,_dove
172
+ Bicycle_bell
173
+ Baby_cry,_infant_cry
174
+ Duck
175
+ Fireworks
176
+ Tambourine
177
+ Rodents,_rats,_mice
178
+ Buzzer
179
+ Splinter
180
+ Writing
181
+ Goat
182
+ Sheep
183
+ Heavy_metal
184
+ Ska
185
+ Neigh,_whinny
186
+ Sizzle
187
+ Rowboat,_canoe,_kayak
188
+ Wood_block
189
+ Clang
190
+ Door
191
+ Female_singing
192
+ Stream
193
+ Chant
194
+ Vocal_music
195
+ Yodeling
196
+ Bee,_wasp,_etc.
197
+ Air_brake
198
+ Whir
199
+ Bird_flight,_flapping_wings
200
+ French_horn
201
+ Telephone_dialing,_DTMF
202
+ Squeak
203
+ Sitar
204
+ Smoke_detector,_smoke_alarm
205
+ Tick-tock
206
+ Gurgling
207
+ Bellow
208
+ Harmonic
209
+ Male_singing
210
+ Giggle
211
+ Bark
212
+ Vibration
213
+ Drill
214
+ Skidding
215
+ Scratch
216
+ Drawer_open_or_close
217
+ Chop
218
+ Drum_machine
219
+ Squish
220
+ Toilet_flush
221
+ Fart
222
+ Basketball_bounce
223
+ Electronic_tuner
224
+ Singing_bowl
225
+ Squawk
226
+ Conversation
227
+ Reggae
228
+ Funny_music
229
+ Scrape
230
+ Sewing_machine
231
+ Tender_music
232
+ Swing_music
233
+ Dishes,_pots,_and_pans
234
+ Sampler
235
+ Synthesizer
236
+ Clapping
237
+ Hubbub,_speech_noise,_speech_babble
238
+ Engine_knocking
239
+ Canidae,_dogs,_wolves
240
+ Chainsaw
241
+ Pour
242
+ Croak
243
+ Chewing,_mastication
244
+ Cowbell
245
+ Propeller,_airscrew
246
+ Didgeridoo
247
+ Ringtone
248
+ Rattle_(instrument)
249
+ Artillery_fire
250
+ Cash_register
251
+ Crack
252
+ Growling
253
+ Mosquito
254
+ Carnatic_music
255
+ Honk
256
+ Howl
257
+ Cacophony
258
+ Gospel_music
259
+ Firecracker
260
+ Strum
261
+ Motorboat,_speedboat
262
+ Clock
263
+ Dance_music
264
+ Microwave_oven
265
+ Country
266
+ Bluegrass
267
+ Rattle
268
+ Mallet_percussion
269
+ Computer_keyboard
270
+ Bass_guitar
271
+ Electric_shaver,_electric_razor
272
+ Sawing
273
+ Owl
274
+ Whip
275
+ White_noise
276
+ Chirp_tone
277
+ Boiling
278
+ Ship
279
+ Mouse
280
+ Breaking
281
+ Silence
282
+ Throat_clearing
283
+ Bleat
284
+ Salsa_music
285
+ Patter
286
+ Vibraphone
287
+ Flap
288
+ Typewriter
289
+ Change_ringing_(campanology)
290
+ Trickle,_dribble
291
+ Video_game_music
292
+ Glass
293
+ Dial_tone
294
+ Radio
295
+ Bell
296
+ Moo
297
+ Heart_murmur
298
+ Clatter
299
+ Sniff
300
+ Double_bass
301
+ Background_music
302
+ Lawn_mower
303
+ Printer
304
+ House_music
305
+ Tearing
306
+ Angry_music
307
+ Male_speech,_man_speaking
308
+ Wild_animals
309
+ Cupboard_open_or_close
310
+ Harpsichord
311
+ Light_engine_(high_frequency)
312
+ Child_singing
313
+ Zipper_(clothing)
314
+ Jazz
315
+ Belly_laugh
316
+ Roar
317
+ Motor_vehicle_(road)
318
+ Crowing,_cock-a-doodle-doo
319
+ Cluck
320
+ Sad_music
321
+ Hi-hat
322
+ Cough
323
+ Stomach_rumble
324
+ Alarm
325
+ String_section
326
+ Sonar
327
+ Keys_jangling
328
+ Synthetic_singing
329
+ Rapping
330
+ Sidetone
331
+ Orchestra
332
+ Throbbing
333
+ Whale_vocalization
334
+ Thunk
335
+ Children_playing
336
+ Snake
337
+ Chink,_clink
338
+ Chirp,_tweet
339
+ Boing
340
+ Shuffle
341
+ Pulse
342
+ Punk_rock
343
+ Crow
344
+ Caw
345
+ Static
346
+ Clicking
347
+ Snicker
348
+ Whispering
349
+ Pink_noise
350
+ Crushing
351
+ Wedding_music
352
+ Crumpling,_crinkling
353
+ Crackle
354
+ Whoop
355
+ Electric_toothbrush
356
+ Train_wheels_squealing
357
+ Yell
358
+ Wind_chime
359
+ Frying_(food)
360
+ Christmas_music
361
+ Fill_(with_liquid)
362
+ Reverberation
363
+ Beatboxing
364
+ Harmonica
365
+ Banjo
366
+ Sliding_door
367
+ Groan
368
+ Bagpipes
369
+ Spray
370
+ Stir
371
+ Acoustic_guitar
372
+ Tap
373
+ Chorus_effect
374
+ Noise
375
+ Crunch
376
+ Biting
377
+ Aircraft_engine
378
+ Busy_signal
379
+ Bang
380
+ Techno
381
+ Tuning_fork
382
+ Tapping_(guitar_technique)
383
+ Pig
384
+ Maraca
385
+ Vacuum_cleaner
386
+ Mandolin
387
+ Electronica
388
+ Theme_music
389
+ Yip
390
+ A_capella
391
+ Rustle
392
+ Chatter
393
+ Traditional_music
394
+ Soul_music
395
+ Rustling_leaves
396
+ Afrobeat
397
+ Hoot
398
+ Slosh
399
+ Roaring_cats_(lions,_tigers)
400
+ Chopping_(food)
401
+ Heavy_engine_(low_frequency)
402
+ Sine_wave
403
+ Speech_synthesizer
404
+ Middle_Eastern_music
405
+ Music_of_Latin_America
406
+ Arrow
407
+ Timpani
408
+ Eruption
409
+ Shofar
410
+ Jingle_bell
411
+ Humming
412
+ Sanding
413
+ Female_speech,_woman_speaking
414
+ Gong
415
+ Rain_on_surface
416
+ Pant
417
+ Dubstep
418
+ Clip-clop
419
+ Finger_snapping
420
+ Blender
421
+ Drum_and_bass
422
+ Bouncing
423
+ Vehicle_horn,_car_horn,_honking
424
+ Slam
425
+ Idling
426
+ Rhythm_and_blues
427
+ Race_car,_auto_racing
428
+ Single-lens_reflex_camera
429
+ Smash,_crash
430
+ Purr
431
+ Shatter
432
+ Steelpan
433
+ Whimper_(dog)
434
+ Power_windows,_electric_windows
435
+ Battle_cry
436
+ Scary_music
437
+ Hands
438
+ Echo
439
+ Truck
440
+ Buzz
441
+ Mechanical_fan
442
+ Plop
443
+ Run
444
+ Gasp
445
+ Psychedelic_rock
446
+ Grunt
447
+ Helicopter
448
+ Dental_drill,_dentist's_drill
449
+ Babbling
450
+ Zing
451
+ Oink
452
+ Soundtrack_music
453
+ Ambulance_(siren)
454
+ Exciting_music
455
+ Telephone
456
+ Jingle_(music)
457
+ Tubular_bells
458
+ Burping,_eructation
459
+ Baby_laughter
460
+ Ping
461
+ Bow-wow
462
+ Foghorn
463
+ Machine_gun
464
+ Ukulele
465
+ Telephone_bell_ringing
466
+ Pulleys
467
+ Gears
468
+ Sigh
469
+ Coin_(dropping)
470
+ Music_of_Africa
471
+ Scissors
472
+ Inside,_public_space
473
+ Trance_music
474
+ Roll
475
+ Thump,_thud
476
+ Air_conditioning
477
+ Ding-dong
478
+ Ratchet,_pawl
479
+ Hair_dryer
480
+ Shout
481
+ Ambient_music
482
+ Music_for_children
483
+ Toot
484
+ Bathtub_(filling_or_washing)
485
+ Slap,_smack
486
+ Chuckle,_chortle
487
+ Traffic_noise,_roadway_noise
488
+ Bicycle
489
+ Whimper
490
+ Doorbell
491
+ Wheeze
492
+ Sailboat,_sailing_ship
493
+ Cap_gun
494
+ Wail,_moan
495
+ Rock_and_roll
496
+ Jingle,_tinkle
497
+ Fire_engine,_fire_truck_(siren)
498
+ Funk
499
+ Lullaby
500
+ Field_recording
501
+ Skateboard
502
+ Steam
503
+ Rumble
504
+ Medium_engine_(mid_frequency)
505
+ Sound_effect
506
+ Flamenco
507
+ Shuffling_cards
508
+ Subway,_metro,_underground
509
+ Police_car_(siren)
510
+ Folk_music
511
+ Crying,_sobbing
512
+ New-age_music
513
+ Ice_cream_truck,_ice_cream_van
514
+ Music_of_Bollywood
515
+ Accelerating,_revving,_vroom
516
+ Screaming
517
+ Motorcycle
518
+ Engine_starting
519
+ Train_whistle
520
+ Car_passing_by
521
+ Bus
522
+ Sneeze
523
+ Train_horn
524
+ Air_horn,_truck_horn
525
+ Civil_defense_siren
526
+ Car_alarm
527
+ Reversing_beeps
528
+ <unk>
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/120epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c997fd80ffa768463445ff5c32b5835580d294cf0b538b8aef0a9f866622964f
3
+ size 362963013
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/RESULTS.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_cls_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Fri Jan 3 23:25:40 EST 2025`
5
+ - python version: `3.9.20 (main, Oct 3 2024, 07:27:41) [GCC 11.2.0]`
6
+ - espnet version: `espnet 202412`
7
+ - pytorch version: `pytorch 2.4.0`
8
+ - Git hash: `635b3add116ae68c056f7aa67f64591c9ba7eb3e`
9
+ - Commit date: `Thu Jan 2 11:46:32 2025 -0500`
10
+
11
+ ## cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644
12
+ |Dataset|Metric|Value|
13
+ |---|---|---|
14
+ ./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|mean_acc|47.73
15
+ ./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|mAP|37.46
16
+ ./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|mean_auc|96.58
17
+ ./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|n_labels|527.00
18
+ ./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|n_instances|20123.00
19
+
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/config.yaml ADDED
@@ -0,0 +1,707 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/beats_cls.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: ./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 2
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: true
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ cudnn_enabled: true
27
+ cudnn_benchmark: false
28
+ cudnn_deterministic: true
29
+ use_tf32: false
30
+ collect_stats: false
31
+ write_collected_feats: false
32
+ max_epoch: 160
33
+ patience: null
34
+ val_scheduler_criterion:
35
+ - valid
36
+ - loss
37
+ early_stopping_criterion:
38
+ - valid
39
+ - loss
40
+ - min
41
+ best_model_criterion:
42
+ - - valid
43
+ - mAP
44
+ - max
45
+ keep_nbest_models: 1
46
+ nbest_averaging_interval: 0
47
+ grad_clip: 1
48
+ grad_clip_type: 2.0
49
+ grad_noise: false
50
+ accum_grad: 1
51
+ no_forward_run: false
52
+ resume: true
53
+ train_dtype: float32
54
+ use_amp: false
55
+ log_interval: null
56
+ use_matplotlib: true
57
+ use_tensorboard: true
58
+ create_graph_in_tensorboard: false
59
+ use_wandb: false
60
+ wandb_project: null
61
+ wandb_id: null
62
+ wandb_entity: null
63
+ wandb_name: null
64
+ wandb_model_log_interval: -1
65
+ detect_anomaly: false
66
+ use_adapter: false
67
+ adapter: lora
68
+ save_strategy: all
69
+ adapter_conf: {}
70
+ pretrain_path: null
71
+ init_param: []
72
+ ignore_init_mismatch: false
73
+ freeze_param: []
74
+ num_iters_per_epoch: null
75
+ batch_size: 80
76
+ valid_batch_size: 1200
77
+ batch_bins: 1000000
78
+ valid_batch_bins: null
79
+ category_sample_size: 10
80
+ train_shape_file:
81
+ - ./beats_runs/as20k_fulltrain/exp/cls_stats_16k/train/speech_shape
82
+ - ./beats_runs/as20k_fulltrain/exp/cls_stats_16k/train/label_shape
83
+ valid_shape_file:
84
+ - ./beats_runs/as20k_fulltrain/exp/cls_stats_16k/valid/speech_shape
85
+ - ./beats_runs/as20k_fulltrain/exp/cls_stats_16k/valid/label_shape
86
+ batch_type: folded
87
+ valid_batch_type: null
88
+ fold_length:
89
+ - 160000
90
+ - 600
91
+ sort_in_batch: descending
92
+ shuffle_within_batch: false
93
+ sort_batch: descending
94
+ multiple_iterator: false
95
+ chunk_length: 500
96
+ chunk_shift_ratio: 0.5
97
+ num_cache_chunks: 1024
98
+ chunk_excluded_key_prefixes: []
99
+ chunk_default_fs: null
100
+ chunk_max_abs_length: null
101
+ chunk_discard_short_samples: true
102
+ train_data_path_and_name_and_type:
103
+ - - ./beats_runs/as20k_fulltrain/dump/train/wav.scp
104
+ - speech
105
+ - sound
106
+ - - ./beats_runs/as20k_fulltrain/dump/train/text
107
+ - label
108
+ - text
109
+ valid_data_path_and_name_and_type:
110
+ - - ./beats_runs/as20k_fulltrain/dump/val/wav.scp
111
+ - speech
112
+ - sound
113
+ - - ./beats_runs/as20k_fulltrain/dump/val/text
114
+ - label
115
+ - text
116
+ multi_task_dataset: false
117
+ allow_variable_data_keys: false
118
+ max_cache_size: 0.0
119
+ max_cache_fd: 32
120
+ allow_multi_rates: false
121
+ valid_max_cache_size: null
122
+ exclude_weight_decay: false
123
+ exclude_weight_decay_conf: {}
124
+ optim: adamw
125
+ optim_conf:
126
+ lr: 3.0e-05
127
+ weight_decay: 0.01
128
+ betas:
129
+ - 0.9
130
+ - 0.98
131
+ scheduler: cosineannealingwarmuprestarts
132
+ scheduler_conf:
133
+ first_cycle_steps: 95000
134
+ warmup_steps: 8000
135
+ max_lr: 3.0e-05
136
+ min_lr: 5.0e-06
137
+ token_list:
138
+ - Music
139
+ - Speech
140
+ - Vehicle
141
+ - Inside,_small_room
142
+ - Animal
143
+ - Musical_instrument
144
+ - Singing
145
+ - Domestic_animals,_pets
146
+ - Guitar
147
+ - Plucked_string_instrument
148
+ - Water
149
+ - Car
150
+ - Dog
151
+ - Percussion
152
+ - Wind_instrument,_woodwind_instrument
153
+ - Outside,_urban_or_manmade
154
+ - Outside,_rural_or_natural
155
+ - Boat,_Water_vehicle
156
+ - Brass_instrument
157
+ - Fowl
158
+ - Drum
159
+ - Siren
160
+ - Engine
161
+ - Bird
162
+ - Insect
163
+ - Gunshot,_gunfire
164
+ - Wood
165
+ - Rail_transport
166
+ - Train
167
+ - Wind
168
+ - Inside,_large_room_or_hall
169
+ - Railroad_car,_train_wagon
170
+ - Child_speech,_kid_speaking
171
+ - Crowd
172
+ - Rub
173
+ - Keyboard_(musical)
174
+ - Wind_noise_(microphone)
175
+ - Pizzicato
176
+ - Emergency_vehicle
177
+ - Bird_vocalization,_bird_call,_bird_song
178
+ - Livestock,_farm_animals,_working_animals
179
+ - Cat
180
+ - Organ
181
+ - Fly,_housefly
182
+ - Mechanisms
183
+ - Bowed_string_instrument
184
+ - Rain
185
+ - Laughter
186
+ - Aircraft
187
+ - Electronic_music
188
+ - Effects_unit
189
+ - Hum
190
+ - Tools
191
+ - Drum_kit
192
+ - Snare_drum
193
+ - Hiss
194
+ - Piano
195
+ - Water_tap,_faucet
196
+ - Rimshot
197
+ - Bass_drum
198
+ - Chicken,_rooster
199
+ - Marimba,_xylophone
200
+ - Horse
201
+ - Song
202
+ - Quack
203
+ - Power_tool
204
+ - Heart_sounds,_heartbeat
205
+ - Goose
206
+ - Hammond_organ
207
+ - Rock_music
208
+ - Ocean
209
+ - Mains_hum
210
+ - Thunder
211
+ - Chime
212
+ - Electronic_dance_music
213
+ - Typing
214
+ - Sink_(filling_or_washing)
215
+ - Raindrop
216
+ - Cello
217
+ - Electric_guitar
218
+ - Cheering
219
+ - Church_bell
220
+ - Christian_music
221
+ - Drum_roll
222
+ - Trombone
223
+ - Glockenspiel
224
+ - Trumpet
225
+ - Cymbal
226
+ - Tabla
227
+ - Clickety-clack
228
+ - Cricket
229
+ - Steam_whistle
230
+ - Explosion
231
+ - Saxophone
232
+ - Thunderstorm
233
+ - Pop_music
234
+ - Zither
235
+ - Applause
236
+ - Choir
237
+ - Whack,_thwack
238
+ - Clarinet
239
+ - Camera
240
+ - Electric_piano
241
+ - Independent_music
242
+ - Fire
243
+ - Frog
244
+ - Jet_engine
245
+ - Music_of_Asia
246
+ - Ding
247
+ - Waves,_surf
248
+ - Cattle,_bovinae
249
+ - Turkey
250
+ - Television
251
+ - Coo
252
+ - Scratching_(performance_technique)
253
+ - Flute
254
+ - Liquid
255
+ - Harp
256
+ - Progressive_rock
257
+ - Happy_music
258
+ - Steel_guitar,_slide_guitar
259
+ - Whoosh,_swoosh,_swish
260
+ - Boom
261
+ - Breathing
262
+ - Electronic_organ
263
+ - Environmental_noise
264
+ - Distortion
265
+ - Alarm_clock
266
+ - Fixed-wing_aircraft,_airplane
267
+ - Violin,_fiddle
268
+ - Whistling
269
+ - Accordion
270
+ - Disco
271
+ - Pump_(liquid)
272
+ - Waterfall
273
+ - Beep,_bleep
274
+ - Blues
275
+ - Grunge
276
+ - Hip_hop_music
277
+ - Whistle
278
+ - Fusillade
279
+ - Splash,_splatter
280
+ - Gush
281
+ - Toothbrush
282
+ - Knock
283
+ - Gargling
284
+ - Snoring
285
+ - Hammer
286
+ - Gobble
287
+ - Walk,_footsteps
288
+ - Jackhammer
289
+ - Filing_(rasp)
290
+ - Snort
291
+ - Narration,_monologue
292
+ - Tire_squeal
293
+ - Fire_alarm
294
+ - Squeal
295
+ - Meow
296
+ - Caterwaul
297
+ - Cutlery,_silverware
298
+ - Mantra
299
+ - Opera
300
+ - Classical_music
301
+ - Theremin
302
+ - Burst,_pop
303
+ - Drip
304
+ - Tick
305
+ - Children_shouting
306
+ - Creak
307
+ - Hiccup
308
+ - Pigeon,_dove
309
+ - Bicycle_bell
310
+ - Baby_cry,_infant_cry
311
+ - Duck
312
+ - Fireworks
313
+ - Tambourine
314
+ - Rodents,_rats,_mice
315
+ - Buzzer
316
+ - Splinter
317
+ - Writing
318
+ - Goat
319
+ - Sheep
320
+ - Heavy_metal
321
+ - Ska
322
+ - Neigh,_whinny
323
+ - Sizzle
324
+ - Rowboat,_canoe,_kayak
325
+ - Wood_block
326
+ - Clang
327
+ - Door
328
+ - Female_singing
329
+ - Stream
330
+ - Chant
331
+ - Vocal_music
332
+ - Yodeling
333
+ - Bee,_wasp,_etc.
334
+ - Air_brake
335
+ - Whir
336
+ - Bird_flight,_flapping_wings
337
+ - French_horn
338
+ - Telephone_dialing,_DTMF
339
+ - Squeak
340
+ - Sitar
341
+ - Smoke_detector,_smoke_alarm
342
+ - Tick-tock
343
+ - Gurgling
344
+ - Bellow
345
+ - Harmonic
346
+ - Male_singing
347
+ - Giggle
348
+ - Bark
349
+ - Vibration
350
+ - Drill
351
+ - Skidding
352
+ - Scratch
353
+ - Drawer_open_or_close
354
+ - Chop
355
+ - Drum_machine
356
+ - Squish
357
+ - Toilet_flush
358
+ - Fart
359
+ - Basketball_bounce
360
+ - Electronic_tuner
361
+ - Singing_bowl
362
+ - Squawk
363
+ - Conversation
364
+ - Reggae
365
+ - Funny_music
366
+ - Scrape
367
+ - Sewing_machine
368
+ - Tender_music
369
+ - Swing_music
370
+ - Dishes,_pots,_and_pans
371
+ - Sampler
372
+ - Synthesizer
373
+ - Clapping
374
+ - Hubbub,_speech_noise,_speech_babble
375
+ - Engine_knocking
376
+ - Canidae,_dogs,_wolves
377
+ - Chainsaw
378
+ - Pour
379
+ - Croak
380
+ - Chewing,_mastication
381
+ - Cowbell
382
+ - Propeller,_airscrew
383
+ - Didgeridoo
384
+ - Ringtone
385
+ - Rattle_(instrument)
386
+ - Artillery_fire
387
+ - Cash_register
388
+ - Crack
389
+ - Growling
390
+ - Mosquito
391
+ - Carnatic_music
392
+ - Honk
393
+ - Howl
394
+ - Cacophony
395
+ - Gospel_music
396
+ - Firecracker
397
+ - Strum
398
+ - Motorboat,_speedboat
399
+ - Clock
400
+ - Dance_music
401
+ - Microwave_oven
402
+ - Country
403
+ - Bluegrass
404
+ - Rattle
405
+ - Mallet_percussion
406
+ - Computer_keyboard
407
+ - Bass_guitar
408
+ - Electric_shaver,_electric_razor
409
+ - Sawing
410
+ - Owl
411
+ - Whip
412
+ - White_noise
413
+ - Chirp_tone
414
+ - Boiling
415
+ - Ship
416
+ - Mouse
417
+ - Breaking
418
+ - Silence
419
+ - Throat_clearing
420
+ - Bleat
421
+ - Salsa_music
422
+ - Patter
423
+ - Vibraphone
424
+ - Flap
425
+ - Typewriter
426
+ - Change_ringing_(campanology)
427
+ - Trickle,_dribble
428
+ - Video_game_music
429
+ - Glass
430
+ - Dial_tone
431
+ - Radio
432
+ - Bell
433
+ - Moo
434
+ - Heart_murmur
435
+ - Clatter
436
+ - Sniff
437
+ - Double_bass
438
+ - Background_music
439
+ - Lawn_mower
440
+ - Printer
441
+ - House_music
442
+ - Tearing
443
+ - Angry_music
444
+ - Male_speech,_man_speaking
445
+ - Wild_animals
446
+ - Cupboard_open_or_close
447
+ - Harpsichord
448
+ - Light_engine_(high_frequency)
449
+ - Child_singing
450
+ - Zipper_(clothing)
451
+ - Jazz
452
+ - Belly_laugh
453
+ - Roar
454
+ - Motor_vehicle_(road)
455
+ - Crowing,_cock-a-doodle-doo
456
+ - Cluck
457
+ - Sad_music
458
+ - Hi-hat
459
+ - Cough
460
+ - Stomach_rumble
461
+ - Alarm
462
+ - String_section
463
+ - Sonar
464
+ - Keys_jangling
465
+ - Synthetic_singing
466
+ - Rapping
467
+ - Sidetone
468
+ - Orchestra
469
+ - Throbbing
470
+ - Whale_vocalization
471
+ - Thunk
472
+ - Children_playing
473
+ - Snake
474
+ - Chink,_clink
475
+ - Chirp,_tweet
476
+ - Boing
477
+ - Shuffle
478
+ - Pulse
479
+ - Punk_rock
480
+ - Crow
481
+ - Caw
482
+ - Static
483
+ - Clicking
484
+ - Snicker
485
+ - Whispering
486
+ - Pink_noise
487
+ - Crushing
488
+ - Wedding_music
489
+ - Crumpling,_crinkling
490
+ - Crackle
491
+ - Whoop
492
+ - Electric_toothbrush
493
+ - Train_wheels_squealing
494
+ - Yell
495
+ - Wind_chime
496
+ - Frying_(food)
497
+ - Christmas_music
498
+ - Fill_(with_liquid)
499
+ - Reverberation
500
+ - Beatboxing
501
+ - Harmonica
502
+ - Banjo
503
+ - Sliding_door
504
+ - Groan
505
+ - Bagpipes
506
+ - Spray
507
+ - Stir
508
+ - Acoustic_guitar
509
+ - Tap
510
+ - Chorus_effect
511
+ - Noise
512
+ - Crunch
513
+ - Biting
514
+ - Aircraft_engine
515
+ - Busy_signal
516
+ - Bang
517
+ - Techno
518
+ - Tuning_fork
519
+ - Tapping_(guitar_technique)
520
+ - Pig
521
+ - Maraca
522
+ - Vacuum_cleaner
523
+ - Mandolin
524
+ - Electronica
525
+ - Theme_music
526
+ - Yip
527
+ - A_capella
528
+ - Rustle
529
+ - Chatter
530
+ - Traditional_music
531
+ - Soul_music
532
+ - Rustling_leaves
533
+ - Afrobeat
534
+ - Hoot
535
+ - Slosh
536
+ - Roaring_cats_(lions,_tigers)
537
+ - Chopping_(food)
538
+ - Heavy_engine_(low_frequency)
539
+ - Sine_wave
540
+ - Speech_synthesizer
541
+ - Middle_Eastern_music
542
+ - Music_of_Latin_America
543
+ - Arrow
544
+ - Timpani
545
+ - Eruption
546
+ - Shofar
547
+ - Jingle_bell
548
+ - Humming
549
+ - Sanding
550
+ - Female_speech,_woman_speaking
551
+ - Gong
552
+ - Rain_on_surface
553
+ - Pant
554
+ - Dubstep
555
+ - Clip-clop
556
+ - Finger_snapping
557
+ - Blender
558
+ - Drum_and_bass
559
+ - Bouncing
560
+ - Vehicle_horn,_car_horn,_honking
561
+ - Slam
562
+ - Idling
563
+ - Rhythm_and_blues
564
+ - Race_car,_auto_racing
565
+ - Single-lens_reflex_camera
566
+ - Smash,_crash
567
+ - Purr
568
+ - Shatter
569
+ - Steelpan
570
+ - Whimper_(dog)
571
+ - Power_windows,_electric_windows
572
+ - Battle_cry
573
+ - Scary_music
574
+ - Hands
575
+ - Echo
576
+ - Truck
577
+ - Buzz
578
+ - Mechanical_fan
579
+ - Plop
580
+ - Run
581
+ - Gasp
582
+ - Psychedelic_rock
583
+ - Grunt
584
+ - Helicopter
585
+ - Dental_drill,_dentist's_drill
586
+ - Babbling
587
+ - Zing
588
+ - Oink
589
+ - Soundtrack_music
590
+ - Ambulance_(siren)
591
+ - Exciting_music
592
+ - Telephone
593
+ - Jingle_(music)
594
+ - Tubular_bells
595
+ - Burping,_eructation
596
+ - Baby_laughter
597
+ - Ping
598
+ - Bow-wow
599
+ - Foghorn
600
+ - Machine_gun
601
+ - Ukulele
602
+ - Telephone_bell_ringing
603
+ - Pulleys
604
+ - Gears
605
+ - Sigh
606
+ - Coin_(dropping)
607
+ - Music_of_Africa
608
+ - Scissors
609
+ - Inside,_public_space
610
+ - Trance_music
611
+ - Roll
612
+ - Thump,_thud
613
+ - Air_conditioning
614
+ - Ding-dong
615
+ - Ratchet,_pawl
616
+ - Hair_dryer
617
+ - Shout
618
+ - Ambient_music
619
+ - Music_for_children
620
+ - Toot
621
+ - Bathtub_(filling_or_washing)
622
+ - Slap,_smack
623
+ - Chuckle,_chortle
624
+ - Traffic_noise,_roadway_noise
625
+ - Bicycle
626
+ - Whimper
627
+ - Doorbell
628
+ - Wheeze
629
+ - Sailboat,_sailing_ship
630
+ - Cap_gun
631
+ - Wail,_moan
632
+ - Rock_and_roll
633
+ - Jingle,_tinkle
634
+ - Fire_engine,_fire_truck_(siren)
635
+ - Funk
636
+ - Lullaby
637
+ - Field_recording
638
+ - Skateboard
639
+ - Steam
640
+ - Rumble
641
+ - Medium_engine_(mid_frequency)
642
+ - Sound_effect
643
+ - Flamenco
644
+ - Shuffling_cards
645
+ - Subway,_metro,_underground
646
+ - Police_car_(siren)
647
+ - Folk_music
648
+ - Crying,_sobbing
649
+ - New-age_music
650
+ - Ice_cream_truck,_ice_cream_van
651
+ - Music_of_Bollywood
652
+ - Accelerating,_revving,_vroom
653
+ - Screaming
654
+ - Motorcycle
655
+ - Engine_starting
656
+ - Train_whistle
657
+ - Car_passing_by
658
+ - Bus
659
+ - Sneeze
660
+ - Train_horn
661
+ - Air_horn,_truck_horn
662
+ - Civil_defense_siren
663
+ - Car_alarm
664
+ - Reversing_beeps
665
+ - <unk>
666
+ token_type: word
667
+ init: xavier_normal
668
+ input_size: 1
669
+ use_preprocessor: true
670
+ frontend: null
671
+ frontend_conf: {}
672
+ specaug: null
673
+ specaug_conf: {}
674
+ normalize: null
675
+ normalize_conf: {}
676
+ preencoder: null
677
+ preencoder_conf: {}
678
+ encoder: beats
679
+ encoder_conf:
680
+ beats_ckpt_path: /compute/babel-13-33/sbharad2/models/BEATs/BEATs_iter3_plus_AS20K.pt
681
+ beats_config:
682
+ layer_wise_gradient_decay_ratio: 0.3
683
+ encoder_layerdrop: 0.1
684
+ dropout: 0.0
685
+ use_weighted_representation: false
686
+ specaug_config:
687
+ apply_time_warp: true
688
+ apply_freq_mask: false
689
+ apply_time_mask: true
690
+ time_mask_width_ratio_range:
691
+ - 0
692
+ - 0.06
693
+ num_time_mask: 1
694
+ roll_augment: true
695
+ roll_interval: 1
696
+ decoder: linear
697
+ decoder_conf: {}
698
+ model: espnet
699
+ model_conf:
700
+ classification_type: multi-label
701
+ mixup_augmentation: true
702
+ lsm_weight: 0.0
703
+ required:
704
+ - output_dir
705
+ - token_list
706
+ version: '202412'
707
+ distributed: false
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/acc.png ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/backward_time.png ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/clip.png ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/forward_time.png ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/gpu_max_cached_mem_GB.png ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/grad_norm.png ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/iter_time.png ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/loss.png ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/loss_scale.png ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/mAP.png ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/optim0_lr0.png ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/optim_step_time.png ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/train_time.png ADDED
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202412'
2
+ files:
3
+ classification_model_file: /compute/babel-11-13/sbharad2/beats_run/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/120epoch.pth
4
+ python: "3.9.20 (main, Oct 3 2024, 07:27:41) \n[GCC 11.2.0]"
5
+ timestamp: 1736167061.684751
6
+ torch: 2.4.0
7
+ yaml_files:
8
+ classification_train_config: /compute/babel-11-13/sbharad2/beats_run/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/config.yaml