--- |
tags: |
- espnet |
- audio |
- classification |
language: en |
datasets: |
- bean |
license: cc-by-4.0 |
--- |
## ESPnet2 CLS model |
### `espnet/BEATs-BEAN.CornellBirdIdentification` |
This model was trained by Shikhar Bharadwaj using the bean recipe in [espnet](https://github.com/espnet/espnet/). |
### Demo: How to use in ESPnet2 |
Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html) |
if you haven't done so already. |
```bash |
cd espnet |
git checkout 9191aa59acc7d3ceaca1f48dcc8fbdad2e03484b |
pip install -e . |
cd egs2/bean/cls1 |
./run.sh --skip_data_prep false --skip_train true --download_model espnet/BEATs-BEAN.CornellBirdIdentification |
``` |
<!-- Generated by scripts/utils/show_cls_result.sh --> |
## Environments |
- date: `Wed Jan 8 05:51:08 EST 2025` |
- python version: `3.9.20 (main, Oct 3 2024, 07:27:41) [GCC 11.2.0]` |
- espnet version: `espnet 202412` |
- pytorch version: `pytorch 2.4.0` |
- Git hash: `9191aa59acc7d3ceaca1f48dcc8fbdad2e03484b` |
- Commit date: `Tue Jan 7 04:34:03 2025 -0500` |
## cls_cbi.20250107.141123 |
|Dataset|Metric|Value| |
|---|---|---| |
|/compute/babel-11-13/sbharad2/beats_run/bean.cbi/exp/cls_cbi.20250107.141123/cls_cbi.dev/score|mean_acc|68.12 |
|/compute/babel-11-13/sbharad2/beats_run/bean.cbi/exp/cls_cbi.20250107.141123/cls_cbi.dev/score|mAP|67.80 |
|/compute/babel-11-13/sbharad2/beats_run/bean.cbi/exp/cls_cbi.20250107.141123/cls_cbi.dev/score|mean_auc|93.70 |
|/compute/babel-11-13/sbharad2/beats_run/bean.cbi/exp/cls_cbi.20250107.141123/cls_cbi.dev/score|n_labels|264.00 |
|/compute/babel-11-13/sbharad2/beats_run/bean.cbi/exp/cls_cbi.20250107.141123/cls_cbi.dev/score|n_instances|3548.00 |
|/compute/babel-11-13/sbharad2/beats_run/bean.cbi/exp/cls_cbi.20250107.141123/cls_cbi.test/score|mean_acc|63.95 |
|/compute/babel-11-13/sbharad2/beats_run/bean.cbi/exp/cls_cbi.20250107.141123/cls_cbi.test/score|mAP|66.58 |
|/compute/babel-11-13/sbharad2/beats_run/bean.cbi/exp/cls_cbi.20250107.141123/cls_cbi.test/score|mean_auc|95.61 |
|/compute/babel-11-13/sbharad2/beats_run/bean.cbi/exp/cls_cbi.20250107.141123/cls_cbi.test/score|n_labels|264.00 |
|/compute/babel-11-13/sbharad2/beats_run/bean.cbi/exp/cls_cbi.20250107.141123/cls_cbi.test/score|n_instances|3620.00 |
## CLS config |
<details><summary>expand</summary> |
``` |
config: conf/beats_cbi.yaml |
print_config: false |
log_level: INFO |
drop_last_iter: false |
dry_run: false |
iterator_type: sequence |
valid_iterator_type: null |
output_dir: /compute/babel-11-13/sbharad2/beats_run/bean.cbi/exp/cls_cbi.20250107.141123 |
ngpu: 1 |
seed: 0 |
num_workers: 2 |
num_att_plot: 0 |
dist_backend: nccl |
dist_init_method: env:// |
dist_world_size: null |
dist_rank: null |
local_rank: 0 |
dist_master_addr: null |
dist_master_port: null |
dist_launcher: null |
multiprocessing_distributed: false |
unused_parameters: true |
sharded_ddp: false |
use_deepspeed: false |
deepspeed_config: null |
cudnn_enabled: true |
cudnn_benchmark: false |
cudnn_deterministic: true |
use_tf32: false |
collect_stats: false |
write_collected_feats: false |
max_epoch: 250 |
patience: null |
val_scheduler_criterion: |
- valid |
- loss |
early_stopping_criterion: |
- valid |
- loss |
- min |
best_model_criterion: |
- - valid |
- acc |
- max |
keep_nbest_models: 1 |
nbest_averaging_interval: 0 |
grad_clip: 1 |
grad_clip_type: 2.0 |
grad_noise: false |
accum_grad: 1 |
no_forward_run: false |
resume: true |
train_dtype: float32 |
use_amp: false |
log_interval: null |
use_matplotlib: true |
use_tensorboard: true |
create_graph_in_tensorboard: false |
use_wandb: false |
wandb_project: null |
wandb_id: null |
wandb_entity: null |
wandb_name: null |
wandb_model_log_interval: -1 |
detect_anomaly: false |
use_adapter: false |
adapter: lora |
save_strategy: all |
adapter_conf: {} |
pretrain_path: null |
init_param: [] |
ignore_init_mismatch: false |
freeze_param: [] |
num_iters_per_epoch: null |
batch_size: 32 |
valid_batch_size: 32 |
batch_bins: 1000000 |
valid_batch_bins: null |
category_sample_size: 10 |
train_shape_file: |
- /compute/babel-11-13/sbharad2/beats_run/bean.cbi/exp/cls_stats_16k/train/speech_shape |
- /compute/babel-11-13/sbharad2/beats_run/bean.cbi/exp/cls_stats_16k/train/label_shape |
valid_shape_file: |
- /compute/babel-11-13/sbharad2/beats_run/bean.cbi/exp/cls_stats_16k/valid/speech_shape |
- /compute/babel-11-13/sbharad2/beats_run/bean.cbi/exp/cls_stats_16k/valid/label_shape |
batch_type: folded |
valid_batch_type: null |
fold_length: |
- 160000 |
- 5 |
sort_in_batch: descending |
shuffle_within_batch: false |
sort_batch: descending |
multiple_iterator: false |
chunk_length: 500 |
chunk_shift_ratio: 0.5 |
num_cache_chunks: 1024 |
chunk_excluded_key_prefixes: [] |
chunk_default_fs: null |
chunk_max_abs_length: null |
chunk_discard_short_samples: true |
train_data_path_and_name_and_type: |
- - /compute/babel-11-13/sbharad2/beats_run/bean.cbi/dump/cbi.train/wav.scp |
- speech |
- sound |
- - /compute/babel-11-13/sbharad2/beats_run/bean.cbi/dump/cbi.train/text |
- label |
- text |
valid_data_path_and_name_and_type: |
- - /compute/babel-11-13/sbharad2/beats_run/bean.cbi/dump/cbi.dev/wav.scp |
- speech |
- sound |
- - /compute/babel-11-13/sbharad2/beats_run/bean.cbi/dump/cbi.dev/text |
- label |
- text |
multi_task_dataset: false |
allow_variable_data_keys: false |
max_cache_size: 0.0 |
max_cache_fd: 32 |
allow_multi_rates: false |
valid_max_cache_size: null |
exclude_weight_decay: false |
exclude_weight_decay_conf: {} |
optim: adamw |
optim_conf: |
lr: 3.0e-05 |
weight_decay: 0.01 |
betas: |
- 0.9 |
- 0.98 |
scheduler: cosineannealingwarmuprestarts |
scheduler_conf: |
first_cycle_steps: 95000 |
warmup_steps: 8000 |
max_lr: 3.0e-05 |
min_lr: 5.0e-06 |
token_list: |
- scoori |
- bulori |
- bushti |
- blkpho |
- brthum |
- cacwre |
- pasfly |
- lesgol |
- logshr |
- macwar |
- pinsis |
- whbnut |
- hamfly |
- normoc |
- grtgra |
- houwre |
- comyel |
- grhowl |
- houfin |
- rocpig |
- annhum |
- astfly |
- magwar |
- wesmea |
- wewpew |
- spotow |
- amerob |
- daejun |
- easmea |
- greroa |
- mouchi |
- pilwoo |
- comrav |
- hoowar |
- savspa |
- warvir |
- easblu |
- gnttow |
- ovenbi1 |
- rewbla |
- robgro |
- swathr |
- tuftit |
- westan |
- winwre3 |
- btywar |
- carwre |
- herthr |
- bewwre |
- sora |
- brdowl |
- buggna |
- casvir |
- chispa |
- fiespa |
- aldfly |
- killde |
- moudov |
- rebwoo |
- bkpwar |
- dowwoo |
- greegr |
- banswa |
- orcwar |
- plsvir |
- y00475 |
- blugrb1 |
- gockin |
- greyel |
- larspa |
- osprey |
- sonspa |
- yebfly |
- blujay |
- brnthr |
- canwre |
- clanut |
- comred |
- eastow |
- haiwoo |
- lesyel |
- amepip |
- easpho |
- fiscro |
- sposan |
- wooscj2 |
- bkhgro |
- labwoo |
- lazbun |
- marwre |
- stejay |
- weskin |
- bkbwar |
- buhvir |
- cangoo |
- canwar |
- dusfly |
- grcfly |
- norcar |
- wilsni1 |
- yerwar |
- yetvir |
- eucdov |
- linspa |
- norpar |
- olsfly |
- rebnut |
- scatan |
- bnhcow |
- louwat |
- norfli |
- veery |
- woothr |
- btnwar |
- cedwax |
- chswar |
- comgra |
- indbun |
- leabit |
- leafly |
- pinwar |
- reevir1 |
- solsan |
- bktspa |
- foxspa |
- houspa |
- snobun |
- vesspa |
- yelwar |
- brespa |
- comgol |
- coohaw |
- gnwtea |
- grbher3 |
- hergul |
- mallar3 |
- swaspa |
- brncre |
- btbwar |
- caster1 |
- eawpew |
- rethaw |
- rocwre |
- ruckin |
- semsan |
- whtspa |
- wlswar |
- bkcchi |
- bkchum |
- amered |
- norwat |
- whcspa |
- grycat |
- balori |
- purfin |
- treswa |
- wilfly |
- comter |
- belspa2 |
- juntit1 |
- comnig |
- reshaw |
- snogoo |
- gadwal |
- perfal |
- grnher |
- horlar |
- lobdow |
- bawwar |
- amegfi |
- commer |
- ribgul |
- casfin |
- pibgre |
- evegro |
- pygnut |
- brwhaw |
- gryfly |
- leasan |
- barswa |
- phaino |
- amecro |
- calqua |
- amewoo |
- pingro |
- saypho |
- semplo |
- buwwar |
- boboli |
- amekes |
- cowscj1 |
- amtspa |
- lobcur |
- belkin1 |
- pecsan |
- prawar |
- vigswa |
- camwar |
- easkin |
- yebsap |
- norsho |
- gocspa |
- rufhum |
- baisan |
- cliswa |
- pinjay |
- comloo |
- baleag |
- merlin |
- yehbla |
- calgul |
- goleag |
- nutwoo |
- rusbla |
- eursta |
- ameavo |
- lesnig |
- palwar |
- bkbmag1 |
- brebla |
- sagthr |
- bkbcuc |
- wesgre |
- redcro |
- wiltur |
- amebit |
- sagspa1 |
- tunswa |
- wooduc |
- renpha |
- whtswi |
- bongul |
- norhar2 |
- doccor |
- lotduc |
- chukar |
- horgre |
- nrwswa |
- sheowl |
- wesblu |
- whfibi |
- buwtea |
- norpin |
- eargre |
- rebsap |
- lewwoo |
- rebmer |
- wessan |
- chiswi |
- lecthr |
- rthhum |
- moublu |
- amewig |
- rinduc |
- shshaw |
- rufgro |
- swahaw |
- coshum |
- truswa |
- rudduc |
- buffle |
- hoomer |
- gcrfin |
- redhea |
- <unk> |
token_type: word |
init: xavier_normal |
input_size: 1 |
use_preprocessor: true |
frontend: null |
frontend_conf: {} |
specaug: null |
specaug_conf: {} |
normalize: null |
normalize_conf: {} |
preencoder: null |
preencoder_conf: {} |
encoder: beats |
encoder_conf: |
beats_ckpt_path: /compute/babel-13-33/sbharad2/models/BEATs/BEATs_iter3.pt |
beats_config: |
layer_wise_gradient_decay_ratio: 0.3 |
encoder_layerdrop: 0.1 |
dropout: 0.0 |
use_weighted_representation: false |
specaug_config: |
apply_time_warp: true |
apply_freq_mask: false |
apply_time_mask: true |
time_mask_width_ratio_range: |
- 0 |
- 0.06 |
num_time_mask: 1 |
roll_augment: true |
roll_interval: 1 |
decoder: linear |
decoder_conf: {} |
model: espnet |
model_conf: |
classification_type: multi-class |
mixup_augmentation: false |
lsm_weight: 0.1 |
required: |
- output_dir |
- token_list |
version: '202412' |
distributed: false |
``` |
</details> |
### Citing ESPnet |
```bibtex |
@inproceedings{watanabe2018espnet, |
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai}, |
title={{ESPnet}: End-to-End Speech Processing Toolkit}, |
year={2018}, |
booktitle={Proceedings of Interspeech}, |
pages={2207--2211}, |
doi={10.21437/Interspeech.2018-1456}, |
url={http://dx.doi.org/10.21437/Interspeech.2018-1456} |
} |
``` |
or arXiv: |
```bibtex |
@misc{watanabe2018espnet, |
title={ESPnet: End-to-End Speech Processing Toolkit}, |
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai}, |
year={2018}, |
eprint={1804.00015}, |
archivePrefix={arXiv}, |
primaryClass={cs.CL} |
} |
``` |