shikhar7ssu
committed on
Upload 19 files
Browse files- README.md +799 -0
- as20k_fulltrain/data/token_list +528 -0
- as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/120epoch.pth +3 -0
- as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/RESULTS.md +19 -0
- as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/config.yaml +707 -0
- as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/acc.png +0 -0
- as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/backward_time.png +0 -0
- as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/clip.png +0 -0
- as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/forward_time.png +0 -0
- as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/gpu_max_cached_mem_GB.png +0 -0
- as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/grad_norm.png +0 -0
- as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/iter_time.png +0 -0
- as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/loss.png +0 -0
- as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/loss_scale.png +0 -0
- as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/mAP.png +0 -0
- as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/optim0_lr0.png +0 -0
- as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/optim_step_time.png +0 -0
- as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/train_time.png +0 -0
- meta.yaml +8 -0
README.md
ADDED
@@ -0,0 +1,799 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- espnet
|
4 |
+
- audio
|
5 |
+
- classification
|
6 |
+
language: en
|
7 |
+
datasets:
|
8 |
+
- as20k
|
9 |
+
license: cc-by-4.0
|
10 |
+
---
|
11 |
+
|
12 |
+
## ESPnet2 CLS model
|
13 |
+
|
14 |
+
### `shikhar7ssu/BEATs-AS20K`
|
15 |
+
|
16 |
+
This model was trained by Shikhar Bharadwaj using the as20k recipe in [espnet](https://github.com/espnet/espnet/).
|
17 |
+
|
18 |
+
### Demo: How to use in ESPnet2
|
19 |
+
|
20 |
+
Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
|
21 |
+
if you haven't done that already.
|
22 |
+
|
23 |
+
```bash
|
24 |
+
cd espnet
|
25 |
+
git checkout 9634114cd3c35e230f4a9dda752e982512517653
|
26 |
+
pip install -e .
|
27 |
+
cd egs2/as20k/cls1
|
28 |
+
./run.sh --skip_data_prep false --skip_train true --download_model shikhar7ssu/BEATs-AS20K
|
29 |
+
```
|
30 |
+
|
31 |
+
<!-- Generated by scripts/utils/show_cls_result.sh -->
|
32 |
+
# RESULTS
|
33 |
+
## Environments
|
34 |
+
- date: `Fri Jan 3 23:25:40 EST 2025`
|
35 |
+
- python version: `3.9.20 (main, Oct 3 2024, 07:27:41) [GCC 11.2.0]`
|
36 |
+
- espnet version: `espnet 202412`
|
37 |
+
- pytorch version: `pytorch 2.4.0`
|
38 |
+
- Git hash: `635b3add116ae68c056f7aa67f64591c9ba7eb3e`
|
39 |
+
- Commit date: `Thu Jan 2 11:46:32 2025 -0500`
|
40 |
+
|
41 |
+
## cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644
|
42 |
+
|Dataset|Metric|Value|
|
43 |
+
|---|---|---|
|
44 |
+
|./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|mean_acc|47.73|
|
45 |
+
|./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|mAP|37.46|
|
46 |
+
|./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|mean_auc|96.58|
|
47 |
+
|./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|n_labels|527.00|
|
48 |
+
|./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|n_instances|20123.00|
|
49 |
+
|
50 |
+
## CLS config
|
51 |
+
|
52 |
+
<details><summary>expand</summary>
|
53 |
+
|
54 |
+
```
|
55 |
+
config: conf/beats_cls.yaml
|
56 |
+
print_config: false
|
57 |
+
log_level: INFO
|
58 |
+
drop_last_iter: false
|
59 |
+
dry_run: false
|
60 |
+
iterator_type: sequence
|
61 |
+
valid_iterator_type: null
|
62 |
+
output_dir: ./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644
|
63 |
+
ngpu: 1
|
64 |
+
seed: 0
|
65 |
+
num_workers: 2
|
66 |
+
num_att_plot: 0
|
67 |
+
dist_backend: nccl
|
68 |
+
dist_init_method: env://
|
69 |
+
dist_world_size: null
|
70 |
+
dist_rank: null
|
71 |
+
local_rank: 0
|
72 |
+
dist_master_addr: null
|
73 |
+
dist_master_port: null
|
74 |
+
dist_launcher: null
|
75 |
+
multiprocessing_distributed: false
|
76 |
+
unused_parameters: true
|
77 |
+
sharded_ddp: false
|
78 |
+
use_deepspeed: false
|
79 |
+
deepspeed_config: null
|
80 |
+
cudnn_enabled: true
|
81 |
+
cudnn_benchmark: false
|
82 |
+
cudnn_deterministic: true
|
83 |
+
use_tf32: false
|
84 |
+
collect_stats: false
|
85 |
+
write_collected_feats: false
|
86 |
+
max_epoch: 160
|
87 |
+
patience: null
|
88 |
+
val_scheduler_criterion:
|
89 |
+
- valid
|
90 |
+
- loss
|
91 |
+
early_stopping_criterion:
|
92 |
+
- valid
|
93 |
+
- loss
|
94 |
+
- min
|
95 |
+
best_model_criterion:
|
96 |
+
- - valid
|
97 |
+
- mAP
|
98 |
+
- max
|
99 |
+
keep_nbest_models: 1
|
100 |
+
nbest_averaging_interval: 0
|
101 |
+
grad_clip: 1
|
102 |
+
grad_clip_type: 2.0
|
103 |
+
grad_noise: false
|
104 |
+
accum_grad: 1
|
105 |
+
no_forward_run: false
|
106 |
+
resume: true
|
107 |
+
train_dtype: float32
|
108 |
+
use_amp: false
|
109 |
+
log_interval: null
|
110 |
+
use_matplotlib: true
|
111 |
+
use_tensorboard: true
|
112 |
+
create_graph_in_tensorboard: false
|
113 |
+
use_wandb: false
|
114 |
+
wandb_project: null
|
115 |
+
wandb_id: null
|
116 |
+
wandb_entity: null
|
117 |
+
wandb_name: null
|
118 |
+
wandb_model_log_interval: -1
|
119 |
+
detect_anomaly: false
|
120 |
+
use_adapter: false
|
121 |
+
adapter: lora
|
122 |
+
save_strategy: all
|
123 |
+
adapter_conf: {}
|
124 |
+
pretrain_path: null
|
125 |
+
init_param: []
|
126 |
+
ignore_init_mismatch: false
|
127 |
+
freeze_param: []
|
128 |
+
num_iters_per_epoch: null
|
129 |
+
batch_size: 80
|
130 |
+
valid_batch_size: 1200
|
131 |
+
batch_bins: 1000000
|
132 |
+
valid_batch_bins: null
|
133 |
+
category_sample_size: 10
|
134 |
+
train_shape_file:
|
135 |
+
- ./beats_runs/as20k_fulltrain/exp/cls_stats_16k/train/speech_shape
|
136 |
+
- ./beats_runs/as20k_fulltrain/exp/cls_stats_16k/train/label_shape
|
137 |
+
valid_shape_file:
|
138 |
+
- ./beats_runs/as20k_fulltrain/exp/cls_stats_16k/valid/speech_shape
|
139 |
+
- ./beats_runs/as20k_fulltrain/exp/cls_stats_16k/valid/label_shape
|
140 |
+
batch_type: folded
|
141 |
+
valid_batch_type: null
|
142 |
+
fold_length:
|
143 |
+
- 160000
|
144 |
+
- 600
|
145 |
+
sort_in_batch: descending
|
146 |
+
shuffle_within_batch: false
|
147 |
+
sort_batch: descending
|
148 |
+
multiple_iterator: false
|
149 |
+
chunk_length: 500
|
150 |
+
chunk_shift_ratio: 0.5
|
151 |
+
num_cache_chunks: 1024
|
152 |
+
chunk_excluded_key_prefixes: []
|
153 |
+
chunk_default_fs: null
|
154 |
+
chunk_max_abs_length: null
|
155 |
+
chunk_discard_short_samples: true
|
156 |
+
train_data_path_and_name_and_type:
|
157 |
+
- - ./beats_runs/as20k_fulltrain/dump/train/wav.scp
|
158 |
+
- speech
|
159 |
+
- sound
|
160 |
+
- - ./beats_runs/as20k_fulltrain/dump/train/text
|
161 |
+
- label
|
162 |
+
- text
|
163 |
+
valid_data_path_and_name_and_type:
|
164 |
+
- - ./beats_runs/as20k_fulltrain/dump/val/wav.scp
|
165 |
+
- speech
|
166 |
+
- sound
|
167 |
+
- - ./beats_runs/as20k_fulltrain/dump/val/text
|
168 |
+
- label
|
169 |
+
- text
|
170 |
+
multi_task_dataset: false
|
171 |
+
allow_variable_data_keys: false
|
172 |
+
max_cache_size: 0.0
|
173 |
+
max_cache_fd: 32
|
174 |
+
allow_multi_rates: false
|
175 |
+
valid_max_cache_size: null
|
176 |
+
exclude_weight_decay: false
|
177 |
+
exclude_weight_decay_conf: {}
|
178 |
+
optim: adamw
|
179 |
+
optim_conf:
|
180 |
+
lr: 3.0e-05
|
181 |
+
weight_decay: 0.01
|
182 |
+
betas:
|
183 |
+
- 0.9
|
184 |
+
- 0.98
|
185 |
+
scheduler: cosineannealingwarmuprestarts
|
186 |
+
scheduler_conf:
|
187 |
+
first_cycle_steps: 95000
|
188 |
+
warmup_steps: 8000
|
189 |
+
max_lr: 3.0e-05
|
190 |
+
min_lr: 5.0e-06
|
191 |
+
token_list:
|
192 |
+
- Music
|
193 |
+
- Speech
|
194 |
+
- Vehicle
|
195 |
+
- Inside,_small_room
|
196 |
+
- Animal
|
197 |
+
- Musical_instrument
|
198 |
+
- Singing
|
199 |
+
- Domestic_animals,_pets
|
200 |
+
- Guitar
|
201 |
+
- Plucked_string_instrument
|
202 |
+
- Water
|
203 |
+
- Car
|
204 |
+
- Dog
|
205 |
+
- Percussion
|
206 |
+
- Wind_instrument,_woodwind_instrument
|
207 |
+
- Outside,_urban_or_manmade
|
208 |
+
- Outside,_rural_or_natural
|
209 |
+
- Boat,_Water_vehicle
|
210 |
+
- Brass_instrument
|
211 |
+
- Fowl
|
212 |
+
- Drum
|
213 |
+
- Siren
|
214 |
+
- Engine
|
215 |
+
- Bird
|
216 |
+
- Insect
|
217 |
+
- Gunshot,_gunfire
|
218 |
+
- Wood
|
219 |
+
- Rail_transport
|
220 |
+
- Train
|
221 |
+
- Wind
|
222 |
+
- Inside,_large_room_or_hall
|
223 |
+
- Railroad_car,_train_wagon
|
224 |
+
- Child_speech,_kid_speaking
|
225 |
+
- Crowd
|
226 |
+
- Rub
|
227 |
+
- Keyboard_(musical)
|
228 |
+
- Wind_noise_(microphone)
|
229 |
+
- Pizzicato
|
230 |
+
- Emergency_vehicle
|
231 |
+
- Bird_vocalization,_bird_call,_bird_song
|
232 |
+
- Livestock,_farm_animals,_working_animals
|
233 |
+
- Cat
|
234 |
+
- Organ
|
235 |
+
- Fly,_housefly
|
236 |
+
- Mechanisms
|
237 |
+
- Bowed_string_instrument
|
238 |
+
- Rain
|
239 |
+
- Laughter
|
240 |
+
- Aircraft
|
241 |
+
- Electronic_music
|
242 |
+
- Effects_unit
|
243 |
+
- Hum
|
244 |
+
- Tools
|
245 |
+
- Drum_kit
|
246 |
+
- Snare_drum
|
247 |
+
- Hiss
|
248 |
+
- Piano
|
249 |
+
- Water_tap,_faucet
|
250 |
+
- Rimshot
|
251 |
+
- Bass_drum
|
252 |
+
- Chicken,_rooster
|
253 |
+
- Marimba,_xylophone
|
254 |
+
- Horse
|
255 |
+
- Song
|
256 |
+
- Quack
|
257 |
+
- Power_tool
|
258 |
+
- Heart_sounds,_heartbeat
|
259 |
+
- Goose
|
260 |
+
- Hammond_organ
|
261 |
+
- Rock_music
|
262 |
+
- Ocean
|
263 |
+
- Mains_hum
|
264 |
+
- Thunder
|
265 |
+
- Chime
|
266 |
+
- Electronic_dance_music
|
267 |
+
- Typing
|
268 |
+
- Sink_(filling_or_washing)
|
269 |
+
- Raindrop
|
270 |
+
- Cello
|
271 |
+
- Electric_guitar
|
272 |
+
- Cheering
|
273 |
+
- Church_bell
|
274 |
+
- Christian_music
|
275 |
+
- Drum_roll
|
276 |
+
- Trombone
|
277 |
+
- Glockenspiel
|
278 |
+
- Trumpet
|
279 |
+
- Cymbal
|
280 |
+
- Tabla
|
281 |
+
- Clickety-clack
|
282 |
+
- Cricket
|
283 |
+
- Steam_whistle
|
284 |
+
- Explosion
|
285 |
+
- Saxophone
|
286 |
+
- Thunderstorm
|
287 |
+
- Pop_music
|
288 |
+
- Zither
|
289 |
+
- Applause
|
290 |
+
- Choir
|
291 |
+
- Whack,_thwack
|
292 |
+
- Clarinet
|
293 |
+
- Camera
|
294 |
+
- Electric_piano
|
295 |
+
- Independent_music
|
296 |
+
- Fire
|
297 |
+
- Frog
|
298 |
+
- Jet_engine
|
299 |
+
- Music_of_Asia
|
300 |
+
- Ding
|
301 |
+
- Waves,_surf
|
302 |
+
- Cattle,_bovinae
|
303 |
+
- Turkey
|
304 |
+
- Television
|
305 |
+
- Coo
|
306 |
+
- Scratching_(performance_technique)
|
307 |
+
- Flute
|
308 |
+
- Liquid
|
309 |
+
- Harp
|
310 |
+
- Progressive_rock
|
311 |
+
- Happy_music
|
312 |
+
- Steel_guitar,_slide_guitar
|
313 |
+
- Whoosh,_swoosh,_swish
|
314 |
+
- Boom
|
315 |
+
- Breathing
|
316 |
+
- Electronic_organ
|
317 |
+
- Environmental_noise
|
318 |
+
- Distortion
|
319 |
+
- Alarm_clock
|
320 |
+
- Fixed-wing_aircraft,_airplane
|
321 |
+
- Violin,_fiddle
|
322 |
+
- Whistling
|
323 |
+
- Accordion
|
324 |
+
- Disco
|
325 |
+
- Pump_(liquid)
|
326 |
+
- Waterfall
|
327 |
+
- Beep,_bleep
|
328 |
+
- Blues
|
329 |
+
- Grunge
|
330 |
+
- Hip_hop_music
|
331 |
+
- Whistle
|
332 |
+
- Fusillade
|
333 |
+
- Splash,_splatter
|
334 |
+
- Gush
|
335 |
+
- Toothbrush
|
336 |
+
- Knock
|
337 |
+
- Gargling
|
338 |
+
- Snoring
|
339 |
+
- Hammer
|
340 |
+
- Gobble
|
341 |
+
- Walk,_footsteps
|
342 |
+
- Jackhammer
|
343 |
+
- Filing_(rasp)
|
344 |
+
- Snort
|
345 |
+
- Narration,_monologue
|
346 |
+
- Tire_squeal
|
347 |
+
- Fire_alarm
|
348 |
+
- Squeal
|
349 |
+
- Meow
|
350 |
+
- Caterwaul
|
351 |
+
- Cutlery,_silverware
|
352 |
+
- Mantra
|
353 |
+
- Opera
|
354 |
+
- Classical_music
|
355 |
+
- Theremin
|
356 |
+
- Burst,_pop
|
357 |
+
- Drip
|
358 |
+
- Tick
|
359 |
+
- Children_shouting
|
360 |
+
- Creak
|
361 |
+
- Hiccup
|
362 |
+
- Pigeon,_dove
|
363 |
+
- Bicycle_bell
|
364 |
+
- Baby_cry,_infant_cry
|
365 |
+
- Duck
|
366 |
+
- Fireworks
|
367 |
+
- Tambourine
|
368 |
+
- Rodents,_rats,_mice
|
369 |
+
- Buzzer
|
370 |
+
- Splinter
|
371 |
+
- Writing
|
372 |
+
- Goat
|
373 |
+
- Sheep
|
374 |
+
- Heavy_metal
|
375 |
+
- Ska
|
376 |
+
- Neigh,_whinny
|
377 |
+
- Sizzle
|
378 |
+
- Rowboat,_canoe,_kayak
|
379 |
+
- Wood_block
|
380 |
+
- Clang
|
381 |
+
- Door
|
382 |
+
- Female_singing
|
383 |
+
- Stream
|
384 |
+
- Chant
|
385 |
+
- Vocal_music
|
386 |
+
- Yodeling
|
387 |
+
- Bee,_wasp,_etc.
|
388 |
+
- Air_brake
|
389 |
+
- Whir
|
390 |
+
- Bird_flight,_flapping_wings
|
391 |
+
- French_horn
|
392 |
+
- Telephone_dialing,_DTMF
|
393 |
+
- Squeak
|
394 |
+
- Sitar
|
395 |
+
- Smoke_detector,_smoke_alarm
|
396 |
+
- Tick-tock
|
397 |
+
- Gurgling
|
398 |
+
- Bellow
|
399 |
+
- Harmonic
|
400 |
+
- Male_singing
|
401 |
+
- Giggle
|
402 |
+
- Bark
|
403 |
+
- Vibration
|
404 |
+
- Drill
|
405 |
+
- Skidding
|
406 |
+
- Scratch
|
407 |
+
- Drawer_open_or_close
|
408 |
+
- Chop
|
409 |
+
- Drum_machine
|
410 |
+
- Squish
|
411 |
+
- Toilet_flush
|
412 |
+
- Fart
|
413 |
+
- Basketball_bounce
|
414 |
+
- Electronic_tuner
|
415 |
+
- Singing_bowl
|
416 |
+
- Squawk
|
417 |
+
- Conversation
|
418 |
+
- Reggae
|
419 |
+
- Funny_music
|
420 |
+
- Scrape
|
421 |
+
- Sewing_machine
|
422 |
+
- Tender_music
|
423 |
+
- Swing_music
|
424 |
+
- Dishes,_pots,_and_pans
|
425 |
+
- Sampler
|
426 |
+
- Synthesizer
|
427 |
+
- Clapping
|
428 |
+
- Hubbub,_speech_noise,_speech_babble
|
429 |
+
- Engine_knocking
|
430 |
+
- Canidae,_dogs,_wolves
|
431 |
+
- Chainsaw
|
432 |
+
- Pour
|
433 |
+
- Croak
|
434 |
+
- Chewing,_mastication
|
435 |
+
- Cowbell
|
436 |
+
- Propeller,_airscrew
|
437 |
+
- Didgeridoo
|
438 |
+
- Ringtone
|
439 |
+
- Rattle_(instrument)
|
440 |
+
- Artillery_fire
|
441 |
+
- Cash_register
|
442 |
+
- Crack
|
443 |
+
- Growling
|
444 |
+
- Mosquito
|
445 |
+
- Carnatic_music
|
446 |
+
- Honk
|
447 |
+
- Howl
|
448 |
+
- Cacophony
|
449 |
+
- Gospel_music
|
450 |
+
- Firecracker
|
451 |
+
- Strum
|
452 |
+
- Motorboat,_speedboat
|
453 |
+
- Clock
|
454 |
+
- Dance_music
|
455 |
+
- Microwave_oven
|
456 |
+
- Country
|
457 |
+
- Bluegrass
|
458 |
+
- Rattle
|
459 |
+
- Mallet_percussion
|
460 |
+
- Computer_keyboard
|
461 |
+
- Bass_guitar
|
462 |
+
- Electric_shaver,_electric_razor
|
463 |
+
- Sawing
|
464 |
+
- Owl
|
465 |
+
- Whip
|
466 |
+
- White_noise
|
467 |
+
- Chirp_tone
|
468 |
+
- Boiling
|
469 |
+
- Ship
|
470 |
+
- Mouse
|
471 |
+
- Breaking
|
472 |
+
- Silence
|
473 |
+
- Throat_clearing
|
474 |
+
- Bleat
|
475 |
+
- Salsa_music
|
476 |
+
- Patter
|
477 |
+
- Vibraphone
|
478 |
+
- Flap
|
479 |
+
- Typewriter
|
480 |
+
- Change_ringing_(campanology)
|
481 |
+
- Trickle,_dribble
|
482 |
+
- Video_game_music
|
483 |
+
- Glass
|
484 |
+
- Dial_tone
|
485 |
+
- Radio
|
486 |
+
- Bell
|
487 |
+
- Moo
|
488 |
+
- Heart_murmur
|
489 |
+
- Clatter
|
490 |
+
- Sniff
|
491 |
+
- Double_bass
|
492 |
+
- Background_music
|
493 |
+
- Lawn_mower
|
494 |
+
- Printer
|
495 |
+
- House_music
|
496 |
+
- Tearing
|
497 |
+
- Angry_music
|
498 |
+
- Male_speech,_man_speaking
|
499 |
+
- Wild_animals
|
500 |
+
- Cupboard_open_or_close
|
501 |
+
- Harpsichord
|
502 |
+
- Light_engine_(high_frequency)
|
503 |
+
- Child_singing
|
504 |
+
- Zipper_(clothing)
|
505 |
+
- Jazz
|
506 |
+
- Belly_laugh
|
507 |
+
- Roar
|
508 |
+
- Motor_vehicle_(road)
|
509 |
+
- Crowing,_cock-a-doodle-doo
|
510 |
+
- Cluck
|
511 |
+
- Sad_music
|
512 |
+
- Hi-hat
|
513 |
+
- Cough
|
514 |
+
- Stomach_rumble
|
515 |
+
- Alarm
|
516 |
+
- String_section
|
517 |
+
- Sonar
|
518 |
+
- Keys_jangling
|
519 |
+
- Synthetic_singing
|
520 |
+
- Rapping
|
521 |
+
- Sidetone
|
522 |
+
- Orchestra
|
523 |
+
- Throbbing
|
524 |
+
- Whale_vocalization
|
525 |
+
- Thunk
|
526 |
+
- Children_playing
|
527 |
+
- Snake
|
528 |
+
- Chink,_clink
|
529 |
+
- Chirp,_tweet
|
530 |
+
- Boing
|
531 |
+
- Shuffle
|
532 |
+
- Pulse
|
533 |
+
- Punk_rock
|
534 |
+
- Crow
|
535 |
+
- Caw
|
536 |
+
- Static
|
537 |
+
- Clicking
|
538 |
+
- Snicker
|
539 |
+
- Whispering
|
540 |
+
- Pink_noise
|
541 |
+
- Crushing
|
542 |
+
- Wedding_music
|
543 |
+
- Crumpling,_crinkling
|
544 |
+
- Crackle
|
545 |
+
- Whoop
|
546 |
+
- Electric_toothbrush
|
547 |
+
- Train_wheels_squealing
|
548 |
+
- Yell
|
549 |
+
- Wind_chime
|
550 |
+
- Frying_(food)
|
551 |
+
- Christmas_music
|
552 |
+
- Fill_(with_liquid)
|
553 |
+
- Reverberation
|
554 |
+
- Beatboxing
|
555 |
+
- Harmonica
|
556 |
+
- Banjo
|
557 |
+
- Sliding_door
|
558 |
+
- Groan
|
559 |
+
- Bagpipes
|
560 |
+
- Spray
|
561 |
+
- Stir
|
562 |
+
- Acoustic_guitar
|
563 |
+
- Tap
|
564 |
+
- Chorus_effect
|
565 |
+
- Noise
|
566 |
+
- Crunch
|
567 |
+
- Biting
|
568 |
+
- Aircraft_engine
|
569 |
+
- Busy_signal
|
570 |
+
- Bang
|
571 |
+
- Techno
|
572 |
+
- Tuning_fork
|
573 |
+
- Tapping_(guitar_technique)
|
574 |
+
- Pig
|
575 |
+
- Maraca
|
576 |
+
- Vacuum_cleaner
|
577 |
+
- Mandolin
|
578 |
+
- Electronica
|
579 |
+
- Theme_music
|
580 |
+
- Yip
|
581 |
+
- A_capella
|
582 |
+
- Rustle
|
583 |
+
- Chatter
|
584 |
+
- Traditional_music
|
585 |
+
- Soul_music
|
586 |
+
- Rustling_leaves
|
587 |
+
- Afrobeat
|
588 |
+
- Hoot
|
589 |
+
- Slosh
|
590 |
+
- Roaring_cats_(lions,_tigers)
|
591 |
+
- Chopping_(food)
|
592 |
+
- Heavy_engine_(low_frequency)
|
593 |
+
- Sine_wave
|
594 |
+
- Speech_synthesizer
|
595 |
+
- Middle_Eastern_music
|
596 |
+
- Music_of_Latin_America
|
597 |
+
- Arrow
|
598 |
+
- Timpani
|
599 |
+
- Eruption
|
600 |
+
- Shofar
|
601 |
+
- Jingle_bell
|
602 |
+
- Humming
|
603 |
+
- Sanding
|
604 |
+
- Female_speech,_woman_speaking
|
605 |
+
- Gong
|
606 |
+
- Rain_on_surface
|
607 |
+
- Pant
|
608 |
+
- Dubstep
|
609 |
+
- Clip-clop
|
610 |
+
- Finger_snapping
|
611 |
+
- Blender
|
612 |
+
- Drum_and_bass
|
613 |
+
- Bouncing
|
614 |
+
- Vehicle_horn,_car_horn,_honking
|
615 |
+
- Slam
|
616 |
+
- Idling
|
617 |
+
- Rhythm_and_blues
|
618 |
+
- Race_car,_auto_racing
|
619 |
+
- Single-lens_reflex_camera
|
620 |
+
- Smash,_crash
|
621 |
+
- Purr
|
622 |
+
- Shatter
|
623 |
+
- Steelpan
|
624 |
+
- Whimper_(dog)
|
625 |
+
- Power_windows,_electric_windows
|
626 |
+
- Battle_cry
|
627 |
+
- Scary_music
|
628 |
+
- Hands
|
629 |
+
- Echo
|
630 |
+
- Truck
|
631 |
+
- Buzz
|
632 |
+
- Mechanical_fan
|
633 |
+
- Plop
|
634 |
+
- Run
|
635 |
+
- Gasp
|
636 |
+
- Psychedelic_rock
|
637 |
+
- Grunt
|
638 |
+
- Helicopter
|
639 |
+
- Dental_drill,_dentist's_drill
|
640 |
+
- Babbling
|
641 |
+
- Zing
|
642 |
+
- Oink
|
643 |
+
- Soundtrack_music
|
644 |
+
- Ambulance_(siren)
|
645 |
+
- Exciting_music
|
646 |
+
- Telephone
|
647 |
+
- Jingle_(music)
|
648 |
+
- Tubular_bells
|
649 |
+
- Burping,_eructation
|
650 |
+
- Baby_laughter
|
651 |
+
- Ping
|
652 |
+
- Bow-wow
|
653 |
+
- Foghorn
|
654 |
+
- Machine_gun
|
655 |
+
- Ukulele
|
656 |
+
- Telephone_bell_ringing
|
657 |
+
- Pulleys
|
658 |
+
- Gears
|
659 |
+
- Sigh
|
660 |
+
- Coin_(dropping)
|
661 |
+
- Music_of_Africa
|
662 |
+
- Scissors
|
663 |
+
- Inside,_public_space
|
664 |
+
- Trance_music
|
665 |
+
- Roll
|
666 |
+
- Thump,_thud
|
667 |
+
- Air_conditioning
|
668 |
+
- Ding-dong
|
669 |
+
- Ratchet,_pawl
|
670 |
+
- Hair_dryer
|
671 |
+
- Shout
|
672 |
+
- Ambient_music
|
673 |
+
- Music_for_children
|
674 |
+
- Toot
|
675 |
+
- Bathtub_(filling_or_washing)
|
676 |
+
- Slap,_smack
|
677 |
+
- Chuckle,_chortle
|
678 |
+
- Traffic_noise,_roadway_noise
|
679 |
+
- Bicycle
|
680 |
+
- Whimper
|
681 |
+
- Doorbell
|
682 |
+
- Wheeze
|
683 |
+
- Sailboat,_sailing_ship
|
684 |
+
- Cap_gun
|
685 |
+
- Wail,_moan
|
686 |
+
- Rock_and_roll
|
687 |
+
- Jingle,_tinkle
|
688 |
+
- Fire_engine,_fire_truck_(siren)
|
689 |
+
- Funk
|
690 |
+
- Lullaby
|
691 |
+
- Field_recording
|
692 |
+
- Skateboard
|
693 |
+
- Steam
|
694 |
+
- Rumble
|
695 |
+
- Medium_engine_(mid_frequency)
|
696 |
+
- Sound_effect
|
697 |
+
- Flamenco
|
698 |
+
- Shuffling_cards
|
699 |
+
- Subway,_metro,_underground
|
700 |
+
- Police_car_(siren)
|
701 |
+
- Folk_music
|
702 |
+
- Crying,_sobbing
|
703 |
+
- New-age_music
|
704 |
+
- Ice_cream_truck,_ice_cream_van
|
705 |
+
- Music_of_Bollywood
|
706 |
+
- Accelerating,_revving,_vroom
|
707 |
+
- Screaming
|
708 |
+
- Motorcycle
|
709 |
+
- Engine_starting
|
710 |
+
- Train_whistle
|
711 |
+
- Car_passing_by
|
712 |
+
- Bus
|
713 |
+
- Sneeze
|
714 |
+
- Train_horn
|
715 |
+
- Air_horn,_truck_horn
|
716 |
+
- Civil_defense_siren
|
717 |
+
- Car_alarm
|
718 |
+
- Reversing_beeps
|
719 |
+
- <unk>
|
720 |
+
token_type: word
|
721 |
+
init: xavier_normal
|
722 |
+
input_size: 1
|
723 |
+
use_preprocessor: true
|
724 |
+
frontend: null
|
725 |
+
frontend_conf: {}
|
726 |
+
specaug: null
|
727 |
+
specaug_conf: {}
|
728 |
+
normalize: null
|
729 |
+
normalize_conf: {}
|
730 |
+
preencoder: null
|
731 |
+
preencoder_conf: {}
|
732 |
+
encoder: beats
|
733 |
+
encoder_conf:
|
734 |
+
beats_ckpt_path: /compute/babel-13-33/sbharad2/models/BEATs/BEATs_iter3_plus_AS20K.pt
|
735 |
+
beats_config:
|
736 |
+
layer_wise_gradient_decay_ratio: 0.3
|
737 |
+
encoder_layerdrop: 0.1
|
738 |
+
dropout: 0.0
|
739 |
+
use_weighted_representation: false
|
740 |
+
specaug_config:
|
741 |
+
apply_time_warp: true
|
742 |
+
apply_freq_mask: false
|
743 |
+
apply_time_mask: true
|
744 |
+
time_mask_width_ratio_range:
|
745 |
+
- 0
|
746 |
+
- 0.06
|
747 |
+
num_time_mask: 1
|
748 |
+
roll_augment: true
|
749 |
+
roll_interval: 1
|
750 |
+
decoder: linear
|
751 |
+
decoder_conf: {}
|
752 |
+
model: espnet
|
753 |
+
model_conf:
|
754 |
+
classification_type: multi-label
|
755 |
+
mixup_augmentation: true
|
756 |
+
lsm_weight: 0.0
|
757 |
+
required:
|
758 |
+
- output_dir
|
759 |
+
- token_list
|
760 |
+
version: '202412'
|
761 |
+
distributed: false
|
762 |
+
```
|
763 |
+
|
764 |
+
</details>
|
765 |
+
|
766 |
+
|
767 |
+
|
768 |
+
### Citing ESPnet
|
769 |
+
|
770 |
+
```bibtex
|
771 |
+
@inproceedings{watanabe2018espnet,
|
772 |
+
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
|
773 |
+
title={{ESPnet}: End-to-End Speech Processing Toolkit},
|
774 |
+
year={2018},
|
775 |
+
booktitle={Proceedings of Interspeech},
|
776 |
+
pages={2207--2211},
|
777 |
+
doi={10.21437/Interspeech.2018-1456},
|
778 |
+
url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
|
779 |
+
}
|
780 |
+
|
781 |
+
|
782 |
+
|
783 |
+
|
784 |
+
|
785 |
+
|
786 |
+
```
|
787 |
+
|
788 |
+
or arXiv:
|
789 |
+
|
790 |
+
```bibtex
|
791 |
+
@misc{watanabe2018espnet,
|
792 |
+
title={ESPnet: End-to-End Speech Processing Toolkit},
|
793 |
+
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
|
794 |
+
year={2018},
|
795 |
+
eprint={1804.00015},
|
796 |
+
archivePrefix={arXiv},
|
797 |
+
primaryClass={cs.CL}
|
798 |
+
}
|
799 |
+
```
|
as20k_fulltrain/data/token_list
ADDED
@@ -0,0 +1,528 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Music
|
2 |
+
Speech
|
3 |
+
Vehicle
|
4 |
+
Inside,_small_room
|
5 |
+
Animal
|
6 |
+
Musical_instrument
|
7 |
+
Singing
|
8 |
+
Domestic_animals,_pets
|
9 |
+
Guitar
|
10 |
+
Plucked_string_instrument
|
11 |
+
Water
|
12 |
+
Car
|
13 |
+
Dog
|
14 |
+
Percussion
|
15 |
+
Wind_instrument,_woodwind_instrument
|
16 |
+
Outside,_urban_or_manmade
|
17 |
+
Outside,_rural_or_natural
|
18 |
+
Boat,_Water_vehicle
|
19 |
+
Brass_instrument
|
20 |
+
Fowl
|
21 |
+
Drum
|
22 |
+
Siren
|
23 |
+
Engine
|
24 |
+
Bird
|
25 |
+
Insect
|
26 |
+
Gunshot,_gunfire
|
27 |
+
Wood
|
28 |
+
Rail_transport
|
29 |
+
Train
|
30 |
+
Wind
|
31 |
+
Inside,_large_room_or_hall
|
32 |
+
Railroad_car,_train_wagon
|
33 |
+
Child_speech,_kid_speaking
|
34 |
+
Crowd
|
35 |
+
Rub
|
36 |
+
Keyboard_(musical)
|
37 |
+
Wind_noise_(microphone)
|
38 |
+
Pizzicato
|
39 |
+
Emergency_vehicle
|
40 |
+
Bird_vocalization,_bird_call,_bird_song
|
41 |
+
Livestock,_farm_animals,_working_animals
|
42 |
+
Cat
|
43 |
+
Organ
|
44 |
+
Fly,_housefly
|
45 |
+
Mechanisms
|
46 |
+
Bowed_string_instrument
|
47 |
+
Rain
|
48 |
+
Laughter
|
49 |
+
Aircraft
|
50 |
+
Electronic_music
|
51 |
+
Effects_unit
|
52 |
+
Hum
|
53 |
+
Tools
|
54 |
+
Drum_kit
|
55 |
+
Snare_drum
|
56 |
+
Hiss
|
57 |
+
Piano
|
58 |
+
Water_tap,_faucet
|
59 |
+
Rimshot
|
60 |
+
Bass_drum
|
61 |
+
Chicken,_rooster
|
62 |
+
Marimba,_xylophone
|
63 |
+
Horse
|
64 |
+
Song
|
65 |
+
Quack
|
66 |
+
Power_tool
|
67 |
+
Heart_sounds,_heartbeat
|
68 |
+
Goose
|
69 |
+
Hammond_organ
|
70 |
+
Rock_music
|
71 |
+
Ocean
|
72 |
+
Mains_hum
|
73 |
+
Thunder
|
74 |
+
Chime
|
75 |
+
Electronic_dance_music
|
76 |
+
Typing
|
77 |
+
Sink_(filling_or_washing)
|
78 |
+
Raindrop
|
79 |
+
Cello
|
80 |
+
Electric_guitar
|
81 |
+
Cheering
|
82 |
+
Church_bell
|
83 |
+
Christian_music
|
84 |
+
Drum_roll
|
85 |
+
Trombone
|
86 |
+
Glockenspiel
|
87 |
+
Trumpet
|
88 |
+
Cymbal
|
89 |
+
Tabla
|
90 |
+
Clickety-clack
|
91 |
+
Cricket
|
92 |
+
Steam_whistle
|
93 |
+
Explosion
|
94 |
+
Saxophone
|
95 |
+
Thunderstorm
|
96 |
+
Pop_music
|
97 |
+
Zither
|
98 |
+
Applause
|
99 |
+
Choir
|
100 |
+
Whack,_thwack
|
101 |
+
Clarinet
|
102 |
+
Camera
|
103 |
+
Electric_piano
|
104 |
+
Independent_music
|
105 |
+
Fire
|
106 |
+
Frog
|
107 |
+
Jet_engine
|
108 |
+
Music_of_Asia
|
109 |
+
Ding
|
110 |
+
Waves,_surf
|
111 |
+
Cattle,_bovinae
|
112 |
+
Turkey
|
113 |
+
Television
|
114 |
+
Coo
|
115 |
+
Scratching_(performance_technique)
|
116 |
+
Flute
|
117 |
+
Liquid
|
118 |
+
Harp
|
119 |
+
Progressive_rock
|
120 |
+
Happy_music
|
121 |
+
Steel_guitar,_slide_guitar
|
122 |
+
Whoosh,_swoosh,_swish
|
123 |
+
Boom
|
124 |
+
Breathing
|
125 |
+
Electronic_organ
|
126 |
+
Environmental_noise
|
127 |
+
Distortion
|
128 |
+
Alarm_clock
|
129 |
+
Fixed-wing_aircraft,_airplane
|
130 |
+
Violin,_fiddle
|
131 |
+
Whistling
|
132 |
+
Accordion
|
133 |
+
Disco
|
134 |
+
Pump_(liquid)
|
135 |
+
Waterfall
|
136 |
+
Beep,_bleep
|
137 |
+
Blues
|
138 |
+
Grunge
|
139 |
+
Hip_hop_music
|
140 |
+
Whistle
|
141 |
+
Fusillade
|
142 |
+
Splash,_splatter
|
143 |
+
Gush
|
144 |
+
Toothbrush
|
145 |
+
Knock
|
146 |
+
Gargling
|
147 |
+
Snoring
|
148 |
+
Hammer
|
149 |
+
Gobble
|
150 |
+
Walk,_footsteps
|
151 |
+
Jackhammer
|
152 |
+
Filing_(rasp)
|
153 |
+
Snort
|
154 |
+
Narration,_monologue
|
155 |
+
Tire_squeal
|
156 |
+
Fire_alarm
|
157 |
+
Squeal
|
158 |
+
Meow
|
159 |
+
Caterwaul
|
160 |
+
Cutlery,_silverware
|
161 |
+
Mantra
|
162 |
+
Opera
|
163 |
+
Classical_music
|
164 |
+
Theremin
|
165 |
+
Burst,_pop
|
166 |
+
Drip
|
167 |
+
Tick
|
168 |
+
Children_shouting
|
169 |
+
Creak
|
170 |
+
Hiccup
|
171 |
+
Pigeon,_dove
|
172 |
+
Bicycle_bell
|
173 |
+
Baby_cry,_infant_cry
|
174 |
+
Duck
|
175 |
+
Fireworks
|
176 |
+
Tambourine
|
177 |
+
Rodents,_rats,_mice
|
178 |
+
Buzzer
|
179 |
+
Splinter
|
180 |
+
Writing
|
181 |
+
Goat
|
182 |
+
Sheep
|
183 |
+
Heavy_metal
|
184 |
+
Ska
|
185 |
+
Neigh,_whinny
|
186 |
+
Sizzle
|
187 |
+
Rowboat,_canoe,_kayak
|
188 |
+
Wood_block
|
189 |
+
Clang
|
190 |
+
Door
|
191 |
+
Female_singing
|
192 |
+
Stream
|
193 |
+
Chant
|
194 |
+
Vocal_music
|
195 |
+
Yodeling
|
196 |
+
Bee,_wasp,_etc.
|
197 |
+
Air_brake
|
198 |
+
Whir
|
199 |
+
Bird_flight,_flapping_wings
|
200 |
+
French_horn
|
201 |
+
Telephone_dialing,_DTMF
|
202 |
+
Squeak
|
203 |
+
Sitar
|
204 |
+
Smoke_detector,_smoke_alarm
|
205 |
+
Tick-tock
|
206 |
+
Gurgling
|
207 |
+
Bellow
|
208 |
+
Harmonic
|
209 |
+
Male_singing
|
210 |
+
Giggle
|
211 |
+
Bark
|
212 |
+
Vibration
|
213 |
+
Drill
|
214 |
+
Skidding
|
215 |
+
Scratch
|
216 |
+
Drawer_open_or_close
|
217 |
+
Chop
|
218 |
+
Drum_machine
|
219 |
+
Squish
|
220 |
+
Toilet_flush
|
221 |
+
Fart
|
222 |
+
Basketball_bounce
|
223 |
+
Electronic_tuner
|
224 |
+
Singing_bowl
|
225 |
+
Squawk
|
226 |
+
Conversation
|
227 |
+
Reggae
|
228 |
+
Funny_music
|
229 |
+
Scrape
|
230 |
+
Sewing_machine
|
231 |
+
Tender_music
|
232 |
+
Swing_music
|
233 |
+
Dishes,_pots,_and_pans
|
234 |
+
Sampler
|
235 |
+
Synthesizer
|
236 |
+
Clapping
|
237 |
+
Hubbub,_speech_noise,_speech_babble
|
238 |
+
Engine_knocking
|
239 |
+
Canidae,_dogs,_wolves
|
240 |
+
Chainsaw
|
241 |
+
Pour
|
242 |
+
Croak
|
243 |
+
Chewing,_mastication
|
244 |
+
Cowbell
|
245 |
+
Propeller,_airscrew
|
246 |
+
Didgeridoo
|
247 |
+
Ringtone
|
248 |
+
Rattle_(instrument)
|
249 |
+
Artillery_fire
|
250 |
+
Cash_register
|
251 |
+
Crack
|
252 |
+
Growling
|
253 |
+
Mosquito
|
254 |
+
Carnatic_music
|
255 |
+
Honk
|
256 |
+
Howl
|
257 |
+
Cacophony
|
258 |
+
Gospel_music
|
259 |
+
Firecracker
|
260 |
+
Strum
|
261 |
+
Motorboat,_speedboat
|
262 |
+
Clock
|
263 |
+
Dance_music
|
264 |
+
Microwave_oven
|
265 |
+
Country
|
266 |
+
Bluegrass
|
267 |
+
Rattle
|
268 |
+
Mallet_percussion
|
269 |
+
Computer_keyboard
|
270 |
+
Bass_guitar
|
271 |
+
Electric_shaver,_electric_razor
|
272 |
+
Sawing
|
273 |
+
Owl
|
274 |
+
Whip
|
275 |
+
White_noise
|
276 |
+
Chirp_tone
|
277 |
+
Boiling
|
278 |
+
Ship
|
279 |
+
Mouse
|
280 |
+
Breaking
|
281 |
+
Silence
|
282 |
+
Throat_clearing
|
283 |
+
Bleat
|
284 |
+
Salsa_music
|
285 |
+
Patter
|
286 |
+
Vibraphone
|
287 |
+
Flap
|
288 |
+
Typewriter
|
289 |
+
Change_ringing_(campanology)
|
290 |
+
Trickle,_dribble
|
291 |
+
Video_game_music
|
292 |
+
Glass
|
293 |
+
Dial_tone
|
294 |
+
Radio
|
295 |
+
Bell
|
296 |
+
Moo
|
297 |
+
Heart_murmur
|
298 |
+
Clatter
|
299 |
+
Sniff
|
300 |
+
Double_bass
|
301 |
+
Background_music
|
302 |
+
Lawn_mower
|
303 |
+
Printer
|
304 |
+
House_music
|
305 |
+
Tearing
|
306 |
+
Angry_music
|
307 |
+
Male_speech,_man_speaking
|
308 |
+
Wild_animals
|
309 |
+
Cupboard_open_or_close
|
310 |
+
Harpsichord
|
311 |
+
Light_engine_(high_frequency)
|
312 |
+
Child_singing
|
313 |
+
Zipper_(clothing)
|
314 |
+
Jazz
|
315 |
+
Belly_laugh
|
316 |
+
Roar
|
317 |
+
Motor_vehicle_(road)
|
318 |
+
Crowing,_cock-a-doodle-doo
|
319 |
+
Cluck
|
320 |
+
Sad_music
|
321 |
+
Hi-hat
|
322 |
+
Cough
|
323 |
+
Stomach_rumble
|
324 |
+
Alarm
|
325 |
+
String_section
|
326 |
+
Sonar
|
327 |
+
Keys_jangling
|
328 |
+
Synthetic_singing
|
329 |
+
Rapping
|
330 |
+
Sidetone
|
331 |
+
Orchestra
|
332 |
+
Throbbing
|
333 |
+
Whale_vocalization
|
334 |
+
Thunk
|
335 |
+
Children_playing
|
336 |
+
Snake
|
337 |
+
Chink,_clink
|
338 |
+
Chirp,_tweet
|
339 |
+
Boing
|
340 |
+
Shuffle
|
341 |
+
Pulse
|
342 |
+
Punk_rock
|
343 |
+
Crow
|
344 |
+
Caw
|
345 |
+
Static
|
346 |
+
Clicking
|
347 |
+
Snicker
|
348 |
+
Whispering
|
349 |
+
Pink_noise
|
350 |
+
Crushing
|
351 |
+
Wedding_music
|
352 |
+
Crumpling,_crinkling
|
353 |
+
Crackle
|
354 |
+
Whoop
|
355 |
+
Electric_toothbrush
|
356 |
+
Train_wheels_squealing
|
357 |
+
Yell
|
358 |
+
Wind_chime
|
359 |
+
Frying_(food)
|
360 |
+
Christmas_music
|
361 |
+
Fill_(with_liquid)
|
362 |
+
Reverberation
|
363 |
+
Beatboxing
|
364 |
+
Harmonica
|
365 |
+
Banjo
|
366 |
+
Sliding_door
|
367 |
+
Groan
|
368 |
+
Bagpipes
|
369 |
+
Spray
|
370 |
+
Stir
|
371 |
+
Acoustic_guitar
|
372 |
+
Tap
|
373 |
+
Chorus_effect
|
374 |
+
Noise
|
375 |
+
Crunch
|
376 |
+
Biting
|
377 |
+
Aircraft_engine
|
378 |
+
Busy_signal
|
379 |
+
Bang
|
380 |
+
Techno
|
381 |
+
Tuning_fork
|
382 |
+
Tapping_(guitar_technique)
|
383 |
+
Pig
|
384 |
+
Maraca
|
385 |
+
Vacuum_cleaner
|
386 |
+
Mandolin
|
387 |
+
Electronica
|
388 |
+
Theme_music
|
389 |
+
Yip
|
390 |
+
A_capella
|
391 |
+
Rustle
|
392 |
+
Chatter
|
393 |
+
Traditional_music
|
394 |
+
Soul_music
|
395 |
+
Rustling_leaves
|
396 |
+
Afrobeat
|
397 |
+
Hoot
|
398 |
+
Slosh
|
399 |
+
Roaring_cats_(lions,_tigers)
|
400 |
+
Chopping_(food)
|
401 |
+
Heavy_engine_(low_frequency)
|
402 |
+
Sine_wave
|
403 |
+
Speech_synthesizer
|
404 |
+
Middle_Eastern_music
|
405 |
+
Music_of_Latin_America
|
406 |
+
Arrow
|
407 |
+
Timpani
|
408 |
+
Eruption
|
409 |
+
Shofar
|
410 |
+
Jingle_bell
|
411 |
+
Humming
|
412 |
+
Sanding
|
413 |
+
Female_speech,_woman_speaking
|
414 |
+
Gong
|
415 |
+
Rain_on_surface
|
416 |
+
Pant
|
417 |
+
Dubstep
|
418 |
+
Clip-clop
|
419 |
+
Finger_snapping
|
420 |
+
Blender
|
421 |
+
Drum_and_bass
|
422 |
+
Bouncing
|
423 |
+
Vehicle_horn,_car_horn,_honking
|
424 |
+
Slam
|
425 |
+
Idling
|
426 |
+
Rhythm_and_blues
|
427 |
+
Race_car,_auto_racing
|
428 |
+
Single-lens_reflex_camera
|
429 |
+
Smash,_crash
|
430 |
+
Purr
|
431 |
+
Shatter
|
432 |
+
Steelpan
|
433 |
+
Whimper_(dog)
|
434 |
+
Power_windows,_electric_windows
|
435 |
+
Battle_cry
|
436 |
+
Scary_music
|
437 |
+
Hands
|
438 |
+
Echo
|
439 |
+
Truck
|
440 |
+
Buzz
|
441 |
+
Mechanical_fan
|
442 |
+
Plop
|
443 |
+
Run
|
444 |
+
Gasp
|
445 |
+
Psychedelic_rock
|
446 |
+
Grunt
|
447 |
+
Helicopter
|
448 |
+
Dental_drill,_dentist's_drill
|
449 |
+
Babbling
|
450 |
+
Zing
|
451 |
+
Oink
|
452 |
+
Soundtrack_music
|
453 |
+
Ambulance_(siren)
|
454 |
+
Exciting_music
|
455 |
+
Telephone
|
456 |
+
Jingle_(music)
|
457 |
+
Tubular_bells
|
458 |
+
Burping,_eructation
|
459 |
+
Baby_laughter
|
460 |
+
Ping
|
461 |
+
Bow-wow
|
462 |
+
Foghorn
|
463 |
+
Machine_gun
|
464 |
+
Ukulele
|
465 |
+
Telephone_bell_ringing
|
466 |
+
Pulleys
|
467 |
+
Gears
|
468 |
+
Sigh
|
469 |
+
Coin_(dropping)
|
470 |
+
Music_of_Africa
|
471 |
+
Scissors
|
472 |
+
Inside,_public_space
|
473 |
+
Trance_music
|
474 |
+
Roll
|
475 |
+
Thump,_thud
|
476 |
+
Air_conditioning
|
477 |
+
Ding-dong
|
478 |
+
Ratchet,_pawl
|
479 |
+
Hair_dryer
|
480 |
+
Shout
|
481 |
+
Ambient_music
|
482 |
+
Music_for_children
|
483 |
+
Toot
|
484 |
+
Bathtub_(filling_or_washing)
|
485 |
+
Slap,_smack
|
486 |
+
Chuckle,_chortle
|
487 |
+
Traffic_noise,_roadway_noise
|
488 |
+
Bicycle
|
489 |
+
Whimper
|
490 |
+
Doorbell
|
491 |
+
Wheeze
|
492 |
+
Sailboat,_sailing_ship
|
493 |
+
Cap_gun
|
494 |
+
Wail,_moan
|
495 |
+
Rock_and_roll
|
496 |
+
Jingle,_tinkle
|
497 |
+
Fire_engine,_fire_truck_(siren)
|
498 |
+
Funk
|
499 |
+
Lullaby
|
500 |
+
Field_recording
|
501 |
+
Skateboard
|
502 |
+
Steam
|
503 |
+
Rumble
|
504 |
+
Medium_engine_(mid_frequency)
|
505 |
+
Sound_effect
|
506 |
+
Flamenco
|
507 |
+
Shuffling_cards
|
508 |
+
Subway,_metro,_underground
|
509 |
+
Police_car_(siren)
|
510 |
+
Folk_music
|
511 |
+
Crying,_sobbing
|
512 |
+
New-age_music
|
513 |
+
Ice_cream_truck,_ice_cream_van
|
514 |
+
Music_of_Bollywood
|
515 |
+
Accelerating,_revving,_vroom
|
516 |
+
Screaming
|
517 |
+
Motorcycle
|
518 |
+
Engine_starting
|
519 |
+
Train_whistle
|
520 |
+
Car_passing_by
|
521 |
+
Bus
|
522 |
+
Sneeze
|
523 |
+
Train_horn
|
524 |
+
Air_horn,_truck_horn
|
525 |
+
Civil_defense_siren
|
526 |
+
Car_alarm
|
527 |
+
Reversing_beeps
|
528 |
+
<unk>
|
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/120epoch.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c997fd80ffa768463445ff5c32b5835580d294cf0b538b8aef0a9f866622964f
|
3 |
+
size 362963013
|
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/RESULTS.md
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!-- Generated by scripts/utils/show_cls_result.sh -->
|
2 |
+
# RESULTS
|
3 |
+
## Environments
|
4 |
+
- date: `Fri Jan 3 23:25:40 EST 2025`
|
5 |
+
- python version: `3.9.20 (main, Oct 3 2024, 07:27:41) [GCC 11.2.0]`
|
6 |
+
- espnet version: `espnet 202412`
|
7 |
+
- pytorch version: `pytorch 2.4.0`
|
8 |
+
- Git hash: `635b3add116ae68c056f7aa67f64591c9ba7eb3e`
|
9 |
+
- Commit date: `Thu Jan 2 11:46:32 2025 -0500`
|
10 |
+
|
11 |
+
## cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644
|
12 |
+
|Dataset|Metric|Value|
|
13 |
+
|---|---|---|
|
14 |
+
./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|mean_acc|47.73
|
15 |
+
./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|mAP|37.46
|
16 |
+
./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|mean_auc|96.58
|
17 |
+
./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|n_labels|527.00
|
18 |
+
./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/cls_eval/score|n_instances|20123.00
|
19 |
+
|
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/config.yaml
ADDED
@@ -0,0 +1,707 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
config: conf/beats_cls.yaml
|
2 |
+
print_config: false
|
3 |
+
log_level: INFO
|
4 |
+
drop_last_iter: false
|
5 |
+
dry_run: false
|
6 |
+
iterator_type: sequence
|
7 |
+
valid_iterator_type: null
|
8 |
+
output_dir: ./beats_runs/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644
|
9 |
+
ngpu: 1
|
10 |
+
seed: 0
|
11 |
+
num_workers: 2
|
12 |
+
num_att_plot: 0
|
13 |
+
dist_backend: nccl
|
14 |
+
dist_init_method: env://
|
15 |
+
dist_world_size: null
|
16 |
+
dist_rank: null
|
17 |
+
local_rank: 0
|
18 |
+
dist_master_addr: null
|
19 |
+
dist_master_port: null
|
20 |
+
dist_launcher: null
|
21 |
+
multiprocessing_distributed: false
|
22 |
+
unused_parameters: true
|
23 |
+
sharded_ddp: false
|
24 |
+
use_deepspeed: false
|
25 |
+
deepspeed_config: null
|
26 |
+
cudnn_enabled: true
|
27 |
+
cudnn_benchmark: false
|
28 |
+
cudnn_deterministic: true
|
29 |
+
use_tf32: false
|
30 |
+
collect_stats: false
|
31 |
+
write_collected_feats: false
|
32 |
+
max_epoch: 160
|
33 |
+
patience: null
|
34 |
+
val_scheduler_criterion:
|
35 |
+
- valid
|
36 |
+
- loss
|
37 |
+
early_stopping_criterion:
|
38 |
+
- valid
|
39 |
+
- loss
|
40 |
+
- min
|
41 |
+
best_model_criterion:
|
42 |
+
- - valid
|
43 |
+
- mAP
|
44 |
+
- max
|
45 |
+
keep_nbest_models: 1
|
46 |
+
nbest_averaging_interval: 0
|
47 |
+
grad_clip: 1
|
48 |
+
grad_clip_type: 2.0
|
49 |
+
grad_noise: false
|
50 |
+
accum_grad: 1
|
51 |
+
no_forward_run: false
|
52 |
+
resume: true
|
53 |
+
train_dtype: float32
|
54 |
+
use_amp: false
|
55 |
+
log_interval: null
|
56 |
+
use_matplotlib: true
|
57 |
+
use_tensorboard: true
|
58 |
+
create_graph_in_tensorboard: false
|
59 |
+
use_wandb: false
|
60 |
+
wandb_project: null
|
61 |
+
wandb_id: null
|
62 |
+
wandb_entity: null
|
63 |
+
wandb_name: null
|
64 |
+
wandb_model_log_interval: -1
|
65 |
+
detect_anomaly: false
|
66 |
+
use_adapter: false
|
67 |
+
adapter: lora
|
68 |
+
save_strategy: all
|
69 |
+
adapter_conf: {}
|
70 |
+
pretrain_path: null
|
71 |
+
init_param: []
|
72 |
+
ignore_init_mismatch: false
|
73 |
+
freeze_param: []
|
74 |
+
num_iters_per_epoch: null
|
75 |
+
batch_size: 80
|
76 |
+
valid_batch_size: 1200
|
77 |
+
batch_bins: 1000000
|
78 |
+
valid_batch_bins: null
|
79 |
+
category_sample_size: 10
|
80 |
+
train_shape_file:
|
81 |
+
- ./beats_runs/as20k_fulltrain/exp/cls_stats_16k/train/speech_shape
|
82 |
+
- ./beats_runs/as20k_fulltrain/exp/cls_stats_16k/train/label_shape
|
83 |
+
valid_shape_file:
|
84 |
+
- ./beats_runs/as20k_fulltrain/exp/cls_stats_16k/valid/speech_shape
|
85 |
+
- ./beats_runs/as20k_fulltrain/exp/cls_stats_16k/valid/label_shape
|
86 |
+
batch_type: folded
|
87 |
+
valid_batch_type: null
|
88 |
+
fold_length:
|
89 |
+
- 160000
|
90 |
+
- 600
|
91 |
+
sort_in_batch: descending
|
92 |
+
shuffle_within_batch: false
|
93 |
+
sort_batch: descending
|
94 |
+
multiple_iterator: false
|
95 |
+
chunk_length: 500
|
96 |
+
chunk_shift_ratio: 0.5
|
97 |
+
num_cache_chunks: 1024
|
98 |
+
chunk_excluded_key_prefixes: []
|
99 |
+
chunk_default_fs: null
|
100 |
+
chunk_max_abs_length: null
|
101 |
+
chunk_discard_short_samples: true
|
102 |
+
train_data_path_and_name_and_type:
|
103 |
+
- - ./beats_runs/as20k_fulltrain/dump/train/wav.scp
|
104 |
+
- speech
|
105 |
+
- sound
|
106 |
+
- - ./beats_runs/as20k_fulltrain/dump/train/text
|
107 |
+
- label
|
108 |
+
- text
|
109 |
+
valid_data_path_and_name_and_type:
|
110 |
+
- - ./beats_runs/as20k_fulltrain/dump/val/wav.scp
|
111 |
+
- speech
|
112 |
+
- sound
|
113 |
+
- - ./beats_runs/as20k_fulltrain/dump/val/text
|
114 |
+
- label
|
115 |
+
- text
|
116 |
+
multi_task_dataset: false
|
117 |
+
allow_variable_data_keys: false
|
118 |
+
max_cache_size: 0.0
|
119 |
+
max_cache_fd: 32
|
120 |
+
allow_multi_rates: false
|
121 |
+
valid_max_cache_size: null
|
122 |
+
exclude_weight_decay: false
|
123 |
+
exclude_weight_decay_conf: {}
|
124 |
+
optim: adamw
|
125 |
+
optim_conf:
|
126 |
+
lr: 3.0e-05
|
127 |
+
weight_decay: 0.01
|
128 |
+
betas:
|
129 |
+
- 0.9
|
130 |
+
- 0.98
|
131 |
+
scheduler: cosineannealingwarmuprestarts
|
132 |
+
scheduler_conf:
|
133 |
+
first_cycle_steps: 95000
|
134 |
+
warmup_steps: 8000
|
135 |
+
max_lr: 3.0e-05
|
136 |
+
min_lr: 5.0e-06
|
137 |
+
token_list:
|
138 |
+
- Music
|
139 |
+
- Speech
|
140 |
+
- Vehicle
|
141 |
+
- Inside,_small_room
|
142 |
+
- Animal
|
143 |
+
- Musical_instrument
|
144 |
+
- Singing
|
145 |
+
- Domestic_animals,_pets
|
146 |
+
- Guitar
|
147 |
+
- Plucked_string_instrument
|
148 |
+
- Water
|
149 |
+
- Car
|
150 |
+
- Dog
|
151 |
+
- Percussion
|
152 |
+
- Wind_instrument,_woodwind_instrument
|
153 |
+
- Outside,_urban_or_manmade
|
154 |
+
- Outside,_rural_or_natural
|
155 |
+
- Boat,_Water_vehicle
|
156 |
+
- Brass_instrument
|
157 |
+
- Fowl
|
158 |
+
- Drum
|
159 |
+
- Siren
|
160 |
+
- Engine
|
161 |
+
- Bird
|
162 |
+
- Insect
|
163 |
+
- Gunshot,_gunfire
|
164 |
+
- Wood
|
165 |
+
- Rail_transport
|
166 |
+
- Train
|
167 |
+
- Wind
|
168 |
+
- Inside,_large_room_or_hall
|
169 |
+
- Railroad_car,_train_wagon
|
170 |
+
- Child_speech,_kid_speaking
|
171 |
+
- Crowd
|
172 |
+
- Rub
|
173 |
+
- Keyboard_(musical)
|
174 |
+
- Wind_noise_(microphone)
|
175 |
+
- Pizzicato
|
176 |
+
- Emergency_vehicle
|
177 |
+
- Bird_vocalization,_bird_call,_bird_song
|
178 |
+
- Livestock,_farm_animals,_working_animals
|
179 |
+
- Cat
|
180 |
+
- Organ
|
181 |
+
- Fly,_housefly
|
182 |
+
- Mechanisms
|
183 |
+
- Bowed_string_instrument
|
184 |
+
- Rain
|
185 |
+
- Laughter
|
186 |
+
- Aircraft
|
187 |
+
- Electronic_music
|
188 |
+
- Effects_unit
|
189 |
+
- Hum
|
190 |
+
- Tools
|
191 |
+
- Drum_kit
|
192 |
+
- Snare_drum
|
193 |
+
- Hiss
|
194 |
+
- Piano
|
195 |
+
- Water_tap,_faucet
|
196 |
+
- Rimshot
|
197 |
+
- Bass_drum
|
198 |
+
- Chicken,_rooster
|
199 |
+
- Marimba,_xylophone
|
200 |
+
- Horse
|
201 |
+
- Song
|
202 |
+
- Quack
|
203 |
+
- Power_tool
|
204 |
+
- Heart_sounds,_heartbeat
|
205 |
+
- Goose
|
206 |
+
- Hammond_organ
|
207 |
+
- Rock_music
|
208 |
+
- Ocean
|
209 |
+
- Mains_hum
|
210 |
+
- Thunder
|
211 |
+
- Chime
|
212 |
+
- Electronic_dance_music
|
213 |
+
- Typing
|
214 |
+
- Sink_(filling_or_washing)
|
215 |
+
- Raindrop
|
216 |
+
- Cello
|
217 |
+
- Electric_guitar
|
218 |
+
- Cheering
|
219 |
+
- Church_bell
|
220 |
+
- Christian_music
|
221 |
+
- Drum_roll
|
222 |
+
- Trombone
|
223 |
+
- Glockenspiel
|
224 |
+
- Trumpet
|
225 |
+
- Cymbal
|
226 |
+
- Tabla
|
227 |
+
- Clickety-clack
|
228 |
+
- Cricket
|
229 |
+
- Steam_whistle
|
230 |
+
- Explosion
|
231 |
+
- Saxophone
|
232 |
+
- Thunderstorm
|
233 |
+
- Pop_music
|
234 |
+
- Zither
|
235 |
+
- Applause
|
236 |
+
- Choir
|
237 |
+
- Whack,_thwack
|
238 |
+
- Clarinet
|
239 |
+
- Camera
|
240 |
+
- Electric_piano
|
241 |
+
- Independent_music
|
242 |
+
- Fire
|
243 |
+
- Frog
|
244 |
+
- Jet_engine
|
245 |
+
- Music_of_Asia
|
246 |
+
- Ding
|
247 |
+
- Waves,_surf
|
248 |
+
- Cattle,_bovinae
|
249 |
+
- Turkey
|
250 |
+
- Television
|
251 |
+
- Coo
|
252 |
+
- Scratching_(performance_technique)
|
253 |
+
- Flute
|
254 |
+
- Liquid
|
255 |
+
- Harp
|
256 |
+
- Progressive_rock
|
257 |
+
- Happy_music
|
258 |
+
- Steel_guitar,_slide_guitar
|
259 |
+
- Whoosh,_swoosh,_swish
|
260 |
+
- Boom
|
261 |
+
- Breathing
|
262 |
+
- Electronic_organ
|
263 |
+
- Environmental_noise
|
264 |
+
- Distortion
|
265 |
+
- Alarm_clock
|
266 |
+
- Fixed-wing_aircraft,_airplane
|
267 |
+
- Violin,_fiddle
|
268 |
+
- Whistling
|
269 |
+
- Accordion
|
270 |
+
- Disco
|
271 |
+
- Pump_(liquid)
|
272 |
+
- Waterfall
|
273 |
+
- Beep,_bleep
|
274 |
+
- Blues
|
275 |
+
- Grunge
|
276 |
+
- Hip_hop_music
|
277 |
+
- Whistle
|
278 |
+
- Fusillade
|
279 |
+
- Splash,_splatter
|
280 |
+
- Gush
|
281 |
+
- Toothbrush
|
282 |
+
- Knock
|
283 |
+
- Gargling
|
284 |
+
- Snoring
|
285 |
+
- Hammer
|
286 |
+
- Gobble
|
287 |
+
- Walk,_footsteps
|
288 |
+
- Jackhammer
|
289 |
+
- Filing_(rasp)
|
290 |
+
- Snort
|
291 |
+
- Narration,_monologue
|
292 |
+
- Tire_squeal
|
293 |
+
- Fire_alarm
|
294 |
+
- Squeal
|
295 |
+
- Meow
|
296 |
+
- Caterwaul
|
297 |
+
- Cutlery,_silverware
|
298 |
+
- Mantra
|
299 |
+
- Opera
|
300 |
+
- Classical_music
|
301 |
+
- Theremin
|
302 |
+
- Burst,_pop
|
303 |
+
- Drip
|
304 |
+
- Tick
|
305 |
+
- Children_shouting
|
306 |
+
- Creak
|
307 |
+
- Hiccup
|
308 |
+
- Pigeon,_dove
|
309 |
+
- Bicycle_bell
|
310 |
+
- Baby_cry,_infant_cry
|
311 |
+
- Duck
|
312 |
+
- Fireworks
|
313 |
+
- Tambourine
|
314 |
+
- Rodents,_rats,_mice
|
315 |
+
- Buzzer
|
316 |
+
- Splinter
|
317 |
+
- Writing
|
318 |
+
- Goat
|
319 |
+
- Sheep
|
320 |
+
- Heavy_metal
|
321 |
+
- Ska
|
322 |
+
- Neigh,_whinny
|
323 |
+
- Sizzle
|
324 |
+
- Rowboat,_canoe,_kayak
|
325 |
+
- Wood_block
|
326 |
+
- Clang
|
327 |
+
- Door
|
328 |
+
- Female_singing
|
329 |
+
- Stream
|
330 |
+
- Chant
|
331 |
+
- Vocal_music
|
332 |
+
- Yodeling
|
333 |
+
- Bee,_wasp,_etc.
|
334 |
+
- Air_brake
|
335 |
+
- Whir
|
336 |
+
- Bird_flight,_flapping_wings
|
337 |
+
- French_horn
|
338 |
+
- Telephone_dialing,_DTMF
|
339 |
+
- Squeak
|
340 |
+
- Sitar
|
341 |
+
- Smoke_detector,_smoke_alarm
|
342 |
+
- Tick-tock
|
343 |
+
- Gurgling
|
344 |
+
- Bellow
|
345 |
+
- Harmonic
|
346 |
+
- Male_singing
|
347 |
+
- Giggle
|
348 |
+
- Bark
|
349 |
+
- Vibration
|
350 |
+
- Drill
|
351 |
+
- Skidding
|
352 |
+
- Scratch
|
353 |
+
- Drawer_open_or_close
|
354 |
+
- Chop
|
355 |
+
- Drum_machine
|
356 |
+
- Squish
|
357 |
+
- Toilet_flush
|
358 |
+
- Fart
|
359 |
+
- Basketball_bounce
|
360 |
+
- Electronic_tuner
|
361 |
+
- Singing_bowl
|
362 |
+
- Squawk
|
363 |
+
- Conversation
|
364 |
+
- Reggae
|
365 |
+
- Funny_music
|
366 |
+
- Scrape
|
367 |
+
- Sewing_machine
|
368 |
+
- Tender_music
|
369 |
+
- Swing_music
|
370 |
+
- Dishes,_pots,_and_pans
|
371 |
+
- Sampler
|
372 |
+
- Synthesizer
|
373 |
+
- Clapping
|
374 |
+
- Hubbub,_speech_noise,_speech_babble
|
375 |
+
- Engine_knocking
|
376 |
+
- Canidae,_dogs,_wolves
|
377 |
+
- Chainsaw
|
378 |
+
- Pour
|
379 |
+
- Croak
|
380 |
+
- Chewing,_mastication
|
381 |
+
- Cowbell
|
382 |
+
- Propeller,_airscrew
|
383 |
+
- Didgeridoo
|
384 |
+
- Ringtone
|
385 |
+
- Rattle_(instrument)
|
386 |
+
- Artillery_fire
|
387 |
+
- Cash_register
|
388 |
+
- Crack
|
389 |
+
- Growling
|
390 |
+
- Mosquito
|
391 |
+
- Carnatic_music
|
392 |
+
- Honk
|
393 |
+
- Howl
|
394 |
+
- Cacophony
|
395 |
+
- Gospel_music
|
396 |
+
- Firecracker
|
397 |
+
- Strum
|
398 |
+
- Motorboat,_speedboat
|
399 |
+
- Clock
|
400 |
+
- Dance_music
|
401 |
+
- Microwave_oven
|
402 |
+
- Country
|
403 |
+
- Bluegrass
|
404 |
+
- Rattle
|
405 |
+
- Mallet_percussion
|
406 |
+
- Computer_keyboard
|
407 |
+
- Bass_guitar
|
408 |
+
- Electric_shaver,_electric_razor
|
409 |
+
- Sawing
|
410 |
+
- Owl
|
411 |
+
- Whip
|
412 |
+
- White_noise
|
413 |
+
- Chirp_tone
|
414 |
+
- Boiling
|
415 |
+
- Ship
|
416 |
+
- Mouse
|
417 |
+
- Breaking
|
418 |
+
- Silence
|
419 |
+
- Throat_clearing
|
420 |
+
- Bleat
|
421 |
+
- Salsa_music
|
422 |
+
- Patter
|
423 |
+
- Vibraphone
|
424 |
+
- Flap
|
425 |
+
- Typewriter
|
426 |
+
- Change_ringing_(campanology)
|
427 |
+
- Trickle,_dribble
|
428 |
+
- Video_game_music
|
429 |
+
- Glass
|
430 |
+
- Dial_tone
|
431 |
+
- Radio
|
432 |
+
- Bell
|
433 |
+
- Moo
|
434 |
+
- Heart_murmur
|
435 |
+
- Clatter
|
436 |
+
- Sniff
|
437 |
+
- Double_bass
|
438 |
+
- Background_music
|
439 |
+
- Lawn_mower
|
440 |
+
- Printer
|
441 |
+
- House_music
|
442 |
+
- Tearing
|
443 |
+
- Angry_music
|
444 |
+
- Male_speech,_man_speaking
|
445 |
+
- Wild_animals
|
446 |
+
- Cupboard_open_or_close
|
447 |
+
- Harpsichord
|
448 |
+
- Light_engine_(high_frequency)
|
449 |
+
- Child_singing
|
450 |
+
- Zipper_(clothing)
|
451 |
+
- Jazz
|
452 |
+
- Belly_laugh
|
453 |
+
- Roar
|
454 |
+
- Motor_vehicle_(road)
|
455 |
+
- Crowing,_cock-a-doodle-doo
|
456 |
+
- Cluck
|
457 |
+
- Sad_music
|
458 |
+
- Hi-hat
|
459 |
+
- Cough
|
460 |
+
- Stomach_rumble
|
461 |
+
- Alarm
|
462 |
+
- String_section
|
463 |
+
- Sonar
|
464 |
+
- Keys_jangling
|
465 |
+
- Synthetic_singing
|
466 |
+
- Rapping
|
467 |
+
- Sidetone
|
468 |
+
- Orchestra
|
469 |
+
- Throbbing
|
470 |
+
- Whale_vocalization
|
471 |
+
- Thunk
|
472 |
+
- Children_playing
|
473 |
+
- Snake
|
474 |
+
- Chink,_clink
|
475 |
+
- Chirp,_tweet
|
476 |
+
- Boing
|
477 |
+
- Shuffle
|
478 |
+
- Pulse
|
479 |
+
- Punk_rock
|
480 |
+
- Crow
|
481 |
+
- Caw
|
482 |
+
- Static
|
483 |
+
- Clicking
|
484 |
+
- Snicker
|
485 |
+
- Whispering
|
486 |
+
- Pink_noise
|
487 |
+
- Crushing
|
488 |
+
- Wedding_music
|
489 |
+
- Crumpling,_crinkling
|
490 |
+
- Crackle
|
491 |
+
- Whoop
|
492 |
+
- Electric_toothbrush
|
493 |
+
- Train_wheels_squealing
|
494 |
+
- Yell
|
495 |
+
- Wind_chime
|
496 |
+
- Frying_(food)
|
497 |
+
- Christmas_music
|
498 |
+
- Fill_(with_liquid)
|
499 |
+
- Reverberation
|
500 |
+
- Beatboxing
|
501 |
+
- Harmonica
|
502 |
+
- Banjo
|
503 |
+
- Sliding_door
|
504 |
+
- Groan
|
505 |
+
- Bagpipes
|
506 |
+
- Spray
|
507 |
+
- Stir
|
508 |
+
- Acoustic_guitar
|
509 |
+
- Tap
|
510 |
+
- Chorus_effect
|
511 |
+
- Noise
|
512 |
+
- Crunch
|
513 |
+
- Biting
|
514 |
+
- Aircraft_engine
|
515 |
+
- Busy_signal
|
516 |
+
- Bang
|
517 |
+
- Techno
|
518 |
+
- Tuning_fork
|
519 |
+
- Tapping_(guitar_technique)
|
520 |
+
- Pig
|
521 |
+
- Maraca
|
522 |
+
- Vacuum_cleaner
|
523 |
+
- Mandolin
|
524 |
+
- Electronica
|
525 |
+
- Theme_music
|
526 |
+
- Yip
|
527 |
+
- A_capella
|
528 |
+
- Rustle
|
529 |
+
- Chatter
|
530 |
+
- Traditional_music
|
531 |
+
- Soul_music
|
532 |
+
- Rustling_leaves
|
533 |
+
- Afrobeat
|
534 |
+
- Hoot
|
535 |
+
- Slosh
|
536 |
+
- Roaring_cats_(lions,_tigers)
|
537 |
+
- Chopping_(food)
|
538 |
+
- Heavy_engine_(low_frequency)
|
539 |
+
- Sine_wave
|
540 |
+
- Speech_synthesizer
|
541 |
+
- Middle_Eastern_music
|
542 |
+
- Music_of_Latin_America
|
543 |
+
- Arrow
|
544 |
+
- Timpani
|
545 |
+
- Eruption
|
546 |
+
- Shofar
|
547 |
+
- Jingle_bell
|
548 |
+
- Humming
|
549 |
+
- Sanding
|
550 |
+
- Female_speech,_woman_speaking
|
551 |
+
- Gong
|
552 |
+
- Rain_on_surface
|
553 |
+
- Pant
|
554 |
+
- Dubstep
|
555 |
+
- Clip-clop
|
556 |
+
- Finger_snapping
|
557 |
+
- Blender
|
558 |
+
- Drum_and_bass
|
559 |
+
- Bouncing
|
560 |
+
- Vehicle_horn,_car_horn,_honking
|
561 |
+
- Slam
|
562 |
+
- Idling
|
563 |
+
- Rhythm_and_blues
|
564 |
+
- Race_car,_auto_racing
|
565 |
+
- Single-lens_reflex_camera
|
566 |
+
- Smash,_crash
|
567 |
+
- Purr
|
568 |
+
- Shatter
|
569 |
+
- Steelpan
|
570 |
+
- Whimper_(dog)
|
571 |
+
- Power_windows,_electric_windows
|
572 |
+
- Battle_cry
|
573 |
+
- Scary_music
|
574 |
+
- Hands
|
575 |
+
- Echo
|
576 |
+
- Truck
|
577 |
+
- Buzz
|
578 |
+
- Mechanical_fan
|
579 |
+
- Plop
|
580 |
+
- Run
|
581 |
+
- Gasp
|
582 |
+
- Psychedelic_rock
|
583 |
+
- Grunt
|
584 |
+
- Helicopter
|
585 |
+
- Dental_drill,_dentist's_drill
|
586 |
+
- Babbling
|
587 |
+
- Zing
|
588 |
+
- Oink
|
589 |
+
- Soundtrack_music
|
590 |
+
- Ambulance_(siren)
|
591 |
+
- Exciting_music
|
592 |
+
- Telephone
|
593 |
+
- Jingle_(music)
|
594 |
+
- Tubular_bells
|
595 |
+
- Burping,_eructation
|
596 |
+
- Baby_laughter
|
597 |
+
- Ping
|
598 |
+
- Bow-wow
|
599 |
+
- Foghorn
|
600 |
+
- Machine_gun
|
601 |
+
- Ukulele
|
602 |
+
- Telephone_bell_ringing
|
603 |
+
- Pulleys
|
604 |
+
- Gears
|
605 |
+
- Sigh
|
606 |
+
- Coin_(dropping)
|
607 |
+
- Music_of_Africa
|
608 |
+
- Scissors
|
609 |
+
- Inside,_public_space
|
610 |
+
- Trance_music
|
611 |
+
- Roll
|
612 |
+
- Thump,_thud
|
613 |
+
- Air_conditioning
|
614 |
+
- Ding-dong
|
615 |
+
- Ratchet,_pawl
|
616 |
+
- Hair_dryer
|
617 |
+
- Shout
|
618 |
+
- Ambient_music
|
619 |
+
- Music_for_children
|
620 |
+
- Toot
|
621 |
+
- Bathtub_(filling_or_washing)
|
622 |
+
- Slap,_smack
|
623 |
+
- Chuckle,_chortle
|
624 |
+
- Traffic_noise,_roadway_noise
|
625 |
+
- Bicycle
|
626 |
+
- Whimper
|
627 |
+
- Doorbell
|
628 |
+
- Wheeze
|
629 |
+
- Sailboat,_sailing_ship
|
630 |
+
- Cap_gun
|
631 |
+
- Wail,_moan
|
632 |
+
- Rock_and_roll
|
633 |
+
- Jingle,_tinkle
|
634 |
+
- Fire_engine,_fire_truck_(siren)
|
635 |
+
- Funk
|
636 |
+
- Lullaby
|
637 |
+
- Field_recording
|
638 |
+
- Skateboard
|
639 |
+
- Steam
|
640 |
+
- Rumble
|
641 |
+
- Medium_engine_(mid_frequency)
|
642 |
+
- Sound_effect
|
643 |
+
- Flamenco
|
644 |
+
- Shuffling_cards
|
645 |
+
- Subway,_metro,_underground
|
646 |
+
- Police_car_(siren)
|
647 |
+
- Folk_music
|
648 |
+
- Crying,_sobbing
|
649 |
+
- New-age_music
|
650 |
+
- Ice_cream_truck,_ice_cream_van
|
651 |
+
- Music_of_Bollywood
|
652 |
+
- Accelerating,_revving,_vroom
|
653 |
+
- Screaming
|
654 |
+
- Motorcycle
|
655 |
+
- Engine_starting
|
656 |
+
- Train_whistle
|
657 |
+
- Car_passing_by
|
658 |
+
- Bus
|
659 |
+
- Sneeze
|
660 |
+
- Train_horn
|
661 |
+
- Air_horn,_truck_horn
|
662 |
+
- Civil_defense_siren
|
663 |
+
- Car_alarm
|
664 |
+
- Reversing_beeps
|
665 |
+
- <unk>
|
666 |
+
token_type: word
|
667 |
+
init: xavier_normal
|
668 |
+
input_size: 1
|
669 |
+
use_preprocessor: true
|
670 |
+
frontend: null
|
671 |
+
frontend_conf: {}
|
672 |
+
specaug: null
|
673 |
+
specaug_conf: {}
|
674 |
+
normalize: null
|
675 |
+
normalize_conf: {}
|
676 |
+
preencoder: null
|
677 |
+
preencoder_conf: {}
|
678 |
+
encoder: beats
|
679 |
+
encoder_conf:
|
680 |
+
beats_ckpt_path: /compute/babel-13-33/sbharad2/models/BEATs/BEATs_iter3_plus_AS20K.pt
|
681 |
+
beats_config:
|
682 |
+
layer_wise_gradient_decay_ratio: 0.3
|
683 |
+
encoder_layerdrop: 0.1
|
684 |
+
dropout: 0.0
|
685 |
+
use_weighted_representation: false
|
686 |
+
specaug_config:
|
687 |
+
apply_time_warp: true
|
688 |
+
apply_freq_mask: false
|
689 |
+
apply_time_mask: true
|
690 |
+
time_mask_width_ratio_range:
|
691 |
+
- 0
|
692 |
+
- 0.06
|
693 |
+
num_time_mask: 1
|
694 |
+
roll_augment: true
|
695 |
+
roll_interval: 1
|
696 |
+
decoder: linear
|
697 |
+
decoder_conf: {}
|
698 |
+
model: espnet
|
699 |
+
model_conf:
|
700 |
+
classification_type: multi-label
|
701 |
+
mixup_augmentation: true
|
702 |
+
lsm_weight: 0.0
|
703 |
+
required:
|
704 |
+
- output_dir
|
705 |
+
- token_list
|
706 |
+
version: '202412'
|
707 |
+
distributed: false
|
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/acc.png
ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/backward_time.png
ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/clip.png
ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/forward_time.png
ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/gpu_max_cached_mem_GB.png
ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/grad_norm.png
ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/iter_time.png
ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/loss.png
ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/loss_scale.png
ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/mAP.png
ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/optim0_lr0.png
ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/optim_step_time.png
ADDED
as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/images/train_time.png
ADDED
meta.yaml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
espnet: '202412'
|
2 |
+
files:
|
3 |
+
classification_model_file: /compute/babel-11-13/sbharad2/beats_run/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/120epoch.pth
|
4 |
+
python: "3.9.20 (main, Oct 3 2024, 07:27:41) \n[GCC 11.2.0]"
|
5 |
+
timestamp: 1736167061.684751
|
6 |
+
torch: 2.4.0
|
7 |
+
yaml_files:
|
8 |
+
classification_train_config: /compute/babel-11-13/sbharad2/beats_run/as20k_fulltrain/exp/cls_beats_iter3p20k.allroll.Bp8p8.20250103.020644/config.yaml
|