Alexichamba committed on
Commit
cf0636c
1 Parent(s): b890acf

initial commit

Browse files
29epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1247a732be7b3376cbffb67208d20503012c92c38b0438d4b558dc3f3115bf6a
3
+ size 67292784
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+
5
+ # IDLS24 TEAM33
6
+ ## RawNet3
7
+
8
+ Architecture: [RawNet3](https://arxiv.org/pdf/2203.08488.pdf)
9
+
10
+ Results on Vox1-O after training on VoxCeleb1-dev:
11
+
12
+ | EER (%) | minDCF |
13
+ |---------|--------|
14
+ | 3.181   | 0.218  |
15
+
config.yaml ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_rawnet3.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: true
5
+ dry_run: false
6
+ iterator_type: category
7
+ valid_iterator_type: sequence
8
+ output_dir: exp/spk_train_rawnet3_raw
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 6
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ cudnn_enabled: true
25
+ cudnn_benchmark: true
26
+ cudnn_deterministic: false
27
+ collect_stats: false
28
+ write_collected_feats: false
29
+ max_epoch: 40
30
+ patience: null
31
+ val_scheduler_criterion:
32
+ - valid
33
+ - loss
34
+ early_stopping_criterion:
35
+ - valid
36
+ - loss
37
+ - min
38
+ best_model_criterion:
39
+ - - valid
40
+ - eer
41
+ - min
42
+ keep_nbest_models: 3
43
+ nbest_averaging_interval: 0
44
+ grad_clip: 9999
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 1
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: true
52
+ log_interval: 100
53
+ use_matplotlib: true
54
+ use_tensorboard: true
55
+ create_graph_in_tensorboard: false
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ use_lora: false
64
+ save_lora_only: true
65
+ lora_conf: {}
66
+ pretrain_path: null
67
+ init_param: []
68
+ ignore_init_mismatch: false
69
+ freeze_param: []
70
+ num_iters_per_epoch: null
71
+ batch_size: 64
72
+ valid_batch_size: 32
73
+ batch_bins: 1000000
74
+ valid_batch_bins: null
75
+ train_shape_file:
76
+ - exp/spk_stats_16k/train/speech_shape
77
+ valid_shape_file:
78
+ - exp/spk_stats_16k/valid/speech_shape
79
+ batch_type: folded
80
+ valid_batch_type: null
81
+ fold_length:
82
+ - 120000
83
+ sort_in_batch: descending
84
+ shuffle_within_batch: false
85
+ sort_batch: descending
86
+ multiple_iterator: false
87
+ chunk_length: 500
88
+ chunk_shift_ratio: 0.5
89
+ num_cache_chunks: 1024
90
+ chunk_excluded_key_prefixes: []
91
+ chunk_default_fs: null
92
+ train_data_path_and_name_and_type:
93
+ - - dump/raw/dev_vox1/wav.scp
94
+ - speech
95
+ - sound
96
+ - - dump/raw/dev_vox1/utt2spk
97
+ - spk_labels
98
+ - text
99
+ valid_data_path_and_name_and_type:
100
+ - - dump/raw/test_vox1/trial.scp
101
+ - speech
102
+ - sound
103
+ - - dump/raw/test_vox1/trial2.scp
104
+ - speech2
105
+ - sound
106
+ - - dump/raw/test_vox1/trial_label
107
+ - spk_labels
108
+ - text
109
+ allow_variable_data_keys: false
110
+ max_cache_size: 0.0
111
+ max_cache_fd: 32
112
+ allow_multi_rates: false
113
+ valid_max_cache_size: null
114
+ exclude_weight_decay: false
115
+ exclude_weight_decay_conf: {}
116
+ optim: adam
117
+ optim_conf:
118
+ lr: 0.001
119
+ weight_decay: 5.0e-05
120
+ amsgrad: false
121
+ scheduler: cosineannealingwarmuprestarts
122
+ scheduler_conf:
123
+ first_cycle_steps: 11195
124
+ cycle_mult: 1.0
125
+ max_lr: 0.001
126
+ min_lr: 5.0e-06
127
+ warmup_steps: 1000
128
+ gamma: 0.75
129
+ init: null
130
+ use_preprocessor: true
131
+ input_size: null
132
+ target_duration: 3.0
133
+ spk2utt: dump/raw/dev_vox1/spk2utt
134
+ spk_num: 1211
135
+ sample_rate: 16000
136
+ num_eval: 10
137
+ rir_scp: ''
138
+ model_conf:
139
+ extract_feats_in_collect_stats: false
140
+ frontend: asteroid_frontend
141
+ frontend_conf:
142
+ sinc_stride: 16
143
+ sinc_kernel_size: 251
144
+ sinc_filters: 256
145
+ preemph_coef: 0.97
146
+ log_term: 1.0e-06
147
+ specaug: null
148
+ specaug_conf: {}
149
+ normalize: null
150
+ normalize_conf: {}
151
+ encoder: rawnet3
152
+ encoder_conf:
153
+ model_scale: 8
154
+ ndim: 1024
155
+ output_size: 1536
156
+ pooling: chn_attn_stat
157
+ pooling_conf: {}
158
+ projector: rawnet3
159
+ projector_conf:
160
+ output_size: 192
161
+ preprocessor: spk
162
+ preprocessor_conf:
163
+ target_duration: 3.0
164
+ sample_rate: 16000
165
+ num_eval: 5
166
+ noise_apply_prob: 0.5
167
+ noise_info:
168
+ - - 1.0
169
+ - dump/raw/musan_speech.scp
170
+ - - 4
171
+ - 7
172
+ - - 13
173
+ - 20
174
+ - - 1.0
175
+ - dump/raw/musan_noise.scp
176
+ - - 1
177
+ - 1
178
+ - - 0
179
+ - 15
180
+ - - 1.0
181
+ - dump/raw/musan_music.scp
182
+ - - 1
183
+ - 1
184
+ - - 5
185
+ - 15
186
+ rir_apply_prob: 0.5
187
+ rir_scp: dump/raw/rirs.scp
188
+ loss: aamsoftmax_sc_topk
189
+ loss_conf:
190
+ margin: 0.3
191
+ scale: 30
192
+ K: 3
193
+ mp: 0.06
194
+ k_top: 5
195
+ required:
196
+ - output_dir
197
+ version: '202402'
198
+ distributed: false
images/backward_time.png ADDED
images/clip.png ADDED
images/eer.png ADDED
images/forward_time.png ADDED
images/gpu_max_cached_mem_GB.png ADDED
images/grad_norm.png ADDED
images/iter_time.png ADDED
images/loss.png ADDED
images/loss_scale.png ADDED
images/mindcf.png ADDED
images/n_trials.png ADDED
images/nontrg_mean.png ADDED
images/nontrg_std.png ADDED
images/optim0_lr0.png ADDED
images/optim_step_time.png ADDED
images/train_time.png ADDED
images/trg_mean.png ADDED
images/trg_std.png ADDED