Alexgichamba committed on
Commit
2c6fc2d
1 Parent(s): 083001f

initial commit

Browse files
20epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05f1f9178ee2d9372623749769bd4cd8ca05fb9425e255211b7216900165eb79
3
+ size 84970210
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+
5
+ # IDLS24 TEAM33
6
+ ## SKA-TDNN with 4 msSKA blocks
7
+
8
+ Results on the VoxCeleb1-O (Vox1-O) trial list, after training on the VoxCeleb1 dev set:
9
+
10
+ | EER (%) | minDCF |
11
+ |---------|-------|
12
+ | 2.659 | 0.178 |
config.yaml ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_quadms_ska.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: true
5
+ dry_run: false
6
+ iterator_type: category
7
+ valid_iterator_type: sequence
8
+ output_dir: exp/spk_train_quadms_ska_raw
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 4
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ cudnn_enabled: true
25
+ cudnn_benchmark: true
26
+ cudnn_deterministic: false
27
+ collect_stats: false
28
+ write_collected_feats: false
29
+ max_epoch: 40
30
+ patience: null
31
+ val_scheduler_criterion:
32
+ - valid
33
+ - loss
34
+ early_stopping_criterion:
35
+ - valid
36
+ - loss
37
+ - min
38
+ best_model_criterion:
39
+ - - valid
40
+ - eer
41
+ - min
42
+ keep_nbest_models: 3
43
+ nbest_averaging_interval: 0
44
+ grad_clip: 9999
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 1
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: true
52
+ log_interval: 100
53
+ use_matplotlib: true
54
+ use_tensorboard: true
55
+ create_graph_in_tensorboard: false
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ use_adapter: false
64
+ adapter: lora
65
+ save_strategy: all
66
+ adapter_conf: {}
67
+ pretrain_path: null
68
+ init_param: []
69
+ ignore_init_mismatch: false
70
+ freeze_param: []
71
+ num_iters_per_epoch: 2239
72
+ batch_size: 64
73
+ valid_batch_size: 32
74
+ batch_bins: 1000000
75
+ valid_batch_bins: null
76
+ train_shape_file:
77
+ - exp/spk_stats_16k/train/speech_shape
78
+ valid_shape_file:
79
+ - exp/spk_stats_16k/valid/speech_shape
80
+ batch_type: folded
81
+ valid_batch_type: null
82
+ fold_length:
83
+ - 120000
84
+ sort_in_batch: descending
85
+ shuffle_within_batch: false
86
+ sort_batch: descending
87
+ multiple_iterator: false
88
+ chunk_length: 500
89
+ chunk_shift_ratio: 0.5
90
+ num_cache_chunks: 1024
91
+ chunk_excluded_key_prefixes: []
92
+ chunk_default_fs: null
93
+ train_data_path_and_name_and_type:
94
+ - - dump/raw/dev_vox1/wav.scp
95
+ - speech
96
+ - sound
97
+ - - dump/raw/dev_vox1/utt2spk
98
+ - spk_labels
99
+ - text
100
+ valid_data_path_and_name_and_type:
101
+ - - dump/raw/test_vox1/trial.scp
102
+ - speech
103
+ - sound
104
+ - - dump/raw/test_vox1/trial2.scp
105
+ - speech2
106
+ - sound
107
+ - - dump/raw/test_vox1/trial_label
108
+ - spk_labels
109
+ - text
110
+ allow_variable_data_keys: false
111
+ max_cache_size: 0.0
112
+ max_cache_fd: 32
113
+ allow_multi_rates: false
114
+ valid_max_cache_size: null
115
+ exclude_weight_decay: false
116
+ exclude_weight_decay_conf: {}
117
+ optim: adam
118
+ optim_conf:
119
+ lr: 0.001
120
+ weight_decay: 5.0e-05
121
+ amsgrad: false
122
+ scheduler: cosineannealingwarmuprestarts
123
+ scheduler_conf:
124
+ first_cycle_steps: 11195
125
+ cycle_mult: 1.0
126
+ max_lr: 0.001
127
+ min_lr: 5.0e-06
128
+ warmup_steps: 1000
129
+ gamma: 0.75
130
+ init: null
131
+ use_preprocessor: true
132
+ input_size: null
133
+ target_duration: 3.0
134
+ spk2utt: dump/raw/dev_vox1/spk2utt
135
+ spk_num: 1211
136
+ sample_rate: 16000
137
+ num_eval: 10
138
+ rir_scp: ''
139
+ model_conf:
140
+ extract_feats_in_collect_stats: false
141
+ frontend: melspec_torch
142
+ frontend_conf:
143
+ preemp: true
144
+ n_fft: 512
145
+ log: true
146
+ win_length: 400
147
+ hop_length: 160
148
+ n_mels: 80
149
+ normalize: mn
150
+ specaug: null
151
+ specaug_conf: {}
152
+ normalize: null
153
+ normalize_conf: {}
154
+ encoder: quadms_ska_tdnn
155
+ encoder_conf:
156
+ model_scale: 8
157
+ ndim: 512
158
+ ska_dim: 128
159
+ output_size: 1536
160
+ pooling: chn_attn_stat
161
+ pooling_conf: {}
162
+ projector: ska_tdnn
163
+ projector_conf:
164
+ output_size: 192
165
+ preprocessor: spk
166
+ preprocessor_conf:
167
+ target_duration: 3.0
168
+ sample_rate: 16000
169
+ num_eval: 5
170
+ noise_apply_prob: 0.5
171
+ noise_info:
172
+ - - 1.0
173
+ - dump/raw/musan_speech.scp
174
+ - - 4
175
+ - 7
176
+ - - 13
177
+ - 20
178
+ - - 1.0
179
+ - dump/raw/musan_noise.scp
180
+ - - 1
181
+ - 1
182
+ - - 0
183
+ - 15
184
+ - - 1.0
185
+ - dump/raw/musan_music.scp
186
+ - - 1
187
+ - 1
188
+ - - 5
189
+ - 15
190
+ rir_apply_prob: 0.5
191
+ rir_scp: dump/raw/rirs.scp
192
+ loss: aamsoftmax_sc_topk
193
+ loss_conf:
194
+ margin: 0.3
195
+ scale: 30
196
+ K: 3
197
+ mp: 0.06
198
+ k_top: 5
199
+ required:
200
+ - output_dir
201
+ version: '202402'
202
+ distributed: false
images/backward_time.png ADDED
images/clip.png ADDED
images/eer.png ADDED
images/forward_time.png ADDED
images/gpu_max_cached_mem_GB.png ADDED
images/grad_norm.png ADDED
images/iter_time.png ADDED
images/loss.png ADDED
images/loss_scale.png ADDED
images/mindcf.png ADDED
images/n_trials.png ADDED
images/nontrg_mean.png ADDED
images/nontrg_std.png ADDED
images/optim0_lr0.png ADDED
images/optim_step_time.png ADDED
images/train_time.png ADDED
images/trg_mean.png ADDED
images/trg_std.png ADDED