Alexichamba committed on
Commit
2d9c6d2
1 Parent(s): b1f6433

initial commit

Browse files
36epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68355225016ff029e259817bb062167c7725f6682cc835c93823ab5ec72383f9
3
+ size 137292312
README.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+
5
+ # IDLS24 TEAM33
6
+ ## Attempt at implementing Branch-ECAPA-TDNN
7
+
8
+ Architecture: [Branch-ECAPA-TDNN](https://www.isca-archive.org/interspeech_2023/yao23_interspeech.html)
9
+
10
+ Results on Vox1-O, after training on VoxCeleb1-dev
11
+
12
+ | EER (%) | minDCF |
13
+ |---------|--------|
14
+ | 2.654   | 0.175  |
15
+
16
+ Another logged result (evaluation setting not stated): eer=3.525, mindcf=0.243
config.yaml ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_branch_ecapa_mel.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: true
5
+ dry_run: false
6
+ iterator_type: category
7
+ valid_iterator_type: sequence
8
+ output_dir: exp/spk_train_branch_ecapa_mel_raw
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 6
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ cudnn_enabled: true
25
+ cudnn_benchmark: true
26
+ cudnn_deterministic: false
27
+ collect_stats: false
28
+ write_collected_feats: false
29
+ max_epoch: 50
30
+ patience: null
31
+ val_scheduler_criterion:
32
+ - valid
33
+ - loss
34
+ early_stopping_criterion:
35
+ - valid
36
+ - loss
37
+ - min
38
+ best_model_criterion:
39
+ - - valid
40
+ - eer
41
+ - min
42
+ keep_nbest_models: 3
43
+ nbest_averaging_interval: 0
44
+ grad_clip: 9999
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 1
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: true
52
+ log_interval: 100
53
+ use_matplotlib: true
54
+ use_tensorboard: true
55
+ create_graph_in_tensorboard: false
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ use_lora: false
64
+ save_lora_only: true
65
+ lora_conf: {}
66
+ pretrain_path: null
67
+ init_param: []
68
+ ignore_init_mismatch: false
69
+ freeze_param: []
70
+ num_iters_per_epoch: null
71
+ batch_size: 64
72
+ valid_batch_size: 32
73
+ batch_bins: 1000000
74
+ valid_batch_bins: null
75
+ train_shape_file:
76
+ - exp/spk_stats_16k/train/speech_shape
77
+ valid_shape_file:
78
+ - exp/spk_stats_16k/valid/speech_shape
79
+ batch_type: folded
80
+ valid_batch_type: null
81
+ fold_length:
82
+ - 120000
83
+ sort_in_batch: descending
84
+ shuffle_within_batch: false
85
+ sort_batch: descending
86
+ multiple_iterator: false
87
+ chunk_length: 500
88
+ chunk_shift_ratio: 0.5
89
+ num_cache_chunks: 1024
90
+ chunk_excluded_key_prefixes: []
91
+ chunk_default_fs: null
92
+ train_data_path_and_name_and_type:
93
+ - - dump/raw/dev_vox1/wav.scp
94
+ - speech
95
+ - sound
96
+ - - dump/raw/dev_vox1/utt2spk
97
+ - spk_labels
98
+ - text
99
+ valid_data_path_and_name_and_type:
100
+ - - dump/raw/test_vox1/trial.scp
101
+ - speech
102
+ - sound
103
+ - - dump/raw/test_vox1/trial2.scp
104
+ - speech2
105
+ - sound
106
+ - - dump/raw/test_vox1/trial_label
107
+ - spk_labels
108
+ - text
109
+ allow_variable_data_keys: false
110
+ max_cache_size: 0.0
111
+ max_cache_fd: 32
112
+ allow_multi_rates: false
113
+ valid_max_cache_size: null
114
+ exclude_weight_decay: false
115
+ exclude_weight_decay_conf: {}
116
+ optim: adam
117
+ optim_conf:
118
+ lr: 1.0e-05
119
+ weight_decay: 5.0e-05
120
+ amsgrad: false
121
+ scheduler: reducelronplateau
122
+ scheduler_conf:
123
+ mode: min
124
+ factor: 0.5
125
+ patience: 1
126
+ min_lr: 1.0e-09
127
+ threshold: 0.01
128
+ eps: 1.0e-09
129
+ init: null
130
+ use_preprocessor: true
131
+ input_size: null
132
+ target_duration: 3.0
133
+ spk2utt: dump/raw/dev_vox1/spk2utt
134
+ spk_num: 1211
135
+ sample_rate: 16000
136
+ num_eval: 10
137
+ rir_scp: ''
138
+ model_conf:
139
+ extract_feats_in_collect_stats: false
140
+ frontend: melspec_torch
141
+ frontend_conf:
142
+ preemp: true
143
+ n_fft: 512
144
+ log: true
145
+ win_length: 400
146
+ hop_length: 160
147
+ n_mels: 80
148
+ normalize: mn
149
+ specaug: null
150
+ specaug_conf: {}
151
+ normalize: null
152
+ normalize_conf: {}
153
+ encoder: branch_ecapa_tdnn
154
+ encoder_conf:
155
+ model_scale: 8
156
+ ndim: 1024
157
+ output_size: 1536
158
+ num_heads: 2
159
+ dropout_rate: 0.4
160
+ merge_method: concat
161
+ pooling: chn_attn_stat
162
+ pooling_conf: {}
163
+ projector: rawnet3
164
+ projector_conf:
165
+ output_size: 192
166
+ preprocessor: spk
167
+ preprocessor_conf:
168
+ target_duration: 3.0
169
+ sample_rate: 16000
170
+ num_eval: 5
171
+ noise_apply_prob: 0.5
172
+ noise_info:
173
+ - - 1.0
174
+ - dump/raw/musan_speech.scp
175
+ - - 4
176
+ - 7
177
+ - - 13
178
+ - 20
179
+ - - 1.0
180
+ - dump/raw/musan_noise.scp
181
+ - - 1
182
+ - 1
183
+ - - 0
184
+ - 15
185
+ - - 1.0
186
+ - dump/raw/musan_music.scp
187
+ - - 1
188
+ - 1
189
+ - - 5
190
+ - 15
191
+ rir_apply_prob: 0.5
192
+ rir_scp: dump/raw/rirs.scp
193
+ loss: aamsoftmax_sc_topk
194
+ loss_conf:
195
+ margin: 0.3
196
+ scale: 30
197
+ K: 3
198
+ mp: 0.06
199
+ k_top: 5
200
+ required:
201
+ - output_dir
202
+ version: '202402'
203
+ distributed: false
images/backward_time.png ADDED
images/clip.png ADDED
images/eer.png ADDED
images/forward_time.png ADDED
images/gpu_max_cached_mem_GB.png ADDED
images/grad_norm.png ADDED
images/iter_time.png ADDED
images/loss.png ADDED
images/loss_scale.png ADDED
images/mindcf.png ADDED
images/n_trials.png ADDED
images/nontrg_mean.png ADDED
images/nontrg_std.png ADDED
images/optim0_lr0.png ADDED
images/optim_step_time.png ADDED
images/train_time.png ADDED
images/trg_mean.png ADDED
images/trg_std.png ADDED