# Generated 2023-02-12 from:
# /home/agorin/cssl_sound/hparams/ecapa_vgg.yaml
# yamllint disable
# File              : supclr_train.yaml
# Author            : Zhepei Wang <zhepeiw2@illinois.edu>
# Date              : 27.01.2022
# Last Modified Date: 31.03.2022
# Last Modified By  : Zhepei Wang <zhepeiw2@illinois.edu>


seed: 2022
__set_seed: !apply:torch.manual_seed [2022]
np_rng: !new:numpy.random.RandomState [2022]
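
# NOTE: a minimal sketch (not part of the generated config) of how an hparams
# file like this is typically loaded via hyperpyyaml; the overrides dict below
# is illustrative:
#   from hyperpyyaml import load_hyperpyyaml
#   with open("hparams/ecapa_vgg.yaml") as f:
#       hparams = load_hyperpyyaml(f, overrides={"seed": 2022})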

resume_interrupt: false
resume_task_idx: 0
balanced_cry: false

time_stamp: 2023-02-12+21-11-02
experiment_name: ecapa_vgg
# output_folder: !ref results/<experiment_name>/<seed>
output_base: results #/home/agorin/datasets/results_cssl
output_folder: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg
train_log: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/train_log.txt
save_folder: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save

# Number of classes
n_classes: 308
num_tasks: 1
# cont learning setup
task_classes: &id001 !apply:utils.prepare_task_classes
  num_classes: 308
  num_tasks: 1
  seed: 2022
replay_num_keep: 0
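
# With num_tasks: 1, this presumably degenerates to a single offline task
# covering all 308 classes; see utils.prepare_task_classes for the split logic.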

use_mixup: false
mixup_alpha: 0.4
train_duration: 4.0

# Training parameters
number_of_epochs: 50
batch_size: 128
# lr: 0.001
# base_lr: 0.00000001
# max_lr: !ref <lr>
# step_size: 65000
warmup_epochs: 5
warmup_lr: 0.0
base_lr: 0.015
final_lr: 5e-09
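
# Sketch of the warmup + cosine decay implied by the values above (assuming
# schedulers.SimSiamCosineScheduler follows the usual SimSiam recipe; the
# exact rule lives in schedulers.py):
#   if epoch < warmup_epochs:
#       lr = warmup_lr + (base_lr - warmup_lr) * epoch / warmup_epochs
#   else:
#       t = (epoch - warmup_epochs) / (number_of_epochs - warmup_epochs)
#       lr = final_lr + 0.5 * (base_lr - final_lr) * (1 + math.cos(math.pi * t))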

# dataset
sample_rate: 16000

data_folder: /home/agorin/datasets/VGG-Sound
label_encoder_path: ./dataset/label_encoder_vggsound_ordered.txt
prepare_split_csv_fn: !name:dataset.prepare_vggsound2.prepare_split
  root_dir: /home/agorin/datasets/VGG-Sound
  output_dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save
  task_classes: *id001
  train_split: 0.8
  seed: 2022

train_dataloader_opts:
  batch_size: 128
  num_workers: 8
  shuffle: true
  drop_last: true
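
# These options are typically forwarded to torch.utils.data.DataLoader when
# the Brain builds its loaders; roughly equivalent to (illustrative):
#   loader = torch.utils.data.DataLoader(
#       train_set, **hparams["train_dataloader_opts"])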


valid_dataloader_opts:
  batch_size: 32
  num_workers: 8



# Experiment params
auto_mix_prec: false # set to true to enable mixed-precision training


# Feature parameters
n_mels: 80
left_frames: 0
right_frames: 0
deltas: false
amp_to_db: false
normalize: true
win_length: 25
hop_length: 10
n_fft: 400
f_min: 0
use_time_roll: false
use_freq_shift: false
emb_dim: 256
emb_norm_type: bn
proj_norm_type: bn
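
# Sanity check on the STFT settings above: win_length/hop_length are given in
# milliseconds in speechbrain's Fbank, so at sample_rate 16000 a 25 ms window
# is 400 samples (matching n_fft: 400) and a 10 ms hop is 160 samples.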

# augmentation
# time_domain_aug: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
#     sample_rate: !ref <sample_rate>
#     # drop_chunk_count_high: 2
#     # drop_chunk_noise_factor: 0.05
#     speeds: [90, 95, 100, 105, 110]
#     drop_freq_count_high: 4
#     drop_chunk_count_high: 3
#     # drop_chunk_length_low: 1000
#     # drop_chunk_length_high: 5000
spec_domain_aug: !new:augmentation.TFAugmentation
  time_warp: true
  time_warp_window: 8
  freq_mask: true
  freq_mask_width: !tuple (0, 10)
  n_freq_mask: 2
  time_mask: true
  time_mask_width: !tuple (0, 10)
  n_time_mask: 2
  replace_with_zero: true
  time_roll: false
  time_roll_limit: !tuple (0, 200)
  freq_shift: false
  freq_shift_limit: !tuple (-10, 10)
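
# The block above applies SpecAugment-style masking on the log-mel features:
# up to 2 frequency masks and 2 time masks of width 0-10 bins/frames each,
# plus optional time warping; time_roll / freq_shift are present but disabled.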


# Functions
compute_features: &id002 !new:speechbrain.lobes.features.Fbank
  n_mels: 80
  left_frames: 0
  right_frames: 0
  deltas: false
  sample_rate: 16000
  n_fft: 400
  win_length: 25
  hop_length: 10
  f_min: 0
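
# Shape sketch for the Fbank above (assuming a 4 s crop at 16 kHz):
#   feats = compute_features(wavs)   # (batch, 64000) -> (batch, ~401, 80)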

mean_var_norm: &id007 !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false

embedding_model: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
  input_size: 80
  channels: [1024, 1024, 1024, 1024, 3072]
  kernel_sizes: [5, 3, 3, 3, 1]
  dilations: [1, 2, 3, 4, 1]
  groups: [1, 1, 1, 1, 1]
  attention_channels: 128
  lin_neurons: 256
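
# The ECAPA-TDNN pools over time with attentive statistics, so per-frame
# features become a single utterance embedding:
#   emb = embedding_model(feats)   # (batch, frames, 80) -> (batch, 1, 256)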

# embedding_model: !new:models.pann.Cnn14
#   mel_bins: !ref <n_mels>
#   emb_dim: !ref <emb_dim>
#   norm_type: !ref <emb_norm_type>

projector: &id005 !new:models.modules.SimSiamProjector
  input_size: 256
  hidden_size: 256
  output_size: 256
  norm_type: bn

predictor: &id006 !new:models.modules.SimSiamPredictor
  input_size: 256
  hidden_size: 128
  norm_type: bn

classifier: &id004 !new:models.modules.Classifier
  input_size: 256
  output_size: 308

modules:
  compute_features: *id002
  embedding_model: *id003
  classifier: *id004
  projector: *id005
  predictor: *id006
  mean_var_norm: *id007
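
# In a SpeechBrain recipe, the modules dict above is normally handed to the
# Brain class, which wraps it in a torch ModuleDict (the subclass name here
# is an assumption):
#   brain = SoundBrain(modules=hparams["modules"], opt_class=hparams["opt_class"],
#                      hparams=hparams, checkpointer=hparams["checkpointer"])
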
ssl_weight: 1.0
compute_simclr_cost: !new:losses.SimCLRLoss
  tau: 0.5
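# SimCLR's NT-Xent objective, which losses.SimCLRLoss presumably implements
# (see losses.py for the exact form): for a positive pair (i, j),
#   l(i, j) = -log( exp(sim(z_i, z_j) / tau) / sum_{k != i} exp(sim(z_i, z_k) / tau) )
# with cosine similarity sim and the temperature tau: 0.5 set above.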

sup_weight: 0.0
compute_sup_cost: !new:losses.LogSoftmaxWithProbWrapper
  loss_fn: !new:torch.nn.Identity

dist_weight: 0.0
compute_dist_cost: !new:losses.SimCLRLoss
  tau: 0.5
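
# With the weights above (ssl: 1.0, sup: 0.0, dist: 0.0), the objective
# presumably reduces to the SimCLR term alone:
#   loss = ssl_weight * simclr + sup_weight * sup + dist_weight * dist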


acc_metric: !name:speechbrain.utils.Accuracy.AccuracyStats

# opt_class: !name:torch.optim.Adam
#     lr: !ref <base_lr>
#     weight_decay: 0.0005
#
# lr_scheduler_fn: !name:speechbrain.nnet.schedulers.CyclicLRScheduler
#     base_lr: !ref <final_lr>
#     max_lr: !ref <base_lr>
#     step_size: 888

opt_class: !name:torch.optim.SGD
  lr: 0.015
  weight_decay: 0.0005
  momentum: 0.9
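
# !name: yields a partial constructor; the Brain typically instantiates the
# optimizer itself once modules are on the target device, e.g.:
#   optimizer = hparams["opt_class"](brain.modules.parameters())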

lr_scheduler_fn: !name:schedulers.SimSiamCosineScheduler
  warmup_epochs: 5
  warmup_lr: 0.0
  num_epochs: 50
  base_lr: 0.015
  final_lr: 5e-09
  steps_per_epoch: 200
  constant_predictor_lr: true

epoch_counter_fn: !name:speechbrain.utils.epoch_loop.EpochCounter
  limit: 50

datapoint_counter: &id008 !new:utils.DatapointCounter

# prev_checkpointer: null
# prev_checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
#   checkpoints_dir: /home/agorin/vgg_offline/2022-04-13+23-33-21_seed_2022+ssl_offline/save/task0

# Logging + checkpoints
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save

recoverables:
  embedding_model: *id003
  classifier: *id004
  projector: *id005
  predictor: *id006
  normalizer: *id007
  datapoint_counter: *id008
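
# Sketch of how the recoverables dict is usually registered with the
# checkpointer in the training script (the Checkpointer API calls are real;
# their placement here is an assumption):
#   checkpointer.add_recoverables(hparams["recoverables"])
#   checkpointer.recover_if_possible()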
ssl_checkpoints_dir:  # /home/agorin/vgg_offline/2022-04-13+23-33-21_seed_2022+ssl_offline/save

train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/train_log.txt

# wandb
use_wandb: false
train_log_frequency: 20
wandb_logger_fn: !name:utils.MyWandBLogger
  initializer: !name:wandb.init
  entity: CAL
  project: cssl_sound
  name: 2023-02-12+21-11-02+seed_2022+ecapa_vgg
  dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg
  reinit: true
  yaml_config: hparams/vgg/supclr_train.yaml
  resume: false