# Generated 2023-02-12 from:
# /home/agorin/cssl_sound/hparams/ecapa_vgg.yaml
# yamllint disable
# File : supclr_train.yaml
# Author : Zhepei Wang <zhepeiw2@illinois.edu>
# Date : 27.01.2022
# Last Modified Date: 31.03.2022
# Last Modified By : Zhepei Wang <zhepeiw2@illinois.edu>
seed: 2022
__set_seed: !apply:torch.manual_seed [2022]
np_rng: !new:numpy.random.RandomState [2022]
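# Note: in HyperPyYAML, `!apply` invokes the function at load time, so the line
# above seeds torch's global RNG as a side effect (the dunder key is a
# conventional throwaway), while `!new` constructs and keeps an object, here a
# dedicated numpy RandomState.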
resume_interrupt: false
resume_task_idx: 0
balanced_cry: false
time_stamp: 2023-02-12+21-11-02
experiment_name: ecapa_vgg
# output_folder: !ref results/<experiment_name>/<seed>
output_base: results  # /home/agorin/datasets/results_cssl
output_folder: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg
train_log: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/train_log.txt
save_folder: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save
# Number of classes
n_classes: 308
num_tasks: 1
# cont learning setup
task_classes: &id001 !apply:utils.prepare_task_classes
    num_classes: 308
    num_tasks: 1
    seed: 2022
replay_num_keep: 0
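# The `&id001` anchor lets `task_classes` be computed once and reused below via
# `*id001`. `utils.prepare_task_classes` presumably partitions the 308 labels
# into `num_tasks` class sets for the continual-learning curriculum; with
# num_tasks: 1 this degenerates to a single joint task over all classes.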
use_mixup: false
mixup_alpha: 0.4
train_duration: 4.0
# Training parameters
number_of_epochs: 50
batch_size: 128
# lr: 0.001
# base_lr: 0.00000001
# max_lr: !ref <lr>
# step_size: 65000
warmup_epochs: 5
warmup_lr: 0.0
base_lr: 0.015
final_lr: 5e-09
# dataset
sample_rate: 16000
data_folder: /home/agorin/datasets/VGG-Sound
label_encoder_path: ./dataset/label_encoder_vggsound_ordered.txt
prepare_split_csv_fn: !name:dataset.prepare_vggsound2.prepare_split
    root_dir: /home/agorin/datasets/VGG-Sound
    output_dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save
    task_classes: *id001
    train_split: 0.8
    seed: 2022
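# Sketch of the assumed contract: `prepare_split` scans `root_dir`, keeps only
# the classes of each task in `task_classes`, and writes per-task train/valid
# CSVs under `output_dir`, holding out 1 - train_split = 20% of the clips for
# validation, with the shuffle driven by `seed`.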
train_dataloader_opts:
    batch_size: 128
    num_workers: 8
    shuffle: true
    drop_last: true
valid_dataloader_opts:
    batch_size: 32
    num_workers: 8
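# With drop_last: true every training batch holds exactly 128 clips, which
# presumably keeps the pool of in-batch negatives seen by the SimCLR loss
# constant (2N - 1 contrast terms per anchor for a batch of N); validation
# batches may be ragged.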
# Experiment params
auto_mix_prec: false  # set to true for mixed precision
# Feature parameters
n_mels: 80
left_frames: 0
right_frames: 0
deltas: false
amp_to_db: false
normalize: true
win_length: 25
hop_length: 10
n_fft: 400
f_min: 0
use_time_roll: false
use_freq_shift: false
emb_dim: 256
emb_norm_type: bn
proj_norm_type: bn
# augmentation
# time_domain_aug: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
#     sample_rate: !ref <sample_rate>
#     # drop_chunk_count_high: 2
#     # drop_chunk_noise_factor: 0.05
#     speeds: [90, 95, 100, 105, 110]
#     drop_freq_count_high: 4
#     drop_chunk_count_high: 3
#     # drop_chunk_length_low: 1000
#     # drop_chunk_length_high: 5000
spec_domain_aug: !new:augmentation.TFAugmentation
    time_warp: true
    time_warp_window: 8
    freq_mask: true
    freq_mask_width: !tuple (0, 10)
    n_freq_mask: 2
    time_mask: true
    time_mask_width: !tuple (0, 10)
    n_time_mask: 2
    replace_with_zero: true
    time_roll: false
    time_roll_limit: !tuple (0, 200)
    freq_shift: false
    freq_shift_limit: !tuple (-10, 10)
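# This reads as SpecAugment-style masking on the log-mel features: up to 2
# frequency masks and 2 time masks, each 0-10 bins/frames wide, plus time
# warping with an 8-frame window; masked cells are zeroed
# (replace_with_zero: true). The roll and shift variants are wired up but
# disabled here, matching use_time_roll / use_freq_shift above.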
# Functions
compute_features: &id002 !new:speechbrain.lobes.features.Fbank
    n_mels: 80
    left_frames: 0
    right_frames: 0
    deltas: false
    sample_rate: 16000
    n_fft: 400
    win_length: 25
    hop_length: 10
    f_min: 0
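# Rough shapes, assuming a 4.0 s training crop (train_duration) at 16 kHz:
# a 25 ms window with a 10 ms hop gives about 4.0 / 0.010 = 400 frames, so
# each example enters the encoder as roughly a 400 x 80 log-mel matrix.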
mean_var_norm: &id007 !new:speechbrain.processing.features.InputNormalization
    norm_type: sentence
    std_norm: false
embedding_model: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
    input_size: 80
    channels: [1024, 1024, 1024, 1024, 3072]
    kernel_sizes: [5, 3, 3, 3, 1]
    dilations: [1, 2, 3, 4, 1]
    groups: [1, 1, 1, 1, 1]
    attention_channels: 128
    lin_neurons: 256
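# lin_neurons: 256 is the size of ECAPA-TDNN's final linear layer, i.e. the
# embedding dimension; it matches emb_dim above and the projector's input_size
# below, so the SSL heads bolt directly onto the encoder output.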
# embedding_model: !new:models.pann.Cnn14
#     mel_bins: !ref <n_mels>
#     emb_dim: !ref <emb_dim>
#     norm_type: !ref <emb_norm_type>
projector: &id005 !new:models.modules.SimSiamProjector
    input_size: 256
    hidden_size: 256
    output_size: 256
    norm_type: bn
predictor: &id006 !new:models.modules.SimSiamPredictor
    input_size: 256
    hidden_size: 128
    norm_type: bn
classifier: &id004 !new:models.modules.Classifier
    input_size: 256
    output_size: 308
modules:
    compute_features: *id002
    embedding_model: *id003
    classifier: *id004
    projector: *id005
    predictor: *id006
    mean_var_norm: *id007
ssl_weight: 1.
compute_simclr_cost: !new:losses.SimCLRLoss
    tau: 0.5
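# losses.SimCLRLoss with tau: 0.5 is presumably the NT-Xent objective: for a
# positive pair of projections (z_i, z_j) from two augmented views,
#     l(i, j) = -log( exp(sim(z_i, z_j) / tau)
#                     / sum_{k != i} exp(sim(z_i, z_k) / tau) )
# with sim(.,.) the cosine similarity and the sum running over the other
# 2N - 1 embeddings in the batch.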
sup_weight: 0.
compute_sup_cost: !new:losses.LogSoftmaxWithProbWrapper
    loss_fn: !new:torch.nn.Identity
dist_weight: 0
compute_dist_cost: !new:losses.SimCLRLoss
    tau: 0.5
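# With ssl_weight: 1, sup_weight: 0 and dist_weight: 0, the run presumably
# reduces to the pure self-supervised SimCLR term; the supervised and
# distillation costs are instantiated but contribute nothing, e.g. (sketch)
#     total = ssl_weight * L_simclr + sup_weight * L_sup + dist_weight * L_dist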
acc_metric: !name:speechbrain.utils.Accuracy.AccuracyStats
# opt_class: !name:torch.optim.Adam
#     lr: !ref <base_lr>
#     weight_decay: 0.0005
#
# lr_scheduler_fn: !name:speechbrain.nnet.schedulers.CyclicLRScheduler
#     base_lr: !ref <final_lr>
#     max_lr: !ref <base_lr>
#     step_size: 888
opt_class: !name:torch.optim.SGD
    lr: 0.015
    weight_decay: 0.0005
    momentum: 0.9
lr_scheduler_fn: !name:schedulers.SimSiamCosineScheduler
    warmup_epochs: 5
    warmup_lr: 0.0
    num_epochs: 50
    base_lr: 0.015
    final_lr: 5e-09
    steps_per_epoch: 200
    constant_predictor_lr: true
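# schedulers.SimSiamCosineScheduler presumably follows the SimSiam recipe:
# linear warmup from warmup_lr to base_lr over the first warmup_epochs, then
# per-step cosine decay towards final_lr over T = num_epochs * steps_per_epoch
# steps, e.g. (sketch)
#     lr(t) = final_lr + 0.5 * (base_lr - final_lr) * (1 + cos(pi * t / T))
# constant_predictor_lr: true pins the predictor head at base_lr throughout,
# as recommended in the SimSiam paper.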
epoch_counter_fn: !name:speechbrain.utils.epoch_loop.EpochCounter
    limit: 50
datapoint_counter: &id008 !new:utils.DatapointCounter
# prev_checkpointer: null
# prev_checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
#     checkpoints_dir: /home/agorin/vgg_offline/2022-04-13+23-33-21_seed_2022+ssl_offline/save/task0

# Logging + checkpoints
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save
    recoverables:
        embedding_model: *id003
        classifier: *id004
        projector: *id005
        predictor: *id006
        normalizer: *id007
        datapoint_counter: *id008
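# speechbrain's Checkpointer snapshots every entry under `recoverables` into
# checkpoints_dir and restores them on recovery, which is what makes the
# resume_interrupt flag at the top of this file workable: encoder weights,
# heads, the input normalizer and the datapoint counter all come back together.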
ssl_checkpoints_dir: # /home/agorin/vgg_offline/2022-04-13+23-33-21_seed_2022+ssl_offline/save
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
    save_file: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/train_log.txt
# wandb
use_wandb: false
train_log_frequency: 20
wandb_logger_fn: !name:utils.MyWandBLogger
    initializer: !name:wandb.init
    entity: CAL
    project: cssl_sound
    name: 2023-02-12+21-11-02+seed_2022+ecapa_vgg
    dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg
    reinit: true
    yaml_config: hparams/vgg/supclr_train.yaml
    resume: false