seed: 2022
__set_seed: !apply:torch.manual_seed [2022]
np_rng: !new:numpy.random.RandomState [2022]

resume_interrupt: false
resume_task_idx: 0
balanced_cry: false

time_stamp: 2023-02-12+21-11-02
experiment_name: ecapa_vgg

output_base: results
output_folder: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg
train_log: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/train_log.txt
save_folder: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save

n_classes: 308
num_tasks: 1

task_classes: &id001 !apply:utils.prepare_task_classes
  num_classes: 308
  num_tasks: 1
  seed: 2022

replay_num_keep: 0

use_mixup: false
mixup_alpha: 0.4
train_duration: 4.0

number_of_epochs: 50
batch_size: 128

warmup_epochs: 5
warmup_lr: 0.0
base_lr: 0.015
final_lr: 5e-09

sample_rate: 16000

data_folder: /home/agorin/datasets/VGG-Sound
label_encoder_path: ./dataset/label_encoder_vggsound_ordered.txt
prepare_split_csv_fn: !name:dataset.prepare_vggsound2.prepare_split
  root_dir: /home/agorin/datasets/VGG-Sound
  output_dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save
  task_classes: *id001
  train_split: 0.8
  seed: 2022

train_dataloader_opts:
  batch_size: 128
  num_workers: 8
  shuffle: true
  drop_last: true

valid_dataloader_opts:
  batch_size: 32
  num_workers: 8

auto_mix_prec: false

n_mels: 80
left_frames: 0
right_frames: 0
deltas: false
amp_to_db: false
normalize: true
win_length: 25
hop_length: 10
n_fft: 400
f_min: 0
use_time_roll: false
use_freq_shift: false
emb_dim: 256
emb_norm_type: bn
proj_norm_type: bn

spec_domain_aug: !new:augmentation.TFAugmentation
  time_warp: true
  time_warp_window: 8
  freq_mask: true
  freq_mask_width: !tuple (0, 10)
  n_freq_mask: 2
  time_mask: true
  time_mask_width: !tuple (0, 10)
  n_time_mask: 2
  replace_with_zero: true
  time_roll: false
  time_roll_limit: !tuple (0, 200)
  freq_shift: false
  freq_shift_limit: !tuple (-10, 10)

compute_features: &id002 !new:speechbrain.lobes.features.Fbank
  n_mels: 80
  left_frames: 0
  right_frames: 0
  deltas: false
  sample_rate: 16000
  n_fft: 400
  win_length: 25
  hop_length: 10
  f_min: 0

mean_var_norm: &id007 !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false

embedding_model: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
  input_size: 80
  channels: [1024, 1024, 1024, 1024, 3072]
  kernel_sizes: [5, 3, 3, 3, 1]
  dilations: [1, 2, 3, 4, 1]
  groups: [1, 1, 1, 1, 1]
  attention_channels: 128
  lin_neurons: 256

projector: &id005 !new:models.modules.SimSiamProjector
  input_size: 256
  hidden_size: 256
  output_size: 256
  norm_type: bn

predictor: &id006 !new:models.modules.SimSiamPredictor
  input_size: 256
  hidden_size: 128
  norm_type: bn

classifier: &id004 !new:models.modules.Classifier
  input_size: 256
  output_size: 308

modules:
  compute_features: *id002
  embedding_model: *id003
  classifier: *id004
  projector: *id005
  predictor: *id006
  mean_var_norm: *id007

ssl_weight: 1.
compute_simclr_cost: !new:losses.SimCLRLoss
  tau: 0.5

sup_weight: 0.
compute_sup_cost: !new:losses.LogSoftmaxWithProbWrapper
  loss_fn: !new:torch.nn.Identity

dist_weight: 0
compute_dist_cost: !new:losses.SimCLRLoss
  tau: 0.5

acc_metric: !name:speechbrain.utils.Accuracy.AccuracyStats

opt_class: !name:torch.optim.SGD
  lr: 0.015
  weight_decay: 0.0005
  momentum: 0.9

lr_scheduler_fn: !name:schedulers.SimSiamCosineScheduler
  warmup_epochs: 5
  warmup_lr: 0.0
  num_epochs: 50
  base_lr: 0.015
  final_lr: 5e-09
  steps_per_epoch: 200
  constant_predictor_lr: true

epoch_counter_fn: !name:speechbrain.utils.epoch_loop.EpochCounter
  limit: 50

datapoint_counter: &id008 !new:utils.DatapointCounter

checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save
  recoverables:
    embedding_model: *id003
    classifier: *id004
    projector: *id005
    predictor: *id006
    normalizer: *id007
    datapoint_counter: *id008

ssl_checkpoints_dir:

train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/train_log.txt

use_wandb: false
train_log_frequency: 20
wandb_logger_fn: !name:utils.MyWandBLogger
  initializer: !name:wandb.init
  entity: CAL
  project: cssl_sound
  name: 2023-02-12+21-11-02+seed_2022+ecapa_vgg
  dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg
  reinit: true
  yaml_config: hparams/vgg/supclr_train.yaml
  resume: false