# Generated 2023-05-14 from:
# /home/agorin/cryceleb2023/hparams/ecapa_voxceleb_basic.yaml
# yamllint disable
# ################################
# Model: Speaker identification with ECAPA for CryCeleb
# Authors: David Budaghyan
# ################################

ckpt_interval_minutes: 15 # save a checkpoint every N minutes

##### SEED
seed: 3011
__set_seed: !apply:crybrain_config_utils.set_seed [3011]

# DataLoader
bs: 32
train_dataloader_options:
  batch_size: 32
  shuffle: true
val_dataloader_options:
  batch_size: 2
  shuffle: false

##### ESTIMATOR COMPONENTS

# Fbank (feature extractor)
n_mels: 80
left_frames: 0
right_frames: 0
deltas: false
compute_features: &id002 !new:speechbrain.lobes.features.Fbank
  n_mels: 80
  left_frames: 0
  right_frames: 0
  deltas: false

# ECAPA (embedding model)
emb_dim: 192
embedding_model: &id001 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
  input_size: 80
  channels: [1024, 1024, 1024, 1024, 3072]
  kernel_sizes: [5, 3, 3, 3, 1]
  dilations: [1, 2, 3, 4, 1]
  groups: [1, 1, 1, 1, 1]
  attention_channels: 128
  lin_neurons: 192

# If you do not want to start from the pretrained encoder, simply delete
# the pretrained_embedding_model field.
pretrained_model_name: spkrec-ecapa-voxceleb
pretrained_embedding_model_path: speechbrain/spkrec-ecapa-voxceleb/embedding_model.ckpt
pretrained_embedding_model: !new:speechbrain.utils.parameter_transfer.Pretrainer
  collect_in: ./experiments/ecapa_voxceleb_ft_basic/ckpts
  loadables:
    model: *id001
  paths:
    model: speechbrain/spkrec-ecapa-voxceleb/embedding_model.ckpt

# CLASSIFIER
n_classes: 348 # check-yaml disable
classifier: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
  input_size: 192
  out_neurons: 348

##### EPOCH COUNTER
n_epochs: 1000
epoch_counter: &id005 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: 1000

##### OPTIMIZER
start_lr: 0.0001
opt_class: !name:torch.optim.Adam
  lr: 0.0001
  weight_decay: 0.000002

##### LEARNING RATE SCHEDULERS
lrsched_name: cyclic # one of: onplateau, cyclic
lr_min: 0.0000000001
lr_scheduler: &id006 !apply:crybrain_config_utils.choose_lrsched
  lrsched_name: cyclic
  # Below are kwargs; only those relevant to the chosen scheduler type are
  # used for initialization in `choose_lrsched`.
  # onplateau (ReduceLROnPlateau):
  lr_min: 0.0000000001
  factor: 0.4
  patience: 10
  dont_halve_until_epoch: 35
  # cyclic (CyclicLRScheduler):
  base_lr: 0.00000001
  max_lr: 0.0001
  step_size: 100
  mode: triangular
  gamma: 1.0
  scale_fn: null
  scale_mode: cycle

sample_rate: 16000
mean_var_norm: &id004 !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false

modules:
  compute_features: *id002
  embedding_model: *id001
  classifier: *id003
  mean_var_norm: *id004

compute_cost: !new:speechbrain.nnet.losses.LogSoftmaxWrapper
  loss_fn: !new:speechbrain.nnet.losses.AdditiveAngularMargin
    margin: 0.2
    scale: 30

classification_stats: !name:speechbrain.utils.metric_stats.ClassificationStats

###################################################################
### OUTPUT PATHS ###
experiment_name: ecapa_voxceleb_ft_basic
# Must be run from the directory which contains "experiments".
experiment_dir: ./experiments/ecapa_voxceleb_ft_basic
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: ./experiments/ecapa_voxceleb_ft_basic/train_log.txt
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: ./experiments/ecapa_voxceleb_ft_basic/ckpts
  recoverables:
    embedding_model: *id001
    classifier: *id003
    normalizer: *id004
    counter: *id005
    lr_scheduler: *id006
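
# ---------------------------------------------------------------------
# Usage sketch (not part of the original recipe, kept as comments so the
# file stays valid YAML): SpeechBrain hparams files like this one are
# loaded with HyperPyYAML, which resolves the !new:/!name:/!apply: tags
# and the &id/*id anchors above into live Python objects. The file name
# below and the `crybrain_config_utils` module being importable are
# assumptions.
#
#   from hyperpyyaml import load_hyperpyyaml
#
#   with open("ecapa_voxceleb_basic.yaml") as f:
#       hparams = load_hyperpyyaml(f)
#
#   # Fetch the pretrained VoxCeleb weights into the ECAPA encoder before
#   # fine-tuning (Pretrainer collects the checkpoint, then copies params).
#   hparams["pretrained_embedding_model"].collect_files()
#   hparams["pretrained_embedding_model"].load_collected()
#
#   embedding_model = hparams["embedding_model"]  # ECAPA_TDNN, 192-d embeddings
# ---------------------------------------------------------------------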