# Generated 2022-09-22 from:
# /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml
# yamllint disable
# ################################
# Model: language identification with ECAPA
# Authors: Tanel Alumäe, 2021
# ################################

# Basic parameters
seed: 1988
__set_seed: !apply:torch.manual_seed [1988]
output_folder: results/epaca/1988
save_folder: results/epaca/1988/save
train_log: results/epaca/1988/train_log.txt
data_folder: ./
rir_folder: ./
shards_url: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards
train_meta: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/train/meta.json
val_meta: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/dev/meta.json
train_shards: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/train/shard-{000000..000009}.tar
val_shards: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/dev/shard-000000.tar

# Set to a directory on a large disk if you are training on WebDataset shards hosted on the web
#shard_cache_dir:

ckpt_interval_minutes: 5

# Training parameters
number_of_epochs: 1
lr: 0.001
lr_final: 0.0001
sample_rate: 16000
sentence_len: 3  # seconds

# Feature parameters
n_mels: 60
left_frames: 0
right_frames: 0
deltas: false

# Number of languages
out_n_neurons: 2

train_dataloader_options:
    num_workers: 2
    batch_size: 128

val_dataloader_options:
    num_workers: 0
    batch_size: 32

# Functions
compute_features: &id003 !new:speechbrain.lobes.features.Fbank
    n_mels: 60
    left_frames: 0
    right_frames: 0
    deltas: false

embedding_model: &id004 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
    input_size: 60
    channels: [1024, 1024, 1024, 1024, 3072]
    kernel_sizes: [5, 3, 3, 3, 1]
    dilations: [1, 2, 3, 4, 1]
    attention_channels: 128
    lin_neurons: 256

classifier: &id005 !new:speechbrain.lobes.models.Xvector.Classifier
    input_shape: [null, null, 256]
    activation: !name:torch.nn.LeakyReLU
    lin_blocks: 1
    lin_neurons: 512
    out_neurons: 2

epoch_counter: &id007 !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: 1

augment_speed: &id001 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
    sample_rate: 16000
    speeds: [90, 100, 110]

add_rev_noise: &id002 !new:speechbrain.lobes.augment.EnvCorrupt
    openrir_folder: ./
    openrir_max_noise_len: 3.0  # seconds
    reverb_prob: 0.5
    noise_prob: 0.8
    noise_snr_low: 0
    noise_snr_high: 15
    rir_scale_factor: 1.0

# Definition of the augmentation pipeline.
# If concat_augment = False, the augmentation techniques are applied
# in sequence. If concat_augment = True, all the augmented signals
# are concatenated in a single big batch.
augment_pipeline: [*id001, *id002]
concat_augment: false
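# A rough sketch (Python, kept as YAML comments; variable names are
# illustrative, not taken from the recipe's actual train.py) of how a
# training loop might consume this pipeline when concat_augment is false,
# i.e. each augmenter transforms the batch in sequence:
#
#   wavs, lens = batch.sig                       # hypothetical batch field
#   for augment in hparams["augment_pipeline"]:
#       wavs = augment(wavs, lens)               # speed perturb, then reverb+noise
#   feats = hparams["compute_features"](wavs)    # 60-dim log Mel filterbanks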
mean_var_norm: &id006 !new:speechbrain.processing.features.InputNormalization
    norm_type: sentence
    std_norm: false

modules:
    compute_features: *id003
    augment_speed: *id001
    add_rev_noise: *id002
    embedding_model: *id004
    classifier: *id005
    mean_var_norm: *id006

compute_cost: !name:speechbrain.nnet.losses.nll_loss
# compute_error: !name:speechbrain.nnet.losses.classification_error

opt_class: !name:torch.optim.Adam
    lr: 0.001
    weight_decay: 0.000002

lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
    initial_value: 0.001
    final_value: 0.0001
    epoch_count: 1

# Logging + checkpoints
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
    save_file: results/epaca/1988/train_log.txt

error_stats: !name:speechbrain.utils.metric_stats.MetricStats
    metric: !name:speechbrain.nnet.losses.classification_error
    reduction: batch

checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: results/epaca/1988/save
    recoverables:
        embedding_model: *id004
        classifier: *id005
        normalizer: *id006
        counter: *id007
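# A minimal sketch of how a training script typically loads this file with
# SpeechBrain's HyperPyYAML (the filename and the override shown here are
# assumptions for illustration, not part of this recipe):
#
#   from hyperpyyaml import load_hyperpyyaml
#
#   with open("train_ecapa.yaml") as fin:
#       hparams = load_hyperpyyaml(fin, overrides={"number_of_epochs": 2})
#
#   # !new: entries are now instantiated objects: hparams["embedding_model"]
#   # is an ECAPA_TDNN module, and hparams["checkpointer"] can save and
#   # recover everything listed under recoverables.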