sahita committed
Commit f5358b4
1 Parent(s): ee74617

Upload hyperparams.yaml

Files changed (1)
  1. hyperparams.yaml +140 -0
hyperparams.yaml ADDED
@@ -0,0 +1,140 @@
+ # Generated 2022-10-17 from:
+ # /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml
+ # yamllint disable
+ ################################
+ # Model: language identification with ECAPA
+ # Authors: Tanel Alumäe, 2021
+ # ################################
+
+ # Basic parameters
+ seed: 1988
+ __set_seed: !apply:torch.manual_seed [1988]
+ output_folder: results_3lang/epaca/1988
+ save_folder: results_3lang/epaca/1988/save
+ train_log: results_3lang/epaca/1988/train_log.txt
+ data_folder: ./
+ rir_folder: ./
+
+ shards_url: /opt/acoustic-pr/speechbrain_Voxlingua/data_shards
+ train_meta: /opt/acoustic-pr/speechbrain_Voxlingua/data_shards/train/meta.json
+ val_meta: /opt/acoustic-pr/speechbrain_Voxlingua/data_shards/dev/meta.json
+ train_shards: /opt/acoustic-pr/speechbrain_Voxlingua/data_shards/train/shard-{000000..000013}.tar
+ val_shards: /opt/acoustic-pr/speechbrain_Voxlingua/data_shards/dev/shard-000000.tar
+
+ # Set to directory on a large disk if you are training on Webdataset shards hosted on the web
+ #shard_cache_dir:
+
+ ckpt_interval_minutes: 5
+
+ # Training parameters
+ number_of_epochs: 40
+ lr: 0.001
+ lr_final: 0.0001
+ sample_rate: 16000
+ sentence_len: 3 # seconds
+
+ # Feature parameters
+ n_mels: 60
+ left_frames: 0
+ right_frames: 0
+ deltas: false
+
+ # Number of languages
+ out_n_neurons: 3
+
+ train_dataloader_options:
+     num_workers: 0
+     batch_size: 32
+
+ val_dataloader_options:
+     num_workers: 0
+     batch_size: 16
+
+ # Functions
+ compute_features: &id003 !new:speechbrain.lobes.features.Fbank
+     n_mels: 60
+     left_frames: 0
+     right_frames: 0
+     deltas: false
+
+ embedding_model: &id004 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
+     input_size: 60
+     channels: [1024, 1024, 1024, 1024, 3072]
+     kernel_sizes: [5, 3, 3, 3, 1]
+     dilations: [1, 2, 3, 4, 1]
+     attention_channels: 128
+     lin_neurons: 256
+
+ classifier: &id005 !new:speechbrain.lobes.models.Xvector.Classifier
+     input_shape: [null, null, 256]
+     activation: !name:torch.nn.LeakyReLU
+     lin_blocks: 1
+     lin_neurons: 512
+     out_neurons: 3
+
+ epoch_counter: &id007 !new:speechbrain.utils.epoch_loop.EpochCounter
+     limit: 40
+
+
+ augment_speed: &id001 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
+     sample_rate: 16000
+     speeds: [90, 100, 110]
+
+
+ add_rev_noise: &id002 !new:speechbrain.lobes.augment.EnvCorrupt
+     openrir_folder: ./
+     openrir_max_noise_len: 3.0 # seconds
+     reverb_prob: 0.5
+     noise_prob: 0.8
+     noise_snr_low: 0
+     noise_snr_high: 15
+     rir_scale_factor: 1.0
+
+ # Definition of the augmentation pipeline.
+ # If concat_augment = False, the augmentation techniques are applied
+ # in sequence. If concat_augment = True, all the augmented signals
+ # are concatenated in a single big batch.
+ augment_pipeline: [*id001, *id002]
+
+ concat_augment: false
+
+ mean_var_norm: &id006 !new:speechbrain.processing.features.InputNormalization
+
+     norm_type: sentence
+     std_norm: false
+
+ modules:
+     compute_features: *id003
+     augment_speed: *id001
+     add_rev_noise: *id002
+     embedding_model: *id004
+     classifier: *id005
+     mean_var_norm: *id006
+ compute_cost: !name:speechbrain.nnet.losses.nll_loss
+ # compute_error: !name:speechbrain.nnet.losses.classification_error
+
+ opt_class: !name:torch.optim.Adam
+     lr: 0.001
+     weight_decay: 0.000002
+
+ lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
+     initial_value: 0.001
+     final_value: 0.0001
+     epoch_count: 40
+
+ # Logging + checkpoints
+ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
+     save_file: results_3lang/epaca/1988/train_log.txt
+
+
+ error_stats: !name:speechbrain.utils.metric_stats.MetricStats
+     metric: !name:speechbrain.nnet.losses.classification_error
+     reduction: batch
+
+ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
+     checkpoints_dir: results_3lang/epaca/1988/save
+     recoverables:
+         embedding_model: *id004
+         classifier: *id005
+         normalizer: *id006
+         counter: *id007
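
For reference, below is a minimal sketch (not part of this commit) of how a hyperparams file like this is typically resolved with HyperPyYAML, the loader SpeechBrain recipes use. The file path, the override value, and the printed keys are illustrative assumptions; loading requires speechbrain and torch to be installed, and instantiating add_rev_noise (EnvCorrupt) may download the OpenRIR corpus into rir_folder.

# sketch.py -- illustrative only; assumes hyperparams.yaml is in the working directory
from hyperpyyaml import load_hyperpyyaml

with open("hyperparams.yaml") as fin:
    # overrides replaces top-level values without editing the file,
    # e.g. shortening training for a quick local run
    hparams = load_hyperpyyaml(fin, overrides={"number_of_epochs": 1})

# The !new: tags are instantiated into objects at load time:
embedding_model = hparams["embedding_model"]  # ECAPA_TDNN producing 256-dim embeddings
classifier = hparams["classifier"]            # classifier head over 3 languages
print(hparams["number_of_epochs"], hparams["lr"], hparams["out_n_neurons"])

Note that this dumped file has all !ref references already expanded (paths such as results_3lang/epaca/1988 are hard-coded), so an override changes only the key it names, not values derived from it.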