# Top-level audio sample rate (presumably Hz). The same value, 16000, is
# repeated in train_ds, validation_ds, train_ds.augmentor.speed.sr and
# preprocessor. Nothing in this file links the copies (no anchors/aliases),
# so they have to be changed together.
sample_rate: 16000
# Training dataset and data-loading settings.
train_ds:
  # Absolute path to the training manifest; the directory name suggests a
  # combination of Fisher, Switchboard, VoxCeleb1/2 and LibriSpeech.
  # NOTE(review): machine-specific path — override it when running elsewhere.
  manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/train.json
  sample_rate: 16000
  # No explicit label list is given here.
  # NOTE(review): presumably labels are derived from the manifest — confirm.
  labels: null
  batch_size: 64
  shuffle: true
  # NOTE(review): presumably the length in seconds of each training
  # segment — confirm against the dataset class.
  time_length: 3
  # Augmentations configured for training only; validation_ds has no
  # augmentor section.
  augmentor:
    # Additive noise drawn from a separate noise manifest. `prob` is
    # presumably the per-sample probability of applying it; the SNR keys
    # bound the mixing level to 0-15 dB.
    noise:
      manifest_path: /manifests/noise/rir_noise_manifest.json
      prob: 0.5
      min_snr_db: 0
      max_snr_db: 15
    # Speed perturbation with a rate between 0.95 and 1.05.
    speed:
      prob: 0.5
      # Same value as the dataset sample_rate above.
      sr: 16000
      resample_type: kaiser_fast
      min_speed_rate: 0.95
      max_speed_rate: 1.05
  num_workers: 15
  pin_memory: true
# Validation dataset and data-loading settings. Uses the same sample_rate,
# batch_size, time_length, num_workers and pin_memory values as train_ds,
# but reads the dev manifest, does not shuffle, and has no augmentor.
validation_ds:
  # NOTE(review): machine-specific absolute path — override it when
  # running elsewhere.
  manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/dev.json
  sample_rate: 16000
  labels: null
  batch_size: 64
  shuffle: false
  time_length: 3
  num_workers: 15
  pin_memory: true
# Audio front end: converts waveforms to mel-spectrogram features using the
# class named in `_target_`.
preprocessor:
  _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
  normalize: per_feature
  # NOTE(review): window_size / window_stride are presumably in seconds
  # (25 ms window, 10 ms hop) — confirm against the preprocessor docs.
  window_size: 0.025
  sample_rate: 16000
  window_stride: 0.01
  window: hann
  # Number of features produced; encoder.feat_in is set to the same value
  # (80) and must be kept in step with it.
  features: 80
  n_fft: 512
  frame_splicing: 1
  dither: 1.0e-05
  # NOTE(review): may be a legacy option in newer NeMo releases — check
  # whether the installed version still accepts it.
  stft_conv: false
# Spectrogram masking augmentation, implemented by the class in `_target_`.
spec_augment:
  _target_: nemo.collections.asr.modules.SpectrogramAugmentation
  # Three frequency masks with freq_width 4.
  freq_masks: 3
  freq_width: 4
  # Five time masks. time_width is a float (0.03), unlike the integer
  # freq_width.
  # NOTE(review): a fractional value presumably means a fraction of the
  # utterance length rather than a frame count — confirm.
  time_masks: 5
  time_width: 0.03
# Encoder network settings for the class named in `_target_`.
encoder:
  _target_: nemo.collections.asr.modules.ECAPAEncoder
  # Same value as preprocessor.features (80).
  feat_in: 80
  # filters, kernel_sizes and dilations each list five values.
  # NOTE(review): presumably one entry per encoder layer, so the three
  # lists must stay the same length — confirm.
  # The last filter value (3072) is the same as decoder.feat_in.
  filters:
  - 1024
  - 1024
  - 1024
  - 1024
  - 3072
  kernel_sizes:
  - 5
  - 3
  - 3
  - 3
  - 1
  dilations:
  - 1
  - 1
  - 1
  - 1
  - 1
  # NOTE(review): presumably the Res2Net-style scale of the ECAPA blocks —
  # confirm against ECAPAEncoder.
  scale: 8
# Decoder settings for the class named in `_target_`: pooling, embedding
# and classification.
decoder:
  _target_: nemo.collections.asr.modules.SpeakerDecoder
  # Same value as the last entry of encoder.filters (3072).
  feat_in: 3072
  # NOTE(review): presumably the number of distinct speaker labels in the
  # training manifest; it would need to change for a different label set —
  # confirm.
  num_classes: 16681
  pool_mode: attention
  emb_sizes: 192
  # NOTE(review): presumably selects an angular-margin output that pairs
  # with the scale/margin values in the `loss` section — confirm.
  angular: true
# Loss hyperparameters.
# NOTE(review): presumably the scale and margin of an angular-margin softmax
# loss, used because decoder.angular is true — confirm against the model.
loss:
  scale: 30
  margin: 0.2
# Optimizer: SGD with the learning rate, weight decay and momentum below.
optim:
  name: sgd
  lr: 0.08
  weight_decay: 0.0002
  # Learning-rate schedule: cosine annealing with a warmup_ratio of 0.1 and
  # a floor of min_lr.
  # NOTE(review): warmup_ratio is presumably a fraction of total training
  # steps — confirm.
  sched:
    name: CosineAnnealing
    warmup_ratio: 0.1
    min_lr: 0.0001
  # Belongs to `optim` (not `sched`): it is indented at the optimizer level.
  momentum: 0.9
# Dotted path of the model class this configuration belongs to. Note the key
# is plain `target`, not the `_target_` form used by the module sections.
# NOTE(review): presumably written when the model config was saved and used
# to pick the class on restore — confirm.
target: nemo.collections.asr.models.label_models.EncDecSpeakerLabelModel