# Hyperparameters for an ECAPA-TDNN embedding model fed with mel features.
# Written in HyperPyYAML syntax: `!new:<class>` instantiates the class with
# the nested keys as keyword arguments, and `!ref <key>` reuses the value (or
# object) defined under another key of this file.

# Feature settings.
# NOTE(review): only `n_mel_channels` is referenced inside this file; the
# other keys are presumably read by the code that loads it - confirm.
sample_rate: 16000  # presumably in Hz - confirm
hop_length: 256
win_length: 1024
n_mel_channels: 80
n_fft: 1024
mel_fmin: 0.0
mel_fmax: 8000.0
mel_normalized: False
power: 1
norm: "slaney"
mel_scale: "slaney"
dynamic_range_compression: True

# Modules
embedding_model: !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
    # Tied to the feature setting above so the two cannot drift apart.
    input_size: !ref <n_mel_channels>
    # The five per-layer lists below have one entry per layer.
    channels: [1024, 1024, 1024, 1024, 3072]
    kernel_sizes: [5, 3, 3, 3, 1]
    dilations: [1, 2, 3, 4, 1]
    groups: [1, 1, 1, 1, 1]
    attention_channels: 128
    # NOTE(review): presumably the size of the output embedding - confirm.
    lin_neurons: 192

normalizer: !new:speechbrain.processing.features.InputNormalization
    norm_type: sentence
    std_norm: False

# Named collection of the objects built above.
modules:
    normalizer: !ref <normalizer>
    embedding_model: !ref <embedding_model>

# `loadables` maps a name to each object the Pretrainer should handle;
# both entries point at the same instances used in `modules`.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        normalizer: !ref <normalizer>
        embedding_model: !ref <embedding_model>