# CNN14 pre-trained on VGGSound with supervised objective
#
# NOTE(review): the `!ref <...>` targets in this file were missing from the
# original text and have been restored from the matching key names and from
# `pretrained_path` -- confirm against the published checkpoint config.

# Feature parameters
n_mels: 80

# Pretrain folder (HuggingFace)
pretrained_path: Ubenwa/sb-cssl-vgg-sup

# Output parameters
out_n_neurons: 308

# Model params
compute_features: !new:speechbrain.lobes.features.Fbank
  # Shared with `embedding_model.mel_bins` through the top-level `n_mels`.
  n_mels: !ref <n_mels>
  left_frames: 0
  right_frames: 0
  deltas: false
  sample_rate: 16000
  n_fft: 400
  win_length: 25
  hop_length: 10
  f_min: 0

mean_var_norm: !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false

embedding_model: !new:crytorch.models.components.pann.Cnn14
  mel_bins: !ref <n_mels>
  emb_dim: 2048
  norm_type: bn

classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
  # Same literal value as `embedding_model.emb_dim`.
  input_size: 2048
  out_neurons: !ref <out_n_neurons>

# Objects grouped under a single `modules` mapping.
modules:
  compute_features: !ref <compute_features>
  mean_var_norm: !ref <mean_var_norm>
  embedding_model: !ref <embedding_model>
  classifier: !ref <classifier>

label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder

# Only the embedding model is listed as a loadable; the classifier and
# label-encoder entries are kept below, disabled, exactly as in the original.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    embedding_model: !ref <embedding_model>
    # classifier: !ref <classifier>
    # label_encoder: !ref <label_encoder>
  paths:
    embedding_model: !ref <pretrained_path>/embedding_model.ckpt
    # classifier: !ref <pretrained_path>/classifier.ckpt
    # label_encoder: !ref <pretrained_path>/label_encoder.txt