# File size: 2,234 Bytes

############################# Inference ###################################################

# #################################
# Basic inference parameters for spoken language identification
# (VoxLingua107, 107 classes). First, a network computes embeddings;
# a classifier is then applied on top of them.
#
# Authors:
#  * Mirco Ravanelli 2021
#  * Kunnar Kukk 2022
# #################################

# Source of the pretrained files: every entry under `pretrainer.paths`
# further down is built from this prefix.
# NOTE(review): looks like a HuggingFace Hub repo id — confirm.
pretrained_path: TalTechNLP/voxlingua107-xls-r-300m-wav2vec

# Model parameters
# Device setting (plain string).
device: cpu
# Audio sampling rate — presumably in Hz; confirm against the audio loader.
sample_rate: 16000

# Feature extraction
# Fbank feature extractor built with n_mels=60 and exposed as
# `compute_features` in the `modules` mapping further down.
# NOTE(review): how these features are used next to wav2vec2 is not visible
# in this file — confirm against the inference interface.
compute_features: !new:speechbrain.lobes.features.Fbank
    n_mels: 60

######################## Wav2Vec ########################
# URL for the wav2vec2 model.
wav2vec2_hub: facebook/wav2vec2-xls-r-300m
freeze_wav2vec: True 
save_folder: ./save

# wav2vec2 wrapper, configured from the settings defined above.
wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
    source: !ref <wav2vec2_hub>
    save_path: !ref <save_folder>/wav2vec2_checkpoint
    freeze: !ref <freeze_wav2vec>
    output_norm: True

# Number of classifier outputs; referenced by `classifier.out_neurons`.
out_neurons: 107

# Classification head with `out_neurons` outputs.
classifier: !new:speechbrain.lobes.models.Xvector.Classifier
    out_neurons: !ref <out_neurons>
    activation: !name:torch.nn.LeakyReLU
    lin_neurons: 512
    lin_blocks: 1
    input_shape:
        - null
        - null
        - 2048

# Label encoder, created without arguments; listed as a loadable of the
# `pretrainer` further down.
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder

# Attentive statistics pooling layer.
# NOTE(review): the classifier's input_shape ends in 2048 = 2 x channels;
# presumably the pooled output concatenates two statistics — confirm.
attentive: !new:speechbrain.lobes.models.ECAPA_TDNN.AttentiveStatisticsPooling
    attention_channels: 64
    channels: 1024

# Named modules made available under `modules`.
# `softmax` is defined further down, at the end of this file.
modules:
    wav2vec2: !ref <wav2vec2>
    compute_features: !ref <compute_features>
    classifier: !ref <classifier>
    attentive: !ref <attentive>
    softmax: !ref <softmax>

# Module list holding the pooling layer and the classifier, in that order.
model: !new:torch.nn.ModuleList
    - [!ref <attentive>, !ref <classifier>]

# Pretrainer: `loadables` names the objects to restore and `paths` gives,
# under the same keys, the file each one is restored from. Every path is
# rooted at `pretrained_path`.
# NOTE(review): `classifier` is also an element of `model`, so it is
# presumably covered by both classifier.ckpt and model.ckpt — confirm the two
# checkpoints agree and that the loading order does not matter.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        wav2vec2: !ref <wav2vec2>
        classifier: !ref <classifier>
        label_encoder: !ref <label_encoder>
        model: !ref <model>
    paths:
        wav2vec2: !ref <pretrained_path>/wav2vec2.ckpt
        classifier: !ref <pretrained_path>/classifier.ckpt
        label_encoder: !ref <pretrained_path>/label_encoder.txt
        model: !ref <pretrained_path>/model.ckpt

##################

# Softmax activation built with apply_log: True; referenced from `modules`.
# NOTE(review): presumably this yields log-probabilities — confirm.
softmax: !new:speechbrain.nnet.activations.Softmax
    apply_log: True