############################# Inference ###################################
# Basic inference parameters for speaker-id. We have first a network that
# computes some embeddings. On the top of that, we employ a classifier.
#
# Authors:
#  * Mirco Ravanelli 2021
#  * Kunnar Kukk 2022
###########################################################################

# Pretrained model location on the HuggingFace hub; also the root for the
# checkpoint paths collected by the pretrainer below.
pretrained_path: TalTechNLP/voxlingua107-xls-r-300m-wav2vec

# Model parameters
sample_rate: 16000
device: 'cpu'

# Feature extraction (log Mel filterbanks)
compute_features: !new:speechbrain.lobes.features.Fbank
    n_mels: 60

######################## Wav2Vec ########################
# URL for the wav2vec2 model.
wav2vec2_hub: facebook/wav2vec2-xls-r-300m
freeze_wav2vec: True
save_folder: ./save

# Wav2vec2 encoder; frozen during inference (freeze_wav2vec: True).
wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
    source: !ref <wav2vec2_hub>
    output_norm: True
    freeze: !ref <freeze_wav2vec>
    save_path: !ref <save_folder>/wav2vec2_checkpoint

# Number of output classes (107 languages in VoxLingua107).
out_neurons: 107

# Classifier operating on top of the pooled embeddings.
classifier: !new:speechbrain.lobes.models.Xvector.Classifier
    input_shape: [null, null, 2048]
    activation: !name:torch.nn.LeakyReLU
    lin_blocks: 1
    lin_neurons: 512
    out_neurons: !ref <out_neurons>

# Maps class indices to language labels (loaded from label_encoder.txt).
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder

# Attentive statistics pooling over the encoder outputs.
attentive: !new:speechbrain.lobes.models.ECAPA_TDNN.AttentiveStatisticsPooling
    channels: 1024
    attention_channels: 64

# Modules exposed to the inference interface.
modules:
    wav2vec2: !ref <wav2vec2>
    compute_features: !ref <compute_features>
    classifier: !ref <classifier>
    attentive: !ref <attentive>
    softmax: !ref <softmax>

# Container holding the trainable sub-modules (encoder + classifier).
model: !new:torch.nn.ModuleList
    - [!ref <wav2vec2>, !ref <classifier>]

# Downloads/loads the pretrained checkpoints from <pretrained_path>.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        wav2vec2: !ref <wav2vec2>
        classifier: !ref <classifier>
        label_encoder: !ref <label_encoder>
        model: !ref <model>
    paths:
        wav2vec2: !ref <pretrained_path>/wav2vec2.ckpt
        classifier: !ref <pretrained_path>/classifier.ckpt
        label_encoder: !ref <pretrained_path>/label_encoder.txt
        model: !ref <pretrained_path>/model.ckpt

##################
# Log-softmax over the classifier outputs (log-posteriors).
softmax: !new:speechbrain.nnet.activations.Softmax
    apply_log: True