# ################################
# Model: wav2vec2
# Authors: Rudolf A. Braun 2022, Titouan Parcollet 2022
# ################################
#
# HyperPyYAML configuration: `!new:` instantiates the named class with the
# nested mapping as keyword arguments, `!name:` passes a callable without
# calling it, and `!ref <key>` points at another entry of this file.

# Sampling rate of the audio, presumably in Hz -- TODO confirm with the caller.
sample_rate: 16000

# standard parameters for the BASE model
latent_extractor: !new:speechbrain.lobes.models.wav2vec.W2VLatentExtractor
  # Seven entries, all 512.
  out_channels: [512, 512, 512, 512, 512, 512, 512]

# standard parameters for the BASE model
latent_encoder: !new:speechbrain.lobes.models.transformer.Transformer.TransformerEncoder
  d_model: 768
  num_layers: 12
  nhead: 8
  # 4 x d_model.
  d_ffn: 3072
  dropout: 0.1
  layerdrop_prob: 0.0
  normalize_before: True
  # Passed as a reference to the class itself (not an instance).
  activation: !name:torch.nn.GELU

# standard parameters for the BASE model
encoder_wrapper: !new:speechbrain.lobes.models.wav2vec.EncoderWrapper
  # Same value as the entries of latent_extractor.out_channels; presumably
  # these have to agree -- verify against EncoderWrapper.
  in_dim: 512
  # Same value as latent_encoder.d_model; presumably these have to agree.
  embedding_dim: 768
  latent_encoder: !ref <latent_encoder>
  dropout_encoder_input: 0.1

# Container holding the latent extractor and the wrapped encoder, listed in
# that order.
encoder: !new:speechbrain.nnet.containers.LengthsCapableSequential
  latent_extractor: !ref <latent_extractor>
  encoder_wrapper: !ref <encoder_wrapper>

modules:
  encoder: !ref <encoder>

# Only latent_encoder and latent_extractor are listed as loadables here;
# encoder_wrapper and encoder are not listed separately.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    latent_encoder: !ref <latent_encoder>
    latent_extractor: !ref <latent_extractor>