kunnark committed on
Commit
bba3e85
1 Parent(s): de2e05d

First model commit.

Browse files
Files changed (1) hide show
  1. inference_wav2vec.yaml +75 -0
inference_wav2vec.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ############################# Inference ###################################################
2
+
3
+ # #################################
4
+ # Basic inference parameters for speaker-id. First, a network
5
+ # computes some embeddings. On top of that, we employ a classifier.
6
+ #
7
+ # Authors:
8
+ # * Mirco Ravanelli 2021
9
+ # * Kunnar Kukk 2022
10
+ # #################################
11
+
12
+ # pretrain folders:
13
+ pretrained_path: ./
14
+
15
+ # Model parameters
16
+ sample_rate: 16000
17
+ device: 'cpu'
18
+
19
+ # Feature extraction
20
+ compute_features: !new:speechbrain.lobes.features.Fbank
21
+ n_mels: 60
22
+
23
+ ######################## Wav2Vec ########################
24
+ # URL for the wav2vec2 model.
25
+ wav2vec2_hub: facebook/wav2vec2-large-xlsr-53
26
+ freeze_wav2vec: True # False
27
+ save_folder: ./save
28
+
29
+ wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
30
+ source: !ref <wav2vec2_hub>
31
+ output_norm: True
32
+ freeze: !ref <freeze_wav2vec>
33
+ save_path: !ref <save_folder>/wav2vec2_checkpoint
34
+
35
+ out_neurons: 107
36
+
37
+ classifier: !new:speechbrain.lobes.models.Xvector.Classifier
38
+ input_shape: [null, null, 2048]
39
+ activation: !name:torch.nn.LeakyReLU
40
+ lin_blocks: 1
41
+ lin_neurons: 512
42
+ out_neurons: !ref <out_neurons>
43
+
44
+ label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
45
+
46
+ attentive: !new:speechbrain.lobes.models.ECAPA_TDNN.AttentiveStatisticsPooling
47
+ channels: 1024
48
+ attention_channels: 64
49
+
50
+ modules:
51
+ wav2vec2: !ref <wav2vec2> # Added
52
+ compute_features: !ref <compute_features>
53
+ classifier: !ref <classifier>
54
+ attentive: !ref <attentive> # Added
55
+ softmax: !ref <softmax> # Added
56
+
57
+ model: !new:torch.nn.ModuleList
58
+ - [!ref <attentive>, !ref <classifier>]
59
+
60
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
61
+ loadables:
62
+ wav2vec2: !ref <wav2vec2>
63
+ classifier: !ref <classifier>
64
+ label_encoder: !ref <label_encoder>
65
+ model: !ref <model>
66
+ paths:
67
+ wav2vec2: !ref <pretrained_path>/wav2vec2.ckpt
68
+ classifier: !ref <pretrained_path>/classifier.ckpt
69
+ label_encoder: !ref <pretrained_path>/label_encoder.txt
70
+ model: !ref <pretrained_path>/model.ckpt
71
+
72
+ ##################
73
+
74
+ softmax: !new:speechbrain.nnet.activations.Softmax
75
+ apply_log: True