nairaxo committed on
Commit
fdf3f65
1 Parent(s): ca69692

Upload hyperparams.yaml

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +97 -0
hyperparams.yaml ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ################################
2
+ # Model: wav2vec2 + DNN + CTC
3
+ # Augmentation: SpecAugment
4
+ # Authors: Titouan Parcollet 2021
5
+ # ################################
6
+
7
+ sample_rate: 16000
8
+ wav2vec2_hub: facebook/wav2vec2-large-xlsr-53
9
+ # wav2vec2_hub: facebook/wav2vec2-xls-r-300m
10
+
11
+ # Tokenizer parameters
12
+ token_type: char # ["unigram", "bpe", "char"]
13
+ character_coverage: 1.0
14
+
15
+ # Model parameters
16
+ activation: !name:torch.nn.LeakyReLU
17
+ dnn_layers: 2
18
+ dnn_neurons: 1024
19
+ emb_size: 128
20
+ dec_neurons: 1024
21
+
22
+ # Outputs
23
+ output_neurons: 51 # Tokenizer vocabulary size; index 0 is the CTC blank (bos = 1, eos = 2, see the decoding parameters)
24
+
25
+ # Decoding parameters
26
+ # Make sure the bos and eos indices match those used by the tokenizer
27
+ blank_index: 0
28
+ bos_index: 1
29
+ eos_index: 2
30
+ min_decode_ratio: 0.0
31
+ max_decode_ratio: 1.0
32
+ beam_size: 80
33
+ eos_threshold: 1.5
34
+ using_max_attn_shift: True
35
+ max_attn_shift: 140
36
+ ctc_weight_decode: 0.0
37
+ temperature: 1.50
38
+
39
+ enc: !new:speechbrain.nnet.containers.Sequential
40
+ input_shape: [null, null, 1024]
41
+ linear1: !name:speechbrain.nnet.linear.Linear
42
+ n_neurons: 1024
43
+ bias: True
44
+ bn1: !name:speechbrain.nnet.normalization.BatchNorm1d
45
+ activation: !new:torch.nn.LeakyReLU
46
+ drop: !new:torch.nn.Dropout
47
+ p: 0.15
48
+ linear2: !name:speechbrain.nnet.linear.Linear
49
+ n_neurons: 1024
50
+ bias: True
51
+ bn2: !name:speechbrain.nnet.normalization.BatchNorm1d
52
+ activation2: !new:torch.nn.LeakyReLU
53
+ drop2: !new:torch.nn.Dropout
54
+ p: 0.15
55
+ linear3: !name:speechbrain.nnet.linear.Linear
56
+ n_neurons: 1024
57
+ bias: True
58
+ bn3: !name:speechbrain.nnet.normalization.BatchNorm1d
59
+ activation3: !new:torch.nn.LeakyReLU
60
+
61
+ wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
62
+ source: !ref <wav2vec2_hub>
63
+ output_norm: True
64
+ freeze: True
65
+ save_path: model_checkpoints
66
+
67
+ ctc_lin: !new:speechbrain.nnet.linear.Linear
68
+ input_size: !ref <dnn_neurons>
69
+ n_neurons: !ref <output_neurons>
70
+
71
+ log_softmax: !new:speechbrain.nnet.activations.Softmax
72
+ apply_log: True
73
+
74
+ ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
75
+ blank_index: !ref <blank_index>
76
+
77
+ asr_model: !new:torch.nn.ModuleList
78
+ - [!ref <enc>, !ref <ctc_lin>]
79
+
80
+ tokenizer: !new:sentencepiece.SentencePieceProcessor
81
+
82
+ encoder: !new:speechbrain.nnet.containers.LengthsCapableSequential
83
+ wav2vec2: !ref <wav2vec2>
84
+ enc: !ref <enc>
85
+ ctc_lin: !ref <ctc_lin>
86
+
87
+ decoding_function: !name:speechbrain.decoders.ctc_greedy_decode
88
+ blank_id: !ref <blank_index>
89
+
90
+ modules:
91
+ encoder: !ref <encoder>
92
+
93
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
94
+ loadables:
95
+ wav2vec2: !ref <wav2vec2>
96
+ asr: !ref <asr_model>
97
+ tokenizer: !ref <tokenizer>