Adel-Moumen committed on
Commit
b67e798
1 Parent(s): ec65bfe

Upload hyperparams.yaml

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +124 -0
hyperparams.yaml ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ############################################################################
2
+ # Model: E2E ASR with Transformer
3
+ # Encoder: Transformer Encoder
4
+ # Decoder: Transformer Decoder + (CTC/ATT joint) beamsearch
5
+ # Tokens: unigram
6
+ # losses: CTC + KLdiv (Label Smoothing loss)
7
+ # Training: Tedlium2
8
+ # Authors: Adel Moumen 2023
9
+ # ############################################################################
10
+
11
+ # Feature parameters
12
+ sample_rate: 16000
13
+ n_fft: 400
14
+ n_mels: 80
15
+
16
+ ####################### Model parameters ###########################
17
+ # Transformer
18
+ d_model: 512
19
+ nhead: 8
20
+ num_encoder_layers: 18
21
+ num_decoder_layers: 6
22
+ csgu_linear_units: 3072
23
+ csgu_kernel_size: 31
24
+ transformer_dropout: 0.1
25
+ activation: !name:torch.nn.GELU
26
+ output_neurons: 500
27
+
28
+ # Outputs
29
+ blank_index: 0
30
+ label_smoothing: 0.1
31
+ pad_index: 0
32
+ bos_index: 1
33
+ eos_index: 2
34
+
35
+ # Decoding parameters
36
+ min_decode_ratio: 0.0
37
+ max_decode_ratio: 1.0
38
+ beam_size: 20
39
+ ctc_weight_decode: 0.3
40
+
41
+ ############################## models ################################
42
+
43
+ CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
44
+ input_shape: (8, 10, 80)
45
+ num_blocks: 2
46
+ num_layers_per_block: 1
47
+ out_channels: (64, 32)
48
+ kernel_sizes: (3, 3)
49
+ strides: (2, 2)
50
+ residuals: (False, False)
51
+
52
+ Transformer: !new:speechbrain.lobes.models.transformer.TransformerASR.TransformerASR # yamllint disable-line rule:line-length
53
+ input_size: 640
54
+ tgt_vocab: !ref <output_neurons>
55
+ d_model: !ref <d_model>
56
+ nhead: !ref <nhead>
57
+ num_encoder_layers: !ref <num_encoder_layers>
58
+ num_decoder_layers: !ref <num_decoder_layers>
59
+ dropout: !ref <transformer_dropout>
60
+ activation: !ref <activation>
61
+ branchformer_activation: !ref <activation>
62
+ encoder_module: branchformer
63
+ csgu_linear_units: !ref <csgu_linear_units>
64
+ kernel_size: !ref <csgu_kernel_size>
65
+ attention_type: RelPosMHAXL
66
+ normalize_before: True
67
+ causal: False
68
+
69
+ ctc_lin: !new:speechbrain.nnet.linear.Linear
70
+ input_size: !ref <d_model>
71
+ n_neurons: !ref <output_neurons>
72
+
73
+ seq_lin: !new:speechbrain.nnet.linear.Linear
74
+ input_size: !ref <d_model>
75
+ n_neurons: !ref <output_neurons>
76
+
77
+ decoder: !new:speechbrain.decoders.S2STransformerBeamSearch
78
+ modules: [!ref <Transformer>, !ref <seq_lin>, !ref <ctc_lin>]
79
+ bos_index: !ref <bos_index>
80
+ eos_index: !ref <eos_index>
81
+ blank_index: !ref <blank_index>
82
+ min_decode_ratio: !ref <min_decode_ratio>
83
+ max_decode_ratio: !ref <max_decode_ratio>
84
+ beam_size: !ref <test_beam_size>
85
+ ctc_weight: !ref <ctc_weight_decode>
86
+ lm_weight: !ref <lm_weight>
87
+ lm_modules: !ref <lm_model>
88
+ temperature: 1.15
89
+ temperature_lm: 1.15
90
+ using_eos_threshold: False
91
+ length_normalization: True
92
+
93
+ log_softmax: !new:torch.nn.LogSoftmax
94
+ dim: -1
95
+
96
+ normalize: !new:speechbrain.processing.features.InputNormalization
97
+ norm_type: global
98
+ update_until_epoch: 4
99
+
100
+ compute_features: !new:speechbrain.lobes.features.Fbank
101
+ sample_rate: !ref <sample_rate>
102
+ n_fft: !ref <n_fft>
103
+ win_length: !ref <win_length>
104
+ n_mels: !ref <n_mels>
105
+
106
+ tokenizer: !new:sentencepiece.SentencePieceProcessor
107
+
108
+ modules:
109
+ CNN: !ref <CNN>
110
+ Transformer: !ref <Transformer>
111
+ seq_lin: !ref <seq_lin>
112
+ ctc_lin: !ref <ctc_lin>
113
+ normalize: !ref <normalize>
114
+
115
+ model: !new:torch.nn.ModuleList
116
+ - [!ref <CNN>, !ref <Transformer>, !ref <seq_lin>, !ref <ctc_lin>]
117
+
118
+ # The pretrainer allows a mapping between pretrained files and instances that
119
+ # are declared in the yaml.
120
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
121
+ loadables:
122
+ normalizer: !ref <normalizer>
123
+ model: !ref <model>
124
+ tokenizer: !ref <tokenizer>