Adel-Moumen committed on
Commit
6454ad9
1 Parent(s): dee4160

Update hyperparams.yaml

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +23 -13
hyperparams.yaml CHANGED
@@ -29,7 +29,6 @@ vocab_size: 5000
29
 
30
  # Outputs
31
  blank_index: 0
32
- label_smoothing: 0.0
33
  pad_index: 0
34
  bos_index: 1
35
  eos_index: 2
@@ -38,8 +37,6 @@ unk_index: 0
38
  # Decoding parameters
39
  min_decode_ratio: 0.0
40
  max_decode_ratio: 1.0
41
- valid_search_interval: 10
42
- valid_beam_size: 10
43
  test_beam_size: 66
44
  lm_weight: 0.60
45
  ctc_weight_decode: 0.50
@@ -59,7 +56,7 @@ CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
59
  residuals: (False, False, True)
60
 
61
  Transformer: !new:speechbrain.lobes.models.transformer.TransformerASR.TransformerASR # yamllint disable-line rule:line-length
62
- input_size: 640
63
  tgt_vocab: !ref <output_neurons>
64
  d_model: !ref <d_model>
65
  nhead: !ref <nhead>
@@ -95,21 +92,33 @@ seq_lin: !new:speechbrain.nnet.linear.Linear
95
  input_size: !ref <d_model>
96
  n_neurons: !ref <output_neurons>
97
 
98
- decoder: !new:speechbrain.decoders.S2STransformerBeamSearch
99
- modules: [!ref <Transformer>, !ref <seq_lin>, !ref <ctc_lin>]
100
- bos_index: !ref <bos_index>
 
 
101
  eos_index: !ref <eos_index>
102
  blank_index: !ref <blank_index>
 
 
 
 
 
 
 
 
 
 
 
 
103
  min_decode_ratio: !ref <min_decode_ratio>
104
  max_decode_ratio: !ref <max_decode_ratio>
105
  beam_size: !ref <test_beam_size>
106
- ctc_weight: !ref <ctc_weight_decode>
107
- lm_weight: !ref <lm_weight>
108
- lm_modules: !ref <lm_model>
109
  temperature: 1.15
110
- temperature_lm: 1.15
111
  using_eos_threshold: False
112
  length_normalization: True
 
 
113
 
114
  Tencoder: !new:speechbrain.lobes.models.transformer.TransformerASR.EncoderWrapper
115
  transformer: !ref <Transformer>
@@ -122,7 +131,7 @@ encoder: !new:speechbrain.nnet.containers.LengthsCapableSequential
122
  transformer_encoder: !ref <Tencoder>
123
 
124
  asr_model: !new:torch.nn.ModuleList
125
- - [!ref <normalizer>, !ref <CNN>, !ref <Transformer>, !ref <seq_lin>, !ref <ctc_lin>]
126
 
127
  log_softmax: !new:torch.nn.LogSoftmax
128
  dim: -1
@@ -142,6 +151,7 @@ modules:
142
  lm_model: !ref <lm_model>
143
  encoder: !ref <encoder>
144
  decoder: !ref <decoder>
 
145
  # The pretrainer allows a mapping between pretrained files and instances that
146
  # are declared in the yaml.
147
  pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
@@ -149,4 +159,4 @@ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
149
  normalizer: !ref <normalizer>
150
  asr: !ref <asr_model>
151
  lm: !ref <lm_model>
152
- tokenizer: !ref <tokenizer>
 
29
 
30
  # Outputs
31
  blank_index: 0
 
32
  pad_index: 0
33
  bos_index: 1
34
  eos_index: 2
 
37
  # Decoding parameters
38
  min_decode_ratio: 0.0
39
  max_decode_ratio: 1.0
 
 
40
  test_beam_size: 66
41
  lm_weight: 0.60
42
  ctc_weight_decode: 0.50
 
56
  residuals: (False, False, True)
57
 
58
  Transformer: !new:speechbrain.lobes.models.transformer.TransformerASR.TransformerASR # yamllint disable-line rule:line-length
59
+ input_size: 1280
60
  tgt_vocab: !ref <output_neurons>
61
  d_model: !ref <d_model>
62
  nhead: !ref <nhead>
 
92
  input_size: !ref <d_model>
93
  n_neurons: !ref <output_neurons>
94
 
95
+ transformerlm_scorer: !new:speechbrain.decoders.scorer.TransformerLMScorer
96
+ language_model: !ref <lm_model>
97
+ temperature: 1.15
98
+
99
+ ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer
100
  eos_index: !ref <eos_index>
101
  blank_index: !ref <blank_index>
102
+ ctc_fc: !ref <ctc_lin>
103
+
104
+ scorer: !new:speechbrain.decoders.scorer.ScorerBuilder
105
+ full_scorers: [!ref <transformerlm_scorer>, !ref <ctc_scorer>]
106
+ weights:
107
+ transformerlm: !ref <lm_weight>
108
+ ctc: !ref <ctc_weight_decode>
109
+
110
+ decoder: !new:speechbrain.decoders.S2STransformerBeamSearcher
111
+ modules: [!ref <Transformer>, !ref <seq_lin>]
112
+ bos_index: !ref <bos_index>
113
+ eos_index: !ref <eos_index>
114
  min_decode_ratio: !ref <min_decode_ratio>
115
  max_decode_ratio: !ref <max_decode_ratio>
116
  beam_size: !ref <test_beam_size>
 
 
 
117
  temperature: 1.15
 
118
  using_eos_threshold: False
119
  length_normalization: True
120
+ scorer: !ref <scorer>
121
+
122
 
123
  Tencoder: !new:speechbrain.lobes.models.transformer.TransformerASR.EncoderWrapper
124
  transformer: !ref <Transformer>
 
131
  transformer_encoder: !ref <Tencoder>
132
 
133
  asr_model: !new:torch.nn.ModuleList
134
+ - [!ref <CNN>, !ref <Transformer>, !ref <seq_lin>, !ref <ctc_lin>]
135
 
136
  log_softmax: !new:torch.nn.LogSoftmax
137
  dim: -1
 
151
  lm_model: !ref <lm_model>
152
  encoder: !ref <encoder>
153
  decoder: !ref <decoder>
154
+
155
  # The pretrainer allows a mapping between pretrained files and instances that
156
  # are declared in the yaml.
157
  pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
 
159
  normalizer: !ref <normalizer>
160
  asr: !ref <asr_model>
161
  lm: !ref <lm_model>
162
+ tokenizer: !ref <tokenizer>