flexthink committed on
Commit
edde4dc
1 Parent(s): 8829ad6

Update to match the latest version of SpeechBrain

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +23 -36
hyperparams.yaml CHANGED
@@ -46,6 +46,7 @@ lexicon_batch_size: 1024
46
  sentence_batch_size: 32
47
  homograph_batch_size: 32
48
  ctc_weight: 0.5
 
49
  homograph_loss_weight: 2.0
50
  lr: 0.002
51
  save_for_pretrained: true
@@ -97,7 +98,6 @@ lm_layers: 2 # number of hidden layers
97
  lm_output_neurons: 43
98
 
99
  # Beam Searcher
100
- use_language_model: false
101
  beam_search_min_decode_ratio: 0
102
  beam_search_max_decode_ratio: 1.0
103
  beam_search_beam_size: 16
@@ -268,7 +268,7 @@ modules:
268
  lin: *id010
269
  ctc_lin: *id013
270
  out: *id011
271
- word_emb:
272
  word_emb_enc: *id012
273
  model: *id014
274
  lm_model: &id015 !new:speechbrain.lobes.models.RNNLM.RNNLM
@@ -281,61 +281,48 @@ lm_model: &id015 !new:speechbrain.lobes.models.RNNLM.RNNLM
281
  opt_class: !name:torch.optim.Adam
282
  lr: 0.002
283
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  beam_searcher: &id029 !new:speechbrain.decoders.S2SRNNBeamSearcher
285
  embedding: *id008
286
  decoder: *id009
287
  linear: *id010
288
- ctc_linear: *id013
289
- bos_index: 0
290
- eos_index: 1
291
- blank_index: 2
292
  min_decode_ratio: 0
293
  max_decode_ratio: 1.0
294
  beam_size: 16
295
  eos_threshold: 10.0
296
  using_max_attn_shift: false
297
  max_attn_shift: 10
298
- coverage_penalty: 5.0
299
- ctc_weight: 0.4
300
 
301
  beam_searcher_valid: !new:speechbrain.decoders.S2SRNNBeamSearcher
302
  embedding: *id008
303
  decoder: *id009
304
  linear: *id010
305
- ctc_linear: *id013
306
- bos_index: 0
307
- eos_index: 1
308
- blank_index: 2
309
  min_decode_ratio: 0
310
  max_decode_ratio: 1.0
311
  beam_size: 16
312
  eos_threshold: 10.0
313
  using_max_attn_shift: false
314
  max_attn_shift: 10
315
- coverage_penalty: 5.0
316
- ctc_weight: 0.4
317
-
318
- beam_searcher_lm: !new:speechbrain.decoders.seq2seq.S2SRNNBeamSearchLM
319
- embedding: *id008
320
- decoder: *id009
321
- linear: *id010
322
- ctc_linear: *id013
323
- language_model: *id015
324
- bos_index: 0
325
- eos_index: 1
326
- blank_index: 2
327
- min_decode_ratio: 0
328
- max_decode_ratio: 1.0
329
- beam_size: 16
330
- eos_threshold: 10.0
331
- using_max_attn_shift: false
332
- max_attn_shift: 10
333
- coverage_penalty: 5.0
334
- ctc_weight: 0.4
335
- lm_weight: 0.5
336
- temperature: 1.25
337
- temperature_lm: 1.0
338
-
339
 
340
  lr_annealing: &id018 !new:speechbrain.nnet.schedulers.NewBobScheduler
341
  initial_value: 0.002
 
46
  sentence_batch_size: 32
47
  homograph_batch_size: 32
48
  ctc_weight: 0.5
49
+ ctc_window_size: 0
50
  homograph_loss_weight: 2.0
51
  lr: 0.002
52
  save_for_pretrained: true
 
98
  lm_output_neurons: 43
99
 
100
  # Beam Searcher
 
101
  beam_search_min_decode_ratio: 0
102
  beam_search_max_decode_ratio: 1.0
103
  beam_search_beam_size: 16
 
268
  lin: *id010
269
  ctc_lin: *id013
270
  out: *id011
271
+ word_emb: !ref <word_emb>
272
  word_emb_enc: *id012
273
  model: *id014
274
  lm_model: &id015 !new:speechbrain.lobes.models.RNNLM.RNNLM
 
281
  opt_class: !name:torch.optim.Adam
282
  lr: 0.002
283
 
284
+ ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer
285
+ eos_index: !ref <eos_index>
286
+ blank_index: !ref <blank_index>
287
+ ctc_fc: !ref <ctc_lin>
288
+ ctc_window_size: !ref <ctc_window_size>
289
+
290
+ coverage_scorer: !new:speechbrain.decoders.scorer.CoverageScorer
291
+ vocab_size: !ref <output_neurons>
292
+
293
+ scorer: !new:speechbrain.decoders.scorer.ScorerBuilder
294
+ full_scorers: [!ref <coverage_scorer>, !ref <ctc_scorer>]
295
+ weights:
296
+ coverage: !ref <beam_search_coverage_penalty>
297
+ ctc: !ref <ctc_weight>
298
+
299
  beam_searcher: &id029 !new:speechbrain.decoders.S2SRNNBeamSearcher
300
  embedding: *id008
301
  decoder: *id009
302
  linear: *id010
303
+ bos_index: !ref <bos_index>
304
+ eos_index: !ref <eos_index>
 
 
305
  min_decode_ratio: 0
306
  max_decode_ratio: 1.0
307
  beam_size: 16
308
  eos_threshold: 10.0
309
  using_max_attn_shift: false
310
  max_attn_shift: 10
311
+ scorer: !ref <scorer>
 
312
 
313
  beam_searcher_valid: !new:speechbrain.decoders.S2SRNNBeamSearcher
314
  embedding: *id008
315
  decoder: *id009
316
  linear: *id010
317
+ bos_index: !ref <bos_index>
318
+ eos_index: !ref <eos_index>
 
 
319
  min_decode_ratio: 0
320
  max_decode_ratio: 1.0
321
  beam_size: 16
322
  eos_threshold: 10.0
323
  using_max_attn_shift: false
324
  max_attn_shift: 10
325
+ scorer: !ref <scorer>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
 
327
  lr_annealing: &id018 !new:speechbrain.nnet.schedulers.NewBobScheduler
328
  initial_value: 0.002