flexthink committed
Commit 6297333
1 Parent(s): 8d8d469

G2P: Cleaned up the hparams file

Files changed (1):
  1. hyperparams.yaml +149 -203
hyperparams.yaml CHANGED
@@ -10,60 +10,41 @@
 
 # Seed needs to be set at top of yaml, before objects with parameters are made
 seed: 1234
-__set_seed: !apply:torch.manual_seed [1234]
+__set_seed: !apply:torch.manual_seed [!ref <seed>]
 
 
 # Tokenizers
-char_tokenize: false
+char_tokenize: False
 char_token_type: unigram # ["unigram", "bpe", "char"]
 char_token_output: 512
-char_token_wordwise: true
-phn_tokenize: false
+char_token_wordwise: True
+phn_tokenize: False
 phn_token_type: unigram # ["unigram", "bpe", "char"]
 phn_token_output: 512 # index(blank/eos/bos/unk) = 0
-phn_token_wordwise: true
+phn_token_wordwise: True
 character_coverage: 1.0
 
 
 phonemes_count: 43
 graphemes_count: 31
-phonemes_enable_space: true
-
-# Training Parameters
-lexicon_epochs: 50
-lexicon_ctc_epochs: 10
-lexicon_limit_to_stop: 50 # No stopping by default, can override
-lexicon_limit_warmup: 50 # No stopping by default, can override
-sentence_epochs: 13
-sentence_ctc_epochs: 10
-sentence_limit_to_stop: 3
-sentence_limit_warmup: 3
-homograph_epochs: 50
-homograph_ctc_epochs: 10
-homograph_limit_to_stop: 5
-homograph_limit_warmup: 10
-lexicon_batch_size: 1024
-sentence_batch_size: 32
-homograph_batch_size: 32
+phonemes_enable_space: True
+
 ctc_weight: 0.5
 ctc_window_size: 0
 homograph_loss_weight: 2.0
-lr: 0.002
-save_for_pretrained: true
 
 # Model parameters
-output_neurons: &id004 !apply:speechbrain.utils.hparams.choice
-
-    value: false
+output_neurons: !apply:speechbrain.utils.hparams.choice
+    value: !ref <phn_tokenize>
     choices:
-        true: 513
-        false: 43
+        True: !ref <phn_token_output> + 1
+        False: !ref <phonemes_count>
 
-enc_num_embeddings: &id005 !apply:speechbrain.utils.hparams.choice
-    value: false
+enc_num_embeddings: !apply:speechbrain.utils.hparams.choice
+    value: !ref <char_tokenize>
     choices:
-        true: 513
-        false: 31
+        True: !ref <char_token_output> + 1
+        False: !ref <graphemes_count>
 
 enc_dropout: 0.5
 enc_neurons: 512
@@ -118,7 +99,7 @@ word_emb_dim: 768
 word_emb_enc_dim: 256
 word_emb_norm_type: batch
 
-graphemes: &id028
+graphemes:
     - A
     - B
     - C
@@ -148,9 +129,7 @@ graphemes: &id028
     - "'"
     - ' '
 
-phonemes: &id001
-
-
+phonemes:
     - AA
     - AE
     - AH
@@ -192,94 +171,91 @@ phonemes: &id001
     - ZH
     - ' '
 
-enc_input_dim: &id003 !apply:speechbrain.lobes.models.g2p.model.input_dim
-    use_word_emb: true
-    word_emb_enc_dim: 256
-    embedding_dim: 512
+enc_input_dim: !apply:speechbrain.lobes.models.g2p.model.input_dim
+    use_word_emb: !ref <use_word_emb>
+    word_emb_enc_dim: !ref <word_emb_enc_dim>
+    embedding_dim: !ref <embedding_dim>
 
+phn_char_map: !apply:speechbrain.lobes.models.g2p.dataio.build_token_char_map
+    tokens: !ref <phonemes>
 
-phn_char_map: &id002 !apply:speechbrain.lobes.models.g2p.dataio.build_token_char_map
+char_phn_map: !apply:speechbrain.lobes.models.g2p.dataio.flip_map
+    map_dict: !ref <phn_char_map>
 
+enc: !new:speechbrain.nnet.RNN.LSTM
+    input_shape: [null, null, !ref <enc_input_dim>]
+    bidirectional: True
+    hidden_size: !ref <enc_neurons>
+    num_layers: !ref <enc_num_layers>
+    dropout: !ref <enc_dropout>
 
-# Models
-    tokens: *id001
-char_phn_map: &id023 !apply:speechbrain.lobes.models.g2p.dataio.flip_map
-    map_dict: *id002
-enc: &id006 !new:speechbrain.nnet.RNN.LSTM
-    input_shape: [null, null, *id003]
-    bidirectional: true
-    hidden_size: 512
-    num_layers: 4
-    dropout: 0.5
-
-lin: &id010 !new:speechbrain.nnet.linear.Linear
-    input_size: 512
-    n_neurons: *id004
+lin: !new:speechbrain.nnet.linear.Linear
+    input_size: !ref <dec_neurons>
+    n_neurons: !ref <output_neurons>
     bias: false
 
-ctc_lin: &id013 !new:speechbrain.nnet.linear.Linear
-    input_size: 1024
-    n_neurons: *id004
-encoder_emb: &id007 !new:speechbrain.nnet.embedding.Embedding
-    num_embeddings: *id005
-    embedding_dim: 512
+ctc_lin: !new:speechbrain.nnet.linear.Linear
+    input_size: !ref 2 * <enc_neurons>
+    n_neurons: !ref <output_neurons>
+
+encoder_emb: !new:speechbrain.nnet.embedding.Embedding
+    num_embeddings: !ref <enc_num_embeddings>
+    embedding_dim: !ref <embedding_dim>
 
-emb: &id008 !new:speechbrain.nnet.embedding.Embedding
-    num_embeddings: *id004
-    embedding_dim: 512
+emb: !new:speechbrain.nnet.embedding.Embedding
+    num_embeddings: !ref <output_neurons>
+    embedding_dim: !ref <embedding_dim>
 
-dec: &id009 !new:speechbrain.nnet.RNN.AttentionalRNNDecoder
-    enc_dim: 1024
-    input_size: 512
+dec: !new:speechbrain.nnet.RNN.AttentionalRNNDecoder
+    enc_dim: !ref <enc_neurons> * 2
+    input_size: !ref <embedding_dim>
     rnn_type: gru
     attn_type: content
-    dropout: 0.5
-    hidden_size: 512
-    attn_dim: 256
-    num_layers: 4
-
-word_emb_enc: &id012 !new:speechbrain.lobes.models.g2p.model.WordEmbeddingEncoder
-
-    word_emb_dim: 768
-    word_emb_enc_dim: 256
+    dropout: !ref <dec_dropout>
+    hidden_size: !ref <dec_neurons>
+    attn_dim: !ref <dec_att_neurons>
+    num_layers: !ref <dec_num_layers>
+
+word_emb_enc: !new:speechbrain.lobes.models.g2p.model.WordEmbeddingEncoder
+    word_emb_dim: !ref <word_emb_dim>
+    word_emb_enc_dim: !ref <word_emb_enc_dim>
     norm_type: batch
 
 word_emb: !apply:speechbrain.lobes.models.g2p.dataio.lazy_init
     init: !name:speechbrain.wordemb.transformer.TransformerWordEmbeddings
         model: bert-base-uncased
 
-log_softmax: &id011 !new:speechbrain.nnet.activations.Softmax
+log_softmax: !new:speechbrain.nnet.activations.Softmax
     apply_log: true
 
+model: !new:speechbrain.lobes.models.g2p.model.AttentionSeq2Seq
+    enc: !ref <enc>
+    encoder_emb: !ref <encoder_emb>
+    emb: !ref <emb>
+    dec: !ref <dec>
+    lin: !ref <lin>
+    out: !ref <log_softmax>
+    use_word_emb: !ref <use_word_emb>
+    word_emb_enc: !ref <word_emb_enc>
+
 modules:
-    model: &id014 !new:speechbrain.lobes.models.g2p.model.AttentionSeq2Seq
-        enc: *id006
-        encoder_emb: *id007
-        emb: *id008
-        dec: *id009
-        lin: *id010
-        out: *id011
-        use_word_emb: true
-        word_emb_enc: *id012
-    enc: *id006
-    encoder_emb: *id007
-    emb: *id008
-    dec: *id009
-    lin: *id010
-    ctc_lin: *id013
-    out: *id011
+    model: !ref <model>
+    enc: !ref <enc>
+    encoder_emb: !ref <encoder_emb>
+    emb: !ref <emb>
+    dec: !ref <dec>
+    lin: !ref <lin>
+    ctc_lin: !ref <ctc_lin>
+    out: !ref <log_softmax>
    word_emb: !ref <word_emb>
-    word_emb_enc: *id012
-model: *id014
-lm_model: &id015 !new:speechbrain.lobes.models.RNNLM.RNNLM
-    embedding_dim: 256
-    rnn_layers: 2
-    rnn_neurons: 512
-    output_neurons: 43
-    return_hidden: true
-
-opt_class: !name:torch.optim.Adam
-    lr: 0.002
+    word_emb_enc: !ref <word_emb_enc>
+
+lm_model: !new:speechbrain.lobes.models.RNNLM.RNNLM
+    embedding_dim: !ref <lm_emb_dim>
+    rnn_layers: !ref <lm_layers>
+    rnn_neurons: !ref <lm_rnn_size>
+    output_neurons: !ref <lm_output_neurons>
+    return_hidden: True
 
 ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer
     eos_index: !ref <eos_index>
@@ -296,66 +272,38 @@ scorer: !new:speechbrain.decoders.scorer.ScorerBuilder
     coverage: !ref <beam_search_coverage_penalty>
     ctc: !ref <ctc_weight>
 
-beam_searcher: &id029 !new:speechbrain.decoders.S2SRNNBeamSearcher
-    embedding: *id008
-    decoder: *id009
-    linear: *id010
+beam_searcher: !new:speechbrain.decoders.S2SRNNBeamSearcher
+    embedding: !ref <emb>
+    decoder: !ref <dec>
+    linear: !ref <lin>
    bos_index: !ref <bos_index>
    eos_index: !ref <eos_index>
-    min_decode_ratio: 0
-    max_decode_ratio: 1.0
-    beam_size: 16
-    eos_threshold: 10.0
-    using_max_attn_shift: false
-    max_attn_shift: 10
+    min_decode_ratio: !ref <beam_search_min_decode_ratio>
+    max_decode_ratio: !ref <beam_search_max_decode_ratio>
+    beam_size: !ref <beam_search_beam_size>
+    eos_threshold: !ref <beam_search_eos_threshold>
+    using_max_attn_shift: !ref <beam_search_using_max_attn_shift>
+    max_attn_shift: !ref <beam_search_max_attn_shift>
+    temperature: !ref <beam_search_temperature>
    scorer: !ref <scorer>
 
 beam_searcher_valid: !new:speechbrain.decoders.S2SRNNBeamSearcher
-    embedding: *id008
-    decoder: *id009
-    linear: *id010
+    embedding: !ref <emb>
+    decoder: !ref <dec>
+    linear: !ref <lin>
    bos_index: !ref <bos_index>
    eos_index: !ref <eos_index>
-    min_decode_ratio: 0
-    max_decode_ratio: 1.0
-    beam_size: 16
-    eos_threshold: 10.0
-    using_max_attn_shift: false
-    max_attn_shift: 10
+    min_decode_ratio: !ref <beam_search_min_decode_ratio>
+    max_decode_ratio: !ref <beam_search_max_decode_ratio>
+    beam_size: !ref <beam_search_beam_size>
+    eos_threshold: !ref <beam_search_eos_threshold>
+    using_max_attn_shift: !ref <beam_search_using_max_attn_shift>
+    max_attn_shift: !ref <beam_search_max_attn_shift>
+    temperature: !ref <beam_search_temperature>
    scorer: !ref <scorer>
 
-lr_annealing: &id018 !new:speechbrain.nnet.schedulers.NewBobScheduler
-    initial_value: 0.002
-    improvement_threshold: 0.0
-    annealing_factor: 0.8
-    patient: 0
-
 homograph_extractor: !new:speechbrain.lobes.models.g2p.homograph.SubsequenceExtractor
 
-seq_cost: &id016 !name:speechbrain.nnet.losses.nll_loss
-
-    label_smoothing: 0.1
-
-ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
-    blank_index: 2
-
-seq_cost_metric: &id017 !name:speechbrain.nnet.losses.nll_loss
-
-    label_smoothing: 0.1
-    reduction: batch
-
-homograph_cost: !new:speechbrain.lobes.models.g2p.homograph.SubsequenceLoss
-    seq_cost: *id016
-seq_stats: !name:speechbrain.utils.metric_stats.MetricStats
-    metric: *id017
-seq_stats_homograph: !name:speechbrain.utils.metric_stats.MetricStats
-    metric: *id017
-classification_stats_homograph: !name:speechbrain.utils.metric_stats.ClassificationStats
-
-per_stats: !name:speechbrain.utils.metric_stats.ErrorRateStats
-per_stats_homograph: !name:speechbrain.utils.metric_stats.ErrorRateStats
-
-
 model_output_keys:
     - p_seq
     - char_lens
@@ -368,46 +316,45 @@ phoneme_encoder: &id024 !new:speechbrain.dataio.encoder.TextEncoder
 grapheme_tokenizer: !apply:speechbrain.lobes.models.g2p.dataio.lazy_init
     init: !name:speechbrain.tokenizers.SentencePiece.SentencePiece
         model_dir: grapheme_tokenizer
-        bos_id: 0
-        eos_id: 1
-        unk_id: 2
-        vocab_size: 512
-        annotation_train: tokenizer_annotation_train.json
+        bos_id: !ref <bos_index>
+        eos_id: !ref <eos_index>
+        unk_id: !ref <unk_index>
+        vocab_size: !ref <char_token_output>
+        annotation_train: null
        annotation_read: char
-        model_type: unigram # ["unigram", "bpe", "char"]
-        character_coverage: 1.0
+        model_type: !ref <char_token_type> # ["unigram", "bpe", "char"]
+        character_coverage: !ref <character_coverage>
        annotation_format: json
        text_file: grapheme_annotations.txt
 
-phoneme_tokenizer: &id022 !apply:speechbrain.lobes.models.g2p.dataio.lazy_init
+phoneme_tokenizer: !apply:speechbrain.lobes.models.g2p.dataio.lazy_init
    init: !name:speechbrain.tokenizers.SentencePiece.SentencePiece
        model_dir: phoneme_tokenizer
-        bos_id: 0
-        eos_id: 1
-        unk_id: 2
-        vocab_size: 512
-        annotation_train: tokenizer_annotation_train.json
+        bos_id: !ref <bos_index>
+        eos_id: !ref <eos_index>
+        unk_id: !ref <unk_index>
+        vocab_size: !ref <phn_token_output>
+        annotation_train: null
        annotation_read: phn
-        model_type: unigram # ["unigram", "bpe", "char"]
-        character_coverage: 1.0
-        annotation_list_to_check: [tokenizer_annotation_valid.json]
+        model_type: !ref <phn_token_type> # ["unigram", "bpe", "char"]
+        character_coverage: !ref <character_coverage>
        annotation_format: json
-        text_file: phoneme_annotations.txt
+        text_file: null
 
-out_phoneme_decoder_tok: &id025 !apply:speechbrain.lobes.models.g2p.dataio.char_map_detokenize
-    tokenizer: *id022
-    char_map: *id023
-    token_space_index: 512
-    wordwise: true
+out_phoneme_decoder_tok: !apply:speechbrain.lobes.models.g2p.dataio.char_map_detokenize
+    tokenizer: !ref <phoneme_tokenizer>
+    char_map: !ref <char_phn_map>
+    token_space_index: !ref <token_space_index>
+    wordwise: !ref <phn_token_wordwise>
 
-out_phoneme_decoder_raw: &id026 !name:speechbrain.lobes.models.g2p.dataio.text_decode
+out_phoneme_decoder_raw: !name:speechbrain.lobes.models.g2p.dataio.text_decode
+    encoder: !ref <phoneme_encoder>
 
-    encoder: *id024
 out_phoneme_decoder: !apply:speechbrain.utils.hparams.choice
    value: false
    choices:
-        true: *id025
-        false: *id026
+        True: !ref <out_phoneme_decoder_tok>
+        False: !ref <out_phoneme_decoder_raw>
 encode_pipeline:
    batch: false
    use_padded_data: true
@@ -418,22 +365,22 @@ encode_pipeline:
        - word_emb
    init:
        - func: !name:speechbrain.lobes.models.g2p.dataio.enable_eos_bos
-          encoder: *id027
-          tokens: *id028
-          bos_index: 0
-          eos_index: 1
+          encoder: !ref <grapheme_encoder>
+          tokens: !ref <graphemes>
+          bos_index: !ref <bos_index>
+          eos_index: !ref <eos_index>
        - func: !name:speechbrain.lobes.models.g2p.dataio.enable_eos_bos
-          encoder: *id024
-          tokens: *id001
-          bos_index: 0
-          eos_index: 1
+          encoder: !ref <phoneme_encoder>
+          tokens: !ref <phonemes>
+          bos_index: !ref <bos_index>
+          eos_index: !ref <eos_index>
    steps:
        - func: !name:speechbrain.lobes.models.g2p.dataio.clean_pipeline
-          graphemes: *id028
+          graphemes: !ref <graphemes>
          takes: txt
          provides: txt_cleaned
        - func: !name:speechbrain.lobes.models.g2p.dataio.grapheme_pipeline
-          grapheme_encoder: *id027
+          grapheme_encoder: !ref <grapheme_encoder>
          takes: txt_cleaned
          provides:
            - grapheme_list
@@ -441,7 +388,7 @@ encode_pipeline:
            - grapheme_encoded_raw
 
        - func: !name:speechbrain.lobes.models.g2p.dataio.add_bos_eos
-          encoder: *id027
+          encoder: !ref <grapheme_encoder>
          takes: grapheme_encoded_list
          provides:
            - grapheme_encoded
@@ -464,7 +411,7 @@ decode_pipeline:
        - phonemes
    steps:
        - func: !name:speechbrain.lobes.models.g2p.dataio.beam_search_pipeline
-          beam_searcher: *id029
+          beam_searcher: !ref <beam_searcher>
          takes:
            - char_lens
            - encoder_out
@@ -474,13 +421,13 @@ decode_pipeline:
        - func: !apply:speechbrain.utils.hparams.choice
          value: false
          choices:
-            true: !apply:speechbrain.lobes.models.g2p.dataio.char_map_detokenize
-                tokenizer: *id022
-                char_map: *id023
-                token_space_index: 512
-                wordwise: true
-            false: !name:speechbrain.lobes.models.g2p.dataio.phoneme_decoder_pipeline
-                phoneme_encoder: *id024
+            True: !apply:speechbrain.lobes.models.g2p.dataio.char_map_detokenize
+                tokenizer: !ref <phoneme_tokenizer>
+                char_map: !ref <char_phn_map>
+                token_space_index: !ref <token_space_index>
+                wordwise: !ref <phn_token_wordwise>
+            False: !name:speechbrain.lobes.models.g2p.dataio.phoneme_decoder_pipeline
+                phoneme_encoder: !ref <phoneme_encoder>
          takes:
            - hyps
          provides:
@@ -489,6 +436,5 @@ decode_pipeline:
 
 pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
-        model: *id014
-        ctc_lin: *id013
-
+        model: !ref <model>
+        ctc_lin: !ref <ctc_lin>
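
The cleanup replaces literal values and auto-generated YAML anchors (&id.../*id...) with HyperPyYAML !ref/!apply references, so a single top-level hyperparameter drives every field that depends on it. Below is a minimal sketch (not part of the commit) of how such a file is typically consumed, assuming SpeechBrain and its hyperpyyaml dependency are installed and the complete hyperparams.yaml from this repository is in the working directory; loading it instantiates the torch modules the file declares.

# Sketch: load the refactored hyperparameters with HyperPyYAML.
from hyperpyyaml import load_hyperpyyaml

with open("hyperparams.yaml") as fin:
    # Overrides are applied before !ref/!apply resolution, so every field
    # wired to <phn_tokenize> picks up the overridden value.
    hparams = load_hyperpyyaml(fin, overrides={"phn_tokenize": True})

# __set_seed now reads !ref <seed>, so torch.manual_seed always matches hparams["seed"].
print(hparams["seed"])
# output_neurons resolves through speechbrain.utils.hparams.choice:
# expected <phn_token_output> + 1 (513) with the override above, <phonemes_count> (43) without.
print(hparams["output_neurons"])

Because lin, ctc_lin, and emb all reference <output_neurons>, the override propagates to those layers as well, which is the point of removing the hard-coded 513/43 literals.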