Mirco committed on
Commit
eca34f8
1 Parent(s): 9dd32c5

fix hyparam file

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +8 -8
hyperparams.yaml CHANGED
@@ -1,18 +1,18 @@
1
  # ############################################################################
2
- # Model: E2E ASR with Transformer
3
- # Encoder: Transformer Encoder
4
  # Decoder: Transformer Decoder + (CTC/ATT joint) beamsearch
5
  # Tokens: BPE with unigram
6
  # losses: CTC + KLdiv (Label Smoothing loss)
7
  # Training: AISHELL-1
8
- # Authors: Jianyuan Zhong, Titouan Parcollet
9
  # ############################################################################
10
 
11
  # Feature parameters
12
  sample_rate: 16000
13
  n_fft: 400
14
  n_mels: 80
15
- wav2vec2_hub: facebook/wav2vec2-large-it-voxpopuli
16
 
17
  ####################### Model parameters ###########################
18
  # Transformer
@@ -36,7 +36,7 @@ unk_index: 0
36
 
37
  # Decoding parameters
38
  min_decode_ratio: 0.0
39
- max_decode_ratio: 1.0 # 1.0
40
  valid_search_interval: 10
41
  valid_beam_size: 10
42
  test_beam_size: 10
@@ -49,7 +49,7 @@ wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
49
  output_norm: True
50
  freeze: True
51
  pretrain: False # Pretraining is managed by the SpeechBrain pre-trainer.
52
- save_path: !ref <save_folder>/wav2vec2_checkpoint
53
 
54
  Transformer: !new:speechbrain.lobes.models.transformer.TransformerASR.TransformerASR # yamllint disable-line rule:line-length
55
  input_size: 1024
@@ -108,6 +108,6 @@ log_softmax: !new:torch.nn.LogSoftmax
108
 
109
  pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
110
  loadables:
111
- wav2vect2: !ref <wav2vect2>
112
- model: !ref <model>
113
  tokenizer: !ref <tokenizer>
 
1
  # ############################################################################
2
+ # Model: E2E ASR with Transformer + wav2vec2
3
+ # Encoder: wav2vec Encoder
4
  # Decoder: Transformer Decoder + (CTC/ATT joint) beamsearch
5
  # Tokens: BPE with unigram
6
  # losses: CTC + KLdiv (Label Smoothing loss)
7
  # Training: AISHELL-1
8
+ # Authors: Jianyuan Zhong, Titouan Parcollet, Mirco Ravanelli
9
  # ############################################################################
10
 
11
  # Feature parameters
12
  sample_rate: 16000
13
  n_fft: 400
14
  n_mels: 80
15
+ wav2vec2_hub: facebook/wav2vec2-large-100k-voxpopuli
16
 
17
  ####################### Model parameters ###########################
18
  # Transformer
 
36
 
37
  # Decoding parameters
38
  min_decode_ratio: 0.0
39
+ max_decode_ratio: 1.0
40
  valid_search_interval: 10
41
  valid_beam_size: 10
42
  test_beam_size: 10
 
49
  output_norm: True
50
  freeze: True
51
  pretrain: False # Pretraining is managed by the SpeechBrain pre-trainer.
52
+ save_path: model_checkpoints
53
 
54
  Transformer: !new:speechbrain.lobes.models.transformer.TransformerASR.TransformerASR # yamllint disable-line rule:line-length
55
  input_size: 1024
 
108
 
109
  pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
110
  loadables:
111
+ wav2vec2: !ref <wav2vec2>
112
+ model: !ref <asr_model>
113
  tokenizer: !ref <tokenizer>