speechbrainteam committed on
Commit
b8f6b8f
1 Parent(s): 3c12864

Delete hyperparams.yaml

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +0 -118
hyperparams.yaml DELETED
@@ -1,118 +0,0 @@
1
- # ################################
2
- # Model: wav2vec2 + DNN + CTC/Attention
3
- # Augmentation: SpecAugment
4
- # Authors: Titouan Parcollet 2021
5
- # ################################
6
-
7
- sample_rate: 16000
8
- wav2vec2_hub: facebook/wav2vec2-large-xlsr-53
9
-
10
- # BPE parameters
11
- token_type: unigram # ["unigram", "bpe", "char"]
12
- character_coverage: 1.0
13
-
14
- # Model parameters
15
- activation: !name:torch.nn.LeakyReLU
16
- dnn_layers: 2
17
- dnn_neurons: 1024
18
- emb_size: 128
19
- dec_neurons: 1024
20
-
21
- # Outputs
22
- output_neurons: 1000 # BPE size; indices: blank = 0, bos = 1, eos = 2
23
-
24
- # Decoding parameters
25
- # Make sure that the bos and eos indices match the ones used by the BPE tokenizer
26
- blank_index: 0
27
- bos_index: 1
28
- eos_index: 2
29
- min_decode_ratio: 0.0
30
- max_decode_ratio: 1.0
31
- beam_size: 10
32
- eos_threshold: 1.5
33
- using_max_attn_shift: True
34
- max_attn_shift: 140
35
- ctc_weight_decode: 0.0
36
- temperature: 1.50
37
-
38
- enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN
39
- input_shape: [null, null, 1024]
40
- activation: !ref <activation>
41
- dnn_blocks: !ref <dnn_layers>
42
- dnn_neurons: !ref <dnn_neurons>
43
-
44
- wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
45
- source: !ref <wav2vec2_hub>
46
- output_norm: True
47
- freeze: True
48
- save_path: model_checkpoints
49
-
50
- emb: !new:speechbrain.nnet.embedding.Embedding
51
- num_embeddings: !ref <output_neurons>
52
- embedding_dim: !ref <emb_size>
53
-
54
- dec: !new:speechbrain.nnet.RNN.AttentionalRNNDecoder
55
- enc_dim: !ref <dnn_neurons>
56
- input_size: !ref <emb_size>
57
- rnn_type: gru
58
- attn_type: location
59
- hidden_size: 1024
60
- attn_dim: 1024
61
- num_layers: 1
62
- scaling: 1.0
63
- channels: 10
64
- kernel_size: 100
65
- re_init: True
66
- dropout: 0.0
67
-
68
- ctc_lin: !new:speechbrain.nnet.linear.Linear
69
- input_size: !ref <dnn_neurons>
70
- n_neurons: !ref <output_neurons>
71
-
72
- seq_lin: !new:speechbrain.nnet.linear.Linear
73
- input_size: !ref <dec_neurons>
74
- n_neurons: !ref <output_neurons>
75
-
76
- log_softmax: !new:speechbrain.nnet.activations.Softmax
77
- apply_log: True
78
-
79
- ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
80
- blank_index: !ref <blank_index>
81
-
82
- seq_cost: !name:speechbrain.nnet.losses.nll_loss
83
- label_smoothing: 0.1
84
-
85
- asr_model: !new:torch.nn.ModuleList
86
- - [!ref <enc>, !ref <emb>, !ref <dec>, !ref <ctc_lin>, !ref <seq_lin>]
87
-
88
- tokenizer: !new:sentencepiece.SentencePieceProcessor
89
-
90
- encoder: !new:speechbrain.nnet.containers.LengthsCapableSequential
91
- wav2vec2: !ref <wav2vec2>
92
- enc: !ref <enc>
93
-
94
- decoder: !new:speechbrain.decoders.S2SRNNBeamSearcher
95
- embedding: !ref <emb>
96
- decoder: !ref <dec>
97
- linear: !ref <seq_lin>
98
- ctc_linear: !ref <ctc_lin>
99
- bos_index: !ref <bos_index>
100
- eos_index: !ref <eos_index>
101
- blank_index: !ref <blank_index>
102
- min_decode_ratio: !ref <min_decode_ratio>
103
- max_decode_ratio: !ref <max_decode_ratio>
104
- beam_size: !ref <beam_size>
105
- eos_threshold: !ref <eos_threshold>
106
- using_max_attn_shift: !ref <using_max_attn_shift>
107
- max_attn_shift: !ref <max_attn_shift>
108
- temperature: !ref <temperature>
109
-
110
- modules:
111
- encoder: !ref <encoder>
112
- decoder: !ref <decoder>
113
-
114
- pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
115
- loadables:
116
- wav2vec2: !ref <wav2vec2>
117
- asr: !ref <asr_model>
118
- tokenizer: !ref <tokenizer>