sangeet2020 committed on
Commit
c38aa75
·
1 Parent(s): fbfec9c

Create hyperparams.yaml

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +91 -0
hyperparams.yaml ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated 2022-08-12 from:
2
+ # /netscratch/sagar/thesis/speechbrain/recipes/CommonVoice_de/ASR/CTC/hparams/train_with_wav2vec.yaml
3
+ # yamllint disable
4
+ # ################################
5
+ # Model: wav2vec2 + DNN + CTC
6
+ # Augmentation: SpecAugment
7
+ # Authors: Sung-Lin Yeh 2021
8
+ # ################################
9
+
10
+ # Tokenizer parameters
11
+ token_type: char # ["unigram", "bpe", "char"]
12
+ character_coverage: 1.0
13
+
14
+ # Model parameters
15
+ # activation: !name:torch.nn.LeakyReLU
16
+ dnn_neurons: 1024
17
+ wav2vec_output_dim: 1024
18
+ dropout: 0.15
19
+
20
+ sample_rate: 16000
21
+
22
+ wav2vec2_hub: microsoft/wavlm-large
23
+
24
+ # Outputs
25
+ output_neurons: 32 # Token vocabulary size (token_type is char); blank = 0, bos = 1, eos = 2
26
+
27
+ # Decoding parameters
28
+ # Make sure the blank, bos and eos indices match those used by the tokenizer
29
+ blank_index: 0
30
+ bos_index: 1
31
+ eos_index: 2
32
+
33
+ enc: !new:speechbrain.nnet.containers.Sequential
34
+ input_shape: [null, null, !ref <wav2vec_output_dim>]
35
+ linear1: !name:speechbrain.nnet.linear.Linear
36
+ n_neurons: !ref <dnn_neurons>
37
+ bias: True
38
+ bn1: !name:speechbrain.nnet.normalization.BatchNorm1d
39
+ activation: !new:torch.nn.LeakyReLU
40
+ drop: !new:torch.nn.Dropout
41
+ p: !ref <dropout>
42
+ linear2: !name:speechbrain.nnet.linear.Linear
43
+ n_neurons: !ref <dnn_neurons>
44
+ bias: True
45
+ bn2: !name:speechbrain.nnet.normalization.BatchNorm1d
46
+ activation2: !new:torch.nn.LeakyReLU
47
+ drop2: !new:torch.nn.Dropout
48
+ p: !ref <dropout>
49
+ linear3: !name:speechbrain.nnet.linear.Linear
50
+ n_neurons: !ref <dnn_neurons>
51
+ bias: True
52
+ bn3: !name:speechbrain.nnet.normalization.BatchNorm1d
53
+ activation3: !new:torch.nn.LeakyReLU
54
+
55
+ wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
56
+ source: !ref <wav2vec2_hub>
57
+ output_norm: True
58
+ freeze: True
59
+ save_path: wav2vec2_checkpoint
60
+
61
+ ctc_lin: !new:speechbrain.nnet.linear.Linear
62
+ input_size: !ref <dnn_neurons>
63
+ n_neurons: !ref <output_neurons>
64
+
65
+ log_softmax: !new:speechbrain.nnet.activations.Softmax
66
+ apply_log: True
67
+
68
+ ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
69
+ blank_index: !ref <blank_index>
70
+
71
+ asr_model: !new:torch.nn.ModuleList
72
+ - [!ref <enc>, !ref <ctc_lin>]
73
+
74
+ tokenizer: !new:sentencepiece.SentencePieceProcessor
75
+
76
+ encoder: !new:speechbrain.nnet.containers.LengthsCapableSequential
77
+ wav2vec2: !ref <wav2vec2>
78
+ enc: !ref <enc>
79
+ ctc_lin: !ref <ctc_lin>
80
+
81
+ modules:
82
+ encoder: !ref <encoder>
83
+
84
+ decoding_function: !name:speechbrain.decoders.ctc_greedy_decode
85
+ blank_id: !ref <blank_index>
86
+
87
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
88
+ loadables:
89
+ wav2vec2: !ref <wav2vec2>
90
+ asr: !ref <asr_model>
91
+ tokenizer: !ref <tokenizer>