lorenlugosch committed on
Commit
ad478b7
1 Parent(s): 8fa5d45

hparams with no generated pointers

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +87 -124
hyperparams.yaml CHANGED
@@ -1,6 +1,3 @@
1
- # Generated 2021-03-21 from:
2
- # /home/mila/l/lugoschl/code/fork/speechbrain/recipes/timers-and-such/direct/hparams/train.yaml
3
- # yamllint disable
4
  # ############################################################################
5
  # Model: Direct SLU
6
  # Encoder: Pre-trained ASR encoder -> LSTM
@@ -11,36 +8,7 @@
11
  # Authors: Loren Lugosch, Mirco Ravanelli 2020
12
  # ############################################################################
13
 
14
- # Seed needs to be set at top of yaml, before objects with parameters are made
15
- seed: 4
16
- __set_seed: !apply:torch.manual_seed [4]
17
- experiment: train-real-only
18
- output_folder: results/train-real-only/4
19
- save_folder: results/train-real-only/4/save
20
- train_log: results/train-real-only/4/train_log.txt
21
-
22
- # Data files
23
- data_folder: /localscratch/timers-and-such/
24
- # e.g, /localscratch/timers-and-such
25
- data_folder_rirs: /localscratch/timers-and-such/
26
- train_splits: [train-real]
27
- csv_train: results/train-real-only/4/train-type=direct.csv
28
- csv_dev_real: results/train-real-only/4/dev-real-type=direct.csv
29
- csv_dev_synth: results/train-real-only/4/dev-synth-type=direct.csv
30
- csv_test_real: results/train-real-only/4/test-real-type=direct.csv
31
- csv_test_synth: results/train-real-only/4/test-synth-type=direct.csv
32
- csv_all_real: results/train-real-only/4/all-real-type=direct.csv
33
- tokenizer_file: /home/mila/l/lugoschl/code/speechbrain/recipes/timers-and-such/Tokenizer/results/tokenizer_bpe51/51_unigram.model
34
- skip_prep: false
35
- ckpt_interval_minutes: 15 # save checkpoint every N min
36
- test_on_all_real: false
37
-
38
- # Training parameters
39
- number_of_epochs: 50
40
- batch_size: 16
41
- lr: 0.0003
42
  token_type: unigram # ["unigram", "bpe", "char"]
43
- sorting: random
44
 
45
  # Model parameters
46
  sample_rate: 16000
@@ -59,119 +27,114 @@ slu_beam_size: 80
59
  eos_threshold: 1.5
60
  temperature: 1.25
61
 
62
- dataloader_opts:
63
- batch_size: 16
64
- shuffle: true
65
-
66
- epoch_counter: &id009 !new:speechbrain.utils.epoch_loop.EpochCounter
67
-
68
- limit: 50
69
-
70
  # Models
71
  asr_model: !apply:speechbrain.pretrained.EncoderDecoderASR.from_hparams
72
- source: speechbrain/asr-crdnn-rnnlm-librispeech
73
- run_opts: {device: cuda:0}
74
-
75
- slu_enc: &id001 !new:speechbrain.nnet.containers.Sequential
76
- input_shape: [null, null, 512]
77
- lstm: !new:speechbrain.nnet.RNN.LSTM
78
- input_size: 512
79
- bidirectional: true
80
- hidden_size: 256
81
- num_layers: 2
82
- linear: !new:speechbrain.nnet.linear.Linear
83
- input_size: 512
84
- n_neurons: 256
85
-
86
- output_emb: &id002 !new:speechbrain.nnet.embedding.Embedding
87
- num_embeddings: 51
88
- embedding_dim: 128
89
-
90
- dec: &id003 !new:speechbrain.nnet.RNN.AttentionalRNNDecoder
91
- enc_dim: 256
92
- input_size: 128
93
- rnn_type: gru
94
- attn_type: keyvalue
95
- hidden_size: 512
96
- attn_dim: 512
97
- num_layers: 3
98
- scaling: 1.0
99
- dropout: 0.0
100
-
101
- seq_lin: &id004 !new:speechbrain.nnet.linear.Linear
102
- input_size: 512
103
- n_neurons: 51
104
-
105
- env_corrupt: &id005 !new:speechbrain.lobes.augment.EnvCorrupt
106
-
107
- openrir_folder: /localscratch/timers-and-such/
108
- babble_prob: 0.0
109
- reverb_prob: 0.0
110
- noise_prob: 1.0
111
- noise_snr_low: 0
112
- noise_snr_high: 15
113
 
114
  modules:
115
- slu_enc: *id001
116
- output_emb: *id002
117
- dec: *id003
118
- seq_lin: *id004
119
- env_corrupt: *id005
120
- model: &id007 !new:torch.nn.ModuleList
121
- - [*id001, *id002, *id003, *id004]
122
- tokenizer: &id006 !new:sentencepiece.SentencePieceProcessor
 
 
 
123
 
124
  pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
125
- collect_in: results/train-real-only/4/save/TAS_tokenizer
126
- loadables:
127
- tokenizer: *id006
128
- paths:
129
- tokenizer: /home/mila/l/lugoschl/code/speechbrain/recipes/timers-and-such/Tokenizer/results/tokenizer_bpe51/51_unigram.model
130
 
131
  beam_searcher: !new:speechbrain.decoders.S2SRNNBeamSearcher
132
- embedding: *id002
133
- decoder: *id003
134
- linear: *id004
135
- bos_index: 0
136
- eos_index: 0
137
- min_decode_ratio: 0.0
138
- max_decode_ratio: 10.0
139
- beam_size: 80
140
- eos_threshold: 1.5
141
- temperature: 1.25
142
- using_max_attn_shift: false
143
- max_attn_shift: 30
144
- coverage_penalty: 0.
145
 
146
  opt_class: !name:torch.optim.Adam
147
- lr: 0.0003
148
 
149
- lr_annealing: &id008 !new:speechbrain.nnet.schedulers.NewBobScheduler
150
- initial_value: 0.0003
151
- improvement_threshold: 0.0025
152
- annealing_factor: 0.8
153
- patient: 0
154
 
155
  checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
156
- checkpoints_dir: results/train-real-only/4/save
157
- recoverables:
158
- model: *id007
159
- scheduler: *id008
160
- counter: *id009
 
161
  augmentation: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
162
- sample_rate: 16000
163
- speeds: [95, 100, 105]
164
 
165
  log_softmax: !new:speechbrain.nnet.activations.Softmax
166
- apply_log: true
167
 
168
  seq_cost: !name:speechbrain.nnet.losses.nll_loss
169
- label_smoothing: 0.1
170
 
171
  train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
172
- save_file: results/train-real-only/4/train_log.txt
173
 
174
  error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
175
 
176
  cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
177
- split_tokens: true
 
 
 
 
1
  # ############################################################################
2
  # Model: Direct SLU
3
  # Encoder: Pre-trained ASR encoder -> LSTM
 
8
  # Authors: Loren Lugosch, Mirco Ravanelli 2020
9
  # ############################################################################
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  token_type: unigram # ["unigram", "bpe", "char"]
 
12
 
13
  # Model parameters
14
  sample_rate: 16000
 
27
  eos_threshold: 1.5
28
  temperature: 1.25
29
 
 
 
 
 
 
 
 
 
30
  # Models
31
  asr_model: !apply:speechbrain.pretrained.EncoderDecoderASR.from_hparams
32
+ source: speechbrain/asr-crdnn-rnnlm-librispeech
33
+ run_opts: {"device":"cuda:0"}
34
+
35
+ slu_enc: !new:speechbrain.nnet.containers.Sequential
36
+ input_shape: [null, null, !ref <ASR_encoder_dim>]
37
+ lstm: !new:speechbrain.nnet.RNN.LSTM
38
+ input_size: !ref <ASR_encoder_dim>
39
+ bidirectional: True
40
+ hidden_size: !ref <encoder_dim>
41
+ num_layers: 2
42
+ linear: !new:speechbrain.nnet.linear.Linear
43
+ input_size: !ref <encoder_dim> * 2
44
+ n_neurons: !ref <encoder_dim>
45
+
46
+ output_emb: !new:speechbrain.nnet.embedding.Embedding
47
+ num_embeddings: !ref <output_neurons>
48
+ embedding_dim: !ref <emb_size>
49
+
50
+ dec: !new:speechbrain.nnet.RNN.AttentionalRNNDecoder
51
+ enc_dim: !ref <encoder_dim>
52
+ input_size: !ref <emb_size>
53
+ rnn_type: gru
54
+ attn_type: keyvalue
55
+ hidden_size: !ref <dec_neurons>
56
+ attn_dim: 512
57
+ num_layers: 3
58
+ scaling: 1.0
59
+ dropout: 0.0
60
+
61
+ seq_lin: !new:speechbrain.nnet.linear.Linear
62
+ input_size: !ref <dec_neurons>
63
+ n_neurons: !ref <output_neurons>
64
+
65
+ env_corrupt: !new:speechbrain.lobes.augment.EnvCorrupt
66
+ openrir_folder: !ref <data_folder_rirs>
67
+ babble_prob: 0.0
68
+ reverb_prob: 0.0
69
+ noise_prob: 1.0
70
+ noise_snr_low: 0
71
+ noise_snr_high: 15
 
72
 
73
  modules:
74
+ slu_enc: !ref <slu_enc>
75
+ output_emb: !ref <output_emb>
76
+ dec: !ref <dec>
77
+ seq_lin: !ref <seq_lin>
78
+ env_corrupt: !ref <env_corrupt>
79
+
80
+ model: !new:torch.nn.ModuleList
81
+ - [!ref <slu_enc>, !ref <output_emb>,
82
+ !ref <dec>, !ref <seq_lin>]
83
+
84
+ tokenizer: !new:sentencepiece.SentencePieceProcessor
85
 
86
  pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
87
+ collect_in: !ref <save_folder>/TAS_tokenizer
88
+ loadables:
89
+ tokenizer: !ref <tokenizer>
90
+ paths:
91
+ tokenizer: !ref <tokenizer_file>
92
 
93
  beam_searcher: !new:speechbrain.decoders.S2SRNNBeamSearcher
94
+ embedding: !ref <output_emb>
95
+ decoder: !ref <dec>
96
+ linear: !ref <seq_lin>
97
+ bos_index: !ref <bos_index>
98
+ eos_index: !ref <eos_index>
99
+ min_decode_ratio: !ref <min_decode_ratio>
100
+ max_decode_ratio: !ref <max_decode_ratio>
101
+ beam_size: !ref <slu_beam_size>
102
+ eos_threshold: !ref <eos_threshold>
103
+ temperature: !ref <temperature>
104
+ using_max_attn_shift: False
105
+ max_attn_shift: 30
106
+ coverage_penalty: 0.
107
 
108
  opt_class: !name:torch.optim.Adam
109
+ lr: !ref <lr>
110
 
111
+ lr_annealing: !new:speechbrain.nnet.schedulers.NewBobScheduler
112
+ initial_value: !ref <lr>
113
+ improvement_threshold: 0.0025
114
+ annealing_factor: 0.8
115
+ patient: 0
116
 
117
  checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
118
+ checkpoints_dir: !ref <save_folder>
119
+ recoverables:
120
+ model: !ref <model>
121
+ scheduler: !ref <lr_annealing>
122
+ counter: !ref <epoch_counter>
123
+
124
  augmentation: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
125
+ sample_rate: !ref <sample_rate>
126
+ speeds: [95, 100, 105]
127
 
128
  log_softmax: !new:speechbrain.nnet.activations.Softmax
129
+ apply_log: True
130
 
131
  seq_cost: !name:speechbrain.nnet.losses.nll_loss
132
+ label_smoothing: 0.1
133
 
134
  train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
135
+ save_file: !ref <train_log>
136
 
137
  error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
138
 
139
  cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
140
+ split_tokens: True