Nick256 committed on
Commit
4bc68e3
1 Parent(s): 00d9ebb

Update hyperparams.yaml

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +38 -224
hyperparams.yaml CHANGED
@@ -1,126 +1,14 @@
1
- # Generated 2024-03-06 from:
2
- # /home/marconilab/tacotron2/hparams/train.yaml
3
- # yamllint disable
4
- ############################################################################
5
- # Model: Tacotron2
6
- # Tokens: Raw characters (English text)
7
- # losses: Transducer
8
- # Training: LJSpeech
9
- # Authors: Georges Abous-Rjeili, Artem Ploujnikov, Yingzhi Wang
10
- # ############################################################################
11
-
12
-
13
- ###################################
14
- # Experiment Parameters and setup #
15
- ###################################
16
- seed: 1234
17
- __set_seed: !apply:torch.manual_seed [1234]
18
- output_folder: ./results/tacotron2/1234
19
- save_folder: ./results/tacotron2/1234/save
20
- train_log: ./results/tacotron2/1234/train_log.txt
21
- epochs: 500
22
- keep_checkpoint_interval: 50
23
- wandb_id: tacotron2-luganda
24
- wandb_user: sulaiman-kagumire
25
- wandb_project: tts-luganda
26
- init_from_pretrained: true
27
- ###################################
28
- # Progress Samples #
29
- ###################################
30
- # Progress samples are used to monitor the progress
31
- # of an ongoing training session by outputting samples
32
- # of spectrograms, alignments, etc at regular intervals
33
-
34
- # Whether to enable progress samples
35
- progress_samples: false
36
-
37
- # The path where the samples will be stored
38
- progress_sample_path: ./results/tacotron2/1234/samples
39
- # The interval, in epochs. For instance, if it is set to 5,
40
- # progress samples will be output every 5 epochs
41
- progress_samples_interval: 1
42
- # The sample size for raw batch samples saved in batch.pth
43
- # (useful mostly for model debugging)
44
- progress_batch_sample_size: 3
45
-
46
- #################################
47
- # Data files and pre-processing #
48
- #################################
49
- data_folder: data_folder
50
- # e.g, /localscratch/ljspeech
51
-
52
- train_json: ./results/tacotron2/1234/save/train.json
53
- valid_json: ./results/tacotron2/1234/save/valid.json
54
- test_json: ./results/tacotron2/1234/save/test.json
55
-
56
- splits: [train, valid, test]
57
- split_ratio: [80, 10, 10]
58
-
59
- skip_prep: false
60
-
61
- # Use the original preprocessing from nvidia
62
- # The cleaners to be used (applicable to nvidia only)
63
- text_cleaners: [basic_cleaners]
64
-
65
- ################################
66
- # Audio Parameters #
67
- ################################
68
- sample_rate: 22050
69
- hop_length: 256
70
- win_length: 1024
71
  n_mel_channels: 80
72
- n_fft: 1024
73
- mel_fmin: 0.0
74
- mel_fmax: 8000.0
75
- mel_normalized: false
76
- power: 1
77
- norm: slaney
78
- mel_scale: slaney
79
- dynamic_range_compression: true
80
-
81
- ################################
82
- # Optimization Hyperparameters #
83
- ################################
84
- learning_rate: 0.001
85
- weight_decay: 0.000006
86
- batch_size: 256
87
- num_workers: 8
88
- mask_padding: true
89
- guided_attention_sigma: 0.2
90
- guided_attention_weight: 50.0
91
- guided_attention_weight_half_life: 10.
92
- guided_attention_hard_stop: 50
93
- gate_loss_weight: 1.0
94
-
95
- train_dataloader_opts:
96
- batch_size: 256
97
- drop_last: false #True #False
98
- num_workers: 8
99
- collate_fn: !new:speechbrain.lobes.models.Tacotron2.TextMelCollate
100
-
101
- valid_dataloader_opts:
102
- batch_size: 256
103
- num_workers: 8
104
- collate_fn: !new:speechbrain.lobes.models.Tacotron2.TextMelCollate
105
-
106
- test_dataloader_opts:
107
- batch_size: 256
108
- num_workers: 8
109
- collate_fn: !new:speechbrain.lobes.models.Tacotron2.TextMelCollate
110
-
111
- ################################
112
- # Model Parameters and model #
113
- ################################
114
- n_symbols: 148 #fixed depending on symbols in textToSequence
115
  symbols_embedding_dim: 512
116
-
117
- # Encoder parameters
118
  encoder_kernel_size: 5
119
  encoder_n_convolutions: 3
120
  encoder_embedding_dim: 512
121
-
122
- # Decoder parameters
123
- # The number of frames in the target per encoder step
 
124
  n_frames_per_step: 1
125
  decoder_rnn_dim: 1024
126
  prenet_dim: 256
@@ -128,123 +16,49 @@ max_decoder_steps: 1000
128
  gate_threshold: 0.5
129
  p_attention_dropout: 0.1
130
  p_decoder_dropout: 0.1
131
- decoder_no_early_stopping: false
132
-
133
- # Attention parameters
134
- attention_rnn_dim: 1024
135
- attention_dim: 128
136
-
137
- # Location Layer parameters
138
- attention_location_n_filters: 32
139
- attention_location_kernel_size: 31
140
-
141
- # Mel-post processing network parameters
142
  postnet_embedding_dim: 512
143
  postnet_kernel_size: 5
144
  postnet_n_convolutions: 5
 
 
145
 
146
- mel_spectogram: !name:speechbrain.lobes.models.Tacotron2.mel_spectogram
147
- sample_rate: 22050
148
- hop_length: 256
149
- win_length: 1024
150
- n_fft: 1024
151
- n_mels: 80
152
- f_min: 0.0
153
- f_max: 8000.0
154
- power: 1
155
- normalized: false
156
- norm: slaney
157
- mel_scale: slaney
158
- compression: true
159
-
160
- #model
161
- model: &id002 !new:speechbrain.lobes.models.Tacotron2.Tacotron2
162
-
163
- #optimizer
164
- mask_padding: true
165
- n_mel_channels: 80
166
  # symbols
167
- n_symbols: 148
168
- symbols_embedding_dim: 512
169
  # encoder
170
- encoder_kernel_size: 5
171
- encoder_n_convolutions: 3
172
- encoder_embedding_dim: 512
173
  # attention
174
- attention_rnn_dim: 1024
175
- attention_dim: 128
176
  # attention location
177
- attention_location_n_filters: 32
178
- attention_location_kernel_size: 31
179
  # decoder
180
- n_frames_per_step: 1
181
- decoder_rnn_dim: 1024
182
- prenet_dim: 256
183
- max_decoder_steps: 1000
184
- gate_threshold: 0.5
185
- p_attention_dropout: 0.1
186
- p_decoder_dropout: 0.1
187
  # postnet
188
- postnet_embedding_dim: 512
189
- postnet_kernel_size: 5
190
- postnet_n_convolutions: 5
191
- decoder_no_early_stopping: false
192
 
193
- guided_attention_scheduler: &id001 !new:speechbrain.nnet.schedulers.StepScheduler
194
- initial_value: 50.0
195
- half_life: 10.
196
-
197
- criterion: !new:speechbrain.lobes.models.Tacotron2.Loss
198
- gate_loss_weight: 1.0
199
- guided_attention_weight: 50.0
200
- guided_attention_sigma: 0.2
201
- guided_attention_scheduler: *id001
202
- guided_attention_hard_stop: 50
203
 
204
  modules:
205
- model: *id002
206
- opt_class: !name:torch.optim.Adam
207
- lr: 0.001
208
- weight_decay: 0.000006
209
-
210
- #epoch object
211
- epoch_counter: &id003 !new:speechbrain.utils.epoch_loop.EpochCounter
212
- limit: 500
213
-
214
- train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
215
- save_file: !ref <train_log>
216
- # train_logger: !new:speechbrain.utils.train_logger.WandBLogger
217
- # initializer: !name:wandb.init
218
- # # id: !ref <wandb_id>
219
- # name: tacotron2-luganda
220
- # entity: sulaiman-kagumire
221
- # project: tts-luganda
222
- # reinit: true
223
- # # yaml_config: hparams/train.yaml
224
- # resume: allow
225
-
226
- #annealing_function
227
- lr_annealing: &id004 !new:speechbrain.nnet.schedulers.IntervalScheduler
228
-
229
- #infer: !name:speechbrain.lobes.models.Tacotron2.infer
230
-
231
- intervals:
232
- - steps: 6000
233
- lr: 0.0005
234
- - steps: 8000
235
- lr: 0.0003
236
- - steps: 10000
237
- lr: 0.0001
238
 
239
- #checkpointer
240
- checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
241
- checkpoints_dir: ./results/tacotron2/1234/save
242
- recoverables:
243
- model: *id002
244
- counter: *id003
245
- scheduler: *id004
246
- progress_sample_logger: !new:speechbrain.utils.train_logger.ProgressSampleLogger
247
- output_path: ./results/tacotron2/1234/samples
248
- batch_sample_size: 3
249
- formats:
250
- raw_batch: raw
 
1
+ mask_padding: True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  n_mel_channels: 80
3
+ n_symbols: 148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  symbols_embedding_dim: 512
 
 
5
  encoder_kernel_size: 5
6
  encoder_n_convolutions: 3
7
  encoder_embedding_dim: 512
8
+ attention_rnn_dim: 1024
9
+ attention_dim: 128
10
+ attention_location_n_filters: 32
11
+ attention_location_kernel_size: 31
12
  n_frames_per_step: 1
13
  decoder_rnn_dim: 1024
14
  prenet_dim: 256
 
16
  gate_threshold: 0.5
17
  p_attention_dropout: 0.1
18
  p_decoder_dropout: 0.1
 
 
 
 
 
 
 
 
 
 
 
19
  postnet_embedding_dim: 512
20
  postnet_kernel_size: 5
21
  postnet_n_convolutions: 5
22
+ decoder_no_early_stopping: False
23
+ sample_rate: 22050
24
 
25
+ # Model
26
+ model: !new:speechbrain.lobes.models.Tacotron2.Tacotron2
27
+ mask_padding: !ref <mask_padding>
28
+ n_mel_channels: !ref <n_mel_channels>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  # symbols
30
+ n_symbols: !ref <n_symbols>
31
+ symbols_embedding_dim: !ref <symbols_embedding_dim>
32
  # encoder
33
+ encoder_kernel_size: !ref <encoder_kernel_size>
34
+ encoder_n_convolutions: !ref <encoder_n_convolutions>
35
+ encoder_embedding_dim: !ref <encoder_embedding_dim>
36
  # attention
37
+ attention_rnn_dim: !ref <attention_rnn_dim>
38
+ attention_dim: !ref <attention_dim>
39
  # attention location
40
+ attention_location_n_filters: !ref <attention_location_n_filters>
41
+ attention_location_kernel_size: !ref <attention_location_kernel_size>
42
  # decoder
43
+ n_frames_per_step: !ref <n_frames_per_step>
44
+ decoder_rnn_dim: !ref <decoder_rnn_dim>
45
+ prenet_dim: !ref <prenet_dim>
46
+ max_decoder_steps: !ref <max_decoder_steps>
47
+ gate_threshold: !ref <gate_threshold>
48
+ p_attention_dropout: !ref <p_attention_dropout>
49
+ p_decoder_dropout: !ref <p_decoder_dropout>
50
  # postnet
51
+ postnet_embedding_dim: !ref <postnet_embedding_dim>
52
+ postnet_kernel_size: !ref <postnet_kernel_size>
53
+ postnet_n_convolutions: !ref <postnet_n_convolutions>
54
+ decoder_no_early_stopping: !ref <decoder_no_early_stopping>
55
 
56
+ # Function that converts the text into a sequence of valid characters.
57
+ text_to_sequence: !name:speechbrain.utils.text_to_sequence.text_to_sequence
 
 
 
 
 
 
 
 
58
 
59
  modules:
60
+ model: !ref <model>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
63
+ loadables:
64
+ model: !ref <model>