sulaimank committed
Commit e87343a · verified · 1 Parent(s): ed26bd3

Upload 2 files

Files changed (2)
  1. hyperparams.yaml +249 -0
  2. model.ckpt +3 -0
hyperparams.yaml ADDED
@@ -0,0 +1,249 @@
+ # Generated 2024-10-12 from:
+ # /workspace/speechbrain/recipes/LJSpeech/TTS/tacotron2/hparams/train.yaml
+ # yamllint disable
+ ############################################################################
+ # Model: Tacotron2
+ # Tokens: Raw characters (English text)
+ # Losses: Spectrogram MSE + gate BCE + guided attention
+ # Training: LJSpeech
+ # Authors: Georges Abous-Rjeili, Artem Ploujnikov, Yingzhi Wang
+ ############################################################################
+
+ ###################################
+ # Experiment Parameters and setup #
+ ###################################
+ seed: 1234
+ __set_seed: !apply:speechbrain.utils.seed_everything [1234]
+ output_folder: ./results/tacotron2/1234
+ save_folder: ./results/tacotron2/1234/save
+ train_log: ./results/tacotron2/1234/train_log.txt
+ epochs: 1000
+ keep_checkpoint_interval: 100
+ wandb_id: tacotron-sk
+ wandb_user: sulaiman-kagumire
+ wandb_project: tacotron2-lg-cv
+ ###################################
+ # Progress Samples #
+ ###################################
+ # Progress samples are used to monitor the progress
+ # of an ongoing training session by outputting samples
+ # of spectrograms, alignments, etc. at regular intervals
+
+ # Whether to enable progress samples
+ progress_samples: false
+
+ # The path where the samples will be stored
+ progress_sample_path: ./results/tacotron2/1234/samples
+ # The interval, in epochs. For instance, if it is set to 5,
+ # progress samples will be output every 5 epochs
+ progress_samples_interval: 100
+ # The sample size for raw batch samples saved in batch.pth
+ # (useful mostly for model debugging)
+ progress_batch_sample_size: 3
+
+ #################################
+ # Data files and pre-processing #
+ #################################
+ data_folder: data
+ # e.g., /localscratch/ljspeech
+
+ train_json: ./results/tacotron2/1234/save/train.json
+ valid_json: ./results/tacotron2/1234/save/valid.json
+ test_json: ./results/tacotron2/1234/save/test.json
+
+ splits: [train, valid]
+ split_ratio: [90, 10]
+
+ skip_prep: false
+ init_from_pretrained: true
+ # Use the original preprocessing from NVIDIA
+ # The cleaners to be used (applicable to NVIDIA only)
+ text_cleaners: [transliteration_cleaners]
+
+ ################################
+ # Audio Parameters #
+ ################################
+ sample_rate: 22050
+ hop_length: 256
+ win_length: 1024
+ n_mel_channels: 80
+ n_fft: 1024
+ mel_fmin: 0.0
+ mel_fmax: 8000.0
+ mel_normalized: false
+ power: 1
+ norm: slaney
+ mel_scale: slaney
+ dynamic_range_compression: true
+
+ ################################
+ # Optimization Hyperparameters #
+ ################################
+ learning_rate: 0.001
+ weight_decay: 0.000006
+ batch_size: 256
+ num_workers: 96
+ mask_padding: true
+ guided_attention_sigma: 0.2
+ guided_attention_weight: 50.0
+ guided_attention_weight_half_life: 10.
+ guided_attention_hard_stop: 50
+ gate_loss_weight: 1.0
+
+ train_dataloader_opts:
+   batch_size: 256
+   drop_last: false
+   num_workers: 96
+   collate_fn: !new:speechbrain.lobes.models.Tacotron2.TextMelCollate
+
+ valid_dataloader_opts:
+   batch_size: 256
+   num_workers: 96
+   collate_fn: !new:speechbrain.lobes.models.Tacotron2.TextMelCollate
+
+ test_dataloader_opts:
+   batch_size: 256
+   num_workers: 96
+   collate_fn: !new:speechbrain.lobes.models.Tacotron2.TextMelCollate
+
+ ################################
+ # Model Parameters and model #
+ ################################
+ n_symbols: 148 # fixed, depending on the symbols in text_to_sequence
+ symbols_embedding_dim: 512
+
+ # Encoder parameters
+ encoder_kernel_size: 5
+ encoder_n_convolutions: 3
+ encoder_embedding_dim: 512
+
+ # Decoder parameters
+ # The number of frames in the target per encoder step
+ n_frames_per_step: 1
+ decoder_rnn_dim: 1024
+ prenet_dim: 256
+ max_decoder_steps: 1000
+ gate_threshold: 0.5
+ p_attention_dropout: 0.1
+ p_decoder_dropout: 0.1
+ decoder_no_early_stopping: false
+
+ # Attention parameters
+ attention_rnn_dim: 1024
+ attention_dim: 128
+
+ # Location Layer parameters
+ attention_location_n_filters: 32
+ attention_location_kernel_size: 31
+
+ # Mel post-processing network parameters
+ postnet_embedding_dim: 512
+ postnet_kernel_size: 5
+ postnet_n_convolutions: 5
+
+ mel_spectogram: !name:speechbrain.lobes.models.Tacotron2.mel_spectogram
+   sample_rate: 22050
+   hop_length: 256
+   win_length: 1024
+   n_fft: 1024
+   n_mels: 80
+   f_min: 0.0
+   f_max: 8000.0
+   power: 1
+   normalized: false
+   norm: slaney
+   mel_scale: slaney
+   compression: true
+
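For reference, the `mel_spectogram` partial above ("spectogram" is the upstream SpeechBrain spelling of the identifier, kept as-is) is what the recipe applies to raw waveforms. A minimal sketch of calling it directly, assuming the function accepts the waveform through a final `audio` argument alongside the keyword settings shown in the block:

```python
import torch
from speechbrain.lobes.models.Tacotron2 import mel_spectogram

# Random waveform standing in for one second of real 22.05 kHz audio.
audio = torch.randn(22050)

# Same settings as the hparams block above; audio is the only extra argument.
mel = mel_spectogram(
    sample_rate=22050, hop_length=256, win_length=1024, n_fft=1024,
    n_mels=80, f_min=0.0, f_max=8000.0, power=1, normalized=False,
    norm="slaney", mel_scale="slaney", compression=True, audio=audio,
)
print(mel.shape)  # roughly (80, n_frames)
```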
+ # Model
+ model: &id002 !new:speechbrain.lobes.models.Tacotron2.Tacotron2
+   mask_padding: true
+   n_mel_channels: 80
+   # symbols
+   n_symbols: 148
+   symbols_embedding_dim: 512
+   # encoder
+   encoder_kernel_size: 5
+   encoder_n_convolutions: 3
+   encoder_embedding_dim: 512
+   # attention
+   attention_rnn_dim: 1024
+   attention_dim: 128
+   # attention location
+   attention_location_n_filters: 32
+   attention_location_kernel_size: 31
+   # decoder
+   n_frames_per_step: 1
+   decoder_rnn_dim: 1024
+   prenet_dim: 256
+   max_decoder_steps: 1000
+   gate_threshold: 0.5
+   p_attention_dropout: 0.1
+   p_decoder_dropout: 0.1
+   # postnet
+   postnet_embedding_dim: 512
+   postnet_kernel_size: 5
+   postnet_n_convolutions: 5
+   decoder_no_early_stopping: false
+
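Once trained and exported, a Tacotron2 checkpoint like this one is typically loaded through SpeechBrain's pretrained TTS interface. A sketch, assuming the repo is laid out for inference; the repo id is hypothetical, and on older SpeechBrain versions the class lives at `speechbrain.pretrained.Tacotron2` instead:

```python
from speechbrain.inference.TTS import Tacotron2

# Hypothetical source repo id; an inference-ready hyperparams.yaml is assumed.
tacotron2 = Tacotron2.from_hparams(source="sulaimank/tacotron2-lg-cv")

# encode_text returns the mel spectrogram, its length, and the alignment.
mel_output, mel_length, alignment = tacotron2.encode_text("Hello world")
```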
+ guided_attention_scheduler: &id001 !new:speechbrain.nnet.schedulers.StepScheduler
+   initial_value: 50.0
+   half_life: 10.
+
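The scheduler above anneals the guided-attention weight with a half-life of 10 epochs. A small sketch of the implied decay, assuming the usual exponential half-life interpretation (the weight halves every `half_life` epochs until the hard stop at epoch 50):

```python
def guided_attention_weight(epoch, initial_value=50.0, half_life=10.0):
    # Exponential decay: the weight halves every `half_life` epochs.
    return initial_value * 0.5 ** (epoch / half_life)

print(guided_attention_weight(0))   # 50.0
print(guided_attention_weight(10))  # 25.0
print(guided_attention_weight(50))  # ~1.56, after which the hard stop applies
```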
+ criterion: !new:speechbrain.lobes.models.Tacotron2.Loss
+   gate_loss_weight: 1.0
+   guided_attention_weight: 50.0
+   guided_attention_sigma: 0.2
+   guided_attention_scheduler: *id001
+   guided_attention_hard_stop: 50
+
+ modules:
+   model: *id002
+
+ # Optimizer
+ opt_class: !name:torch.optim.Adam
+   lr: 0.001
+   weight_decay: 0.000006
+
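The `!name:` tag makes `opt_class` a callable rather than an instance; SpeechBrain's Brain class later applies it to the module parameters. A rough Python equivalent, with a placeholder module standing in for the Tacotron2 model defined above:

```python
from functools import partial
import torch

# Equivalent of `opt_class: !name:torch.optim.Adam` with the kwargs above.
opt_class = partial(torch.optim.Adam, lr=0.001, weight_decay=0.000006)

model = torch.nn.Linear(4, 4)  # placeholder for the Tacotron2 module
optimizer = opt_class(model.parameters())
```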
+ # Epoch object
+ epoch_counter: &id003 !new:speechbrain.utils.epoch_loop.EpochCounter
+   limit: 1000
+
+ # train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
+ #   save_file: !ref <train_log>
+ train_logger: !new:speechbrain.utils.train_logger.WandBLogger
+   initializer: !name:wandb.init
+     id: tacotron-sk
+     name: tacotron-sk
+     entity: sulaiman-kagumire
+     project: tacotron2-lg-cv
+     reinit: true
+     resume: allow
+
+ # Annealing function
+ lr_annealing: &id004 !new:speechbrain.nnet.schedulers.IntervalScheduler
+   intervals:
+     - steps: 6000
+       lr: 0.0005
+     - steps: 8000
+       lr: 0.0003
+     - steps: 10000
+       lr: 0.0001
+
+ # infer: !name:speechbrain.lobes.models.Tacotron2.infer
+
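As a rough mental model (an assumption, not SpeechBrain's actual code), the interval scheduler keeps the base learning rate until the optimizer step count crosses each threshold, then switches to that interval's value:

```python
intervals = [
    {"steps": 6000, "lr": 0.0005},
    {"steps": 8000, "lr": 0.0003},
    {"steps": 10000, "lr": 0.0001},
]

def lr_for_step(step, base_lr=0.001):
    # The last interval whose threshold has been passed wins.
    lr = base_lr
    for interval in intervals:
        if step >= interval["steps"]:
            lr = interval["lr"]
    return lr

print(lr_for_step(100))    # 0.001 (base learning_rate)
print(lr_for_step(7000))   # 0.0005
print(lr_for_step(12000))  # 0.0001
```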
+ # Checkpointer
+ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
+   checkpoints_dir: ./results/tacotron2/1234/save
+   recoverables:
+     model: *id002
+     counter: *id003
+     scheduler: *id004
+
+ progress_sample_logger: !new:speechbrain.utils.train_logger.ProgressSampleLogger
+   output_path: ./results/tacotron2/1234/samples
+   batch_sample_size: 3
+   formats:
+     raw_batch: raw
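A file like this is normally consumed with HyperPyYAML, which instantiates every `!new:` object and resolves the `&id.../*id...` anchors on load. A minimal sketch, assuming the file is saved locally as `hyperparams.yaml`:

```python
from hyperpyyaml import load_hyperpyyaml

with open("hyperparams.yaml") as f:
    hparams = load_hyperpyyaml(f)

model = hparams["model"]               # the instantiated Tacotron2 module
checkpointer = hparams["checkpointer"]
checkpointer.recover_if_possible()     # restores model, counter, scheduler
```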
model.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:80b504509aa2b3ce066d0d827cf0be6c912140da78e07a23c1b9274c91686ecb
+ size 112826573
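The three lines above are a Git LFS pointer, not the weights themselves: the actual ~113 MB checkpoint is fetched with `git lfs pull` after cloning, or programmatically via `huggingface_hub` (the repo id below is hypothetical):

```python
from huggingface_hub import hf_hub_download

# Downloads the real checkpoint behind the LFS pointer; repo id is a placeholder.
ckpt_path = hf_hub_download(
    repo_id="sulaimank/tacotron2-lg-cv",
    filename="model.ckpt",
)
print(ckpt_path)
```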