Nick256 committed
Commit d6bd1a2
1 Parent(s): 55016da

Upload 3 files

Files changed (3)
  1. config.json +5 -0
  2. hyperparams.yaml +250 -0
  3. model.ckpt +3 -0
config.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "speechbrain_interface": "Tacotron2",
+   "vocoder_interface": "HiFIGAN",
+   "vocoder_model_id": "speechbrain/tts-hifigan-ljspeech"
+ }
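
The config points SpeechBrain's pretrained interfaces at the right classes: the acoustic model is served through the Tacotron2 interface, and waveforms are generated with the HiFi-GAN vocoder at speechbrain/tts-hifigan-ljspeech. A minimal inference sketch under those assumptions (the source repo id below is a placeholder, and the import paths are the speechbrain >= 1.0 ones; older releases expose the same classes under speechbrain.pretrained):

import torchaudio
from speechbrain.inference.TTS import Tacotron2
from speechbrain.inference.vocoders import HIFIGAN

# Acoustic model: text -> mel spectrogram (the files uploaded in this commit).
tacotron2 = Tacotron2.from_hparams(
    source="Nick256/tacotron2-luganda",  # placeholder repo id
    savedir="pretrained_models/tacotron2-luganda",
)
# Vocoder: mel spectrogram -> waveform (the vocoder_model_id from config.json).
hifi_gan = HIFIGAN.from_hparams(
    source="speechbrain/tts-hifigan-ljspeech",
    savedir="pretrained_models/tts-hifigan-ljspeech",
)

mel_output, mel_length, alignment = tacotron2.encode_text("Oli otya!")
waveforms = hifi_gan.decode_batch(mel_output)
torchaudio.save("example.wav", waveforms.squeeze(1), 22050)
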
hyperparams.yaml ADDED
@@ -0,0 +1,250 @@
+ # Generated 2024-03-06 from:
+ # /home/marconilab/tacotron2/hparams/train.yaml
+ # yamllint disable
+ ############################################################################
+ # Model: Tacotron2
+ # Tokens: Raw characters (English text)
+ # losses: Transducer
+ # Training: LJSpeech
+ # Authors: Georges Abous-Rjeili, Artem Ploujnikov, Yingzhi Wang
+ # ############################################################################
+
+
+ ###################################
+ # Experiment Parameters and setup #
+ ###################################
+ seed: 1234
+ __set_seed: !apply:torch.manual_seed [1234]
+ output_folder: ./results/tacotron2/1234
+ save_folder: ./results/tacotron2/1234/save
+ train_log: ./results/tacotron2/1234/train_log.txt
+ epochs: 500
+ keep_checkpoint_interval: 50
+ wandb_id: tacotron2-luganda
+ wandb_user: sulaiman-kagumire
+ wandb_project: tts-luganda
+ init_from_pretrained: true
+ ###################################
+ # Progress Samples #
+ ###################################
+ # Progress samples are used to monitor the progress
+ # of an ongoing training session by outputting samples
+ # of spectrograms, alignments, etc at regular intervals
+
+ # Whether to enable progress samples
+ progress_samples: false
+
+ # The path where the samples will be stored
+ progress_sample_path: ./results/tacotron2/1234/samples
+ # The interval, in epochs. For instance, if it is set to 5,
+ # progress samples will be output every 5 epochs
+ progress_samples_interval: 1
+ # The sample size for raw batch samples saved in batch.pth
+ # (useful mostly for model debugging)
+ progress_batch_sample_size: 3
+
+ #################################
+ # Data files and pre-processing #
+ #################################
+ data_folder: data_folder
+ # e.g, /localscratch/ljspeech
+
+ train_json: ./results/tacotron2/1234/save/train.json
+ valid_json: ./results/tacotron2/1234/save/valid.json
+ test_json: ./results/tacotron2/1234/save/test.json
+
+ splits: [train, valid, test]
+ split_ratio: [80, 10, 10]
+
+ skip_prep: false
+
+ # Use the original preprocessing from nvidia
+ # The cleaners to be used (applicable to nvidia only)
+ text_cleaners: [basic_cleaners]
+
+ ################################
+ # Audio Parameters #
+ ################################
+ sample_rate: 22050
+ hop_length: 256
+ win_length: 1024
+ n_mel_channels: 80
+ n_fft: 1024
+ mel_fmin: 0.0
+ mel_fmax: 8000.0
+ mel_normalized: false
+ power: 1
+ norm: slaney
+ mel_scale: slaney
+ dynamic_range_compression: true
+
+ ################################
+ # Optimization Hyperparameters #
+ ################################
+ learning_rate: 0.001
+ weight_decay: 0.000006
+ batch_size: 256
+ num_workers: 8
+ mask_padding: true
+ guided_attention_sigma: 0.2
+ guided_attention_weight: 50.0
+ guided_attention_weight_half_life: 10.
+ guided_attention_hard_stop: 50
+ gate_loss_weight: 1.0
+
+ train_dataloader_opts:
+   batch_size: 256
+   drop_last: false #True #False
+   num_workers: 8
+   collate_fn: !new:speechbrain.lobes.models.Tacotron2.TextMelCollate
+
+ valid_dataloader_opts:
+   batch_size: 256
+   num_workers: 8
+   collate_fn: !new:speechbrain.lobes.models.Tacotron2.TextMelCollate
+
+ test_dataloader_opts:
+   batch_size: 256
+   num_workers: 8
+   collate_fn: !new:speechbrain.lobes.models.Tacotron2.TextMelCollate
+
+ ################################
+ # Model Parameters and model #
+ ################################
+ n_symbols: 148 #fixed depending on symbols in textToSequence
+ symbols_embedding_dim: 512
+
+ # Encoder parameters
+ encoder_kernel_size: 5
+ encoder_n_convolutions: 3
+ encoder_embedding_dim: 512
+
+ # Decoder parameters
+ # The number of frames in the target per encoder step
+ n_frames_per_step: 1
+ decoder_rnn_dim: 1024
+ prenet_dim: 256
+ max_decoder_steps: 1000
+ gate_threshold: 0.5
+ p_attention_dropout: 0.1
+ p_decoder_dropout: 0.1
+ decoder_no_early_stopping: false
+
+ # Attention parameters
+ attention_rnn_dim: 1024
+ attention_dim: 128
+
+ # Location Layer parameters
+ attention_location_n_filters: 32
+ attention_location_kernel_size: 31
+
+ # Mel-post processing network parameters
+ postnet_embedding_dim: 512
+ postnet_kernel_size: 5
+ postnet_n_convolutions: 5
+
+ mel_spectogram: !name:speechbrain.lobes.models.Tacotron2.mel_spectogram
+   sample_rate: 22050
+   hop_length: 256
+   win_length: 1024
+   n_fft: 1024
+   n_mels: 80
+   f_min: 0.0
+   f_max: 8000.0
+   power: 1
+   normalized: false
+   norm: slaney
+   mel_scale: slaney
+   compression: true
+
+ #model
+ model: &id002 !new:speechbrain.lobes.models.Tacotron2.Tacotron2
+
+   #optimizer
+   mask_padding: true
+   n_mel_channels: 80
+   # symbols
+   n_symbols: 148
+   symbols_embedding_dim: 512
+   # encoder
+   encoder_kernel_size: 5
+   encoder_n_convolutions: 3
+   encoder_embedding_dim: 512
+   # attention
+   attention_rnn_dim: 1024
+   attention_dim: 128
+   # attention location
+   attention_location_n_filters: 32
+   attention_location_kernel_size: 31
+   # decoder
+   n_frames_per_step: 1
+   decoder_rnn_dim: 1024
+   prenet_dim: 256
+   max_decoder_steps: 1000
+   gate_threshold: 0.5
+   p_attention_dropout: 0.1
+   p_decoder_dropout: 0.1
+   # postnet
+   postnet_embedding_dim: 512
+   postnet_kernel_size: 5
+   postnet_n_convolutions: 5
+   decoder_no_early_stopping: false
+
+ guided_attention_scheduler: &id001 !new:speechbrain.nnet.schedulers.StepScheduler
+   initial_value: 50.0
+   half_life: 10.
+
+ criterion: !new:speechbrain.lobes.models.Tacotron2.Loss
+   gate_loss_weight: 1.0
+   guided_attention_weight: 50.0
+   guided_attention_sigma: 0.2
+   guided_attention_scheduler: *id001
+   guided_attention_hard_stop: 50
+
+ modules:
+   model: *id002
+ opt_class: !name:torch.optim.Adam
+   lr: 0.001
+   weight_decay: 0.000006
+
+ #epoch object
+ epoch_counter: &id003 !new:speechbrain.utils.epoch_loop.EpochCounter
+   limit: 500
+
+ # train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
+ #   save_file: !ref <train_log>
+ train_logger: !new:speechbrain.utils.train_logger.WandBLogger
+   initializer: !name:wandb.init
+     # id: !ref <wandb_id>
+     name: tacotron2-luganda
+     entity: sulaiman-kagumire
+     project: tts-luganda
+     reinit: true
+     # yaml_config: hparams/train.yaml
+     resume: allow
+
+ #annealing_function
+ lr_annealing: &id004 !new:speechbrain.nnet.schedulers.IntervalScheduler
+
+   #infer: !name:speechbrain.lobes.models.Tacotron2.infer
+
+   intervals:
+   - steps: 6000
+     lr: 0.0005
+   - steps: 8000
+     lr: 0.0003
+   - steps: 10000
+     lr: 0.0001
+
+ #checkpointer
+ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
+   checkpoints_dir: ./results/tacotron2/1234/save
+   recoverables:
+     model: *id002
+     counter: *id003
+     scheduler: *id004
+ progress_sample_logger: !new:speechbrain.utils.train_logger.ProgressSampleLogger
+   output_path: ./results/tacotron2/1234/samples
+   batch_sample_size: 3
+   formats:
+     raw_batch: raw
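
The file above is a resolved HyperPyYAML config: the !new:, !name:, and !apply: tags build Python objects when the file is loaded, and the &id002/*id002 anchors make modules and the checkpointer's recoverables refer to the same Tacotron2 instance. A minimal loading sketch, assuming the standard hyperpyyaml package that SpeechBrain depends on:

from hyperpyyaml import load_hyperpyyaml

with open("hyperparams.yaml") as f:
    hparams = load_hyperpyyaml(f)  # !new:/!name:/!apply: tags are executed here

model = hparams["model"]                      # the Tacotron2 module
print(hparams["modules"]["model"] is model)   # True: &id002/*id002 alias one object
checkpointer = hparams["checkpointer"]        # recovers model, epoch counter, scheduler
# Note: !new: entries are instantiated eagerly, so loading this file also
# constructs the WandBLogger (which typically starts a wandb run), meaning
# W&B credentials need to be configured.
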
model.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c2600ccebd2116d3f97b39e3f5f16d0e607b03e0008a699efa510c48e14331a0
+ size 112826573
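
model.ckpt is committed as a Git LFS pointer, so the repository itself only stores the sha256 and byte size (~113 MB); the weights live in the LFS store and are materialized on download. A small sketch with huggingface_hub (the repo_id is a placeholder for this model's Hub repository):

from huggingface_hub import hf_hub_download

# Downloads (or reuses a cached copy of) the ~113 MB checkpoint that the
# LFS pointer refers to, and returns the local filesystem path.
ckpt_path = hf_hub_download(
    repo_id="Nick256/tacotron2-luganda",  # placeholder repo id
    filename="model.ckpt",
)
print(ckpt_path)
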