ahnafsamin committed on
Commit
a09f3e8
1 Parent(s): 3234b81

Upload config.yaml

Browse files
Files changed (1) hide show
  1. config.yaml +257 -0
config.yaml ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_fastspeech2.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/tts_train_fastspeech2_raw_char_tacotron
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 1000
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ - - train
41
+ - loss
42
+ - min
43
+ keep_nbest_models: 5
44
+ nbest_averaging_interval: 0
45
+ grad_clip: 1.0
46
+ grad_clip_type: 2.0
47
+ grad_noise: false
48
+ accum_grad: 8
49
+ no_forward_run: false
50
+ resume: true
51
+ train_dtype: float32
52
+ use_amp: false
53
+ log_interval: null
54
+ use_matplotlib: true
55
+ use_tensorboard: true
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ pretrain_path: null
64
+ init_param: []
65
+ ignore_init_mismatch: false
66
+ freeze_param: []
67
+ num_iters_per_epoch: 800
68
+ batch_size: 20
69
+ valid_batch_size: null
70
+ batch_bins: 3000000
71
+ valid_batch_bins: null
72
+ train_shape_file:
73
+ - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/text_shape.char
74
+ - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/speech_shape
75
+ valid_shape_file:
76
+ - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/text_shape.char
77
+ - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/speech_shape
78
+ batch_type: numel
79
+ valid_batch_type: null
80
+ fold_length:
81
+ - 150
82
+ - 204800
83
+ sort_in_batch: descending
84
+ sort_batch: descending
85
+ multiple_iterator: false
86
+ chunk_length: 500
87
+ chunk_shift_ratio: 0.5
88
+ num_cache_chunks: 1024
89
+ train_data_path_and_name_and_type:
90
+ - - dump/raw/tr_no_dev/text
91
+ - text
92
+ - text
93
+ - - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/tr_no_dev/durations
94
+ - durations
95
+ - text_int
96
+ - - dump/raw/tr_no_dev/wav.scp
97
+ - speech
98
+ - sound
99
+ - - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/collect_feats/pitch.scp
100
+ - pitch
101
+ - npy
102
+ - - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/collect_feats/energy.scp
103
+ - energy
104
+ - npy
105
+ valid_data_path_and_name_and_type:
106
+ - - dump/raw/dev/text
107
+ - text
108
+ - text
109
+ - - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/dev/durations
110
+ - durations
111
+ - text_int
112
+ - - dump/raw/dev/wav.scp
113
+ - speech
114
+ - sound
115
+ - - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/collect_feats/pitch.scp
116
+ - pitch
117
+ - npy
118
+ - - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/collect_feats/energy.scp
119
+ - energy
120
+ - npy
121
+ allow_variable_data_keys: false
122
+ max_cache_size: 0.0
123
+ max_cache_fd: 32
124
+ valid_max_cache_size: null
125
+ optim: adam
126
+ optim_conf:
127
+ lr: 1.0
128
+ scheduler: noamlr
129
+ scheduler_conf:
130
+ model_size: 384
131
+ warmup_steps: 4000
132
+ token_list:
133
+ - <blank>
134
+ - <unk>
135
+ - <space>
136
+ - E
137
+ - N
138
+ - A
139
+ - O
140
+ - T
141
+ - I
142
+ - R
143
+ - D
144
+ - L
145
+ - S
146
+ - K
147
+ - M
148
+ - U
149
+ - G
150
+ - H
151
+ - W
152
+ - V
153
+ - Z
154
+ - P
155
+ - B
156
+ - .
157
+ - J
158
+ - C
159
+ - ','
160
+ - F
161
+ - ''''
162
+ - '?'
163
+ - '!'
164
+ - Y
165
+ - X
166
+ - '`'
167
+ - <sos/eos>
168
+ odim: null
169
+ model_conf: {}
170
+ use_preprocessor: true
171
+ token_type: char
172
+ bpemodel: null
173
+ non_linguistic_symbols: null
174
+ cleaner: tacotron
175
+ g2p: g2p_en
176
+ feats_extract: fbank
177
+ feats_extract_conf:
178
+ n_fft: 1024
179
+ hop_length: 256
180
+ win_length: null
181
+ fs: 22050
182
+ fmin: 80
183
+ fmax: 7600
184
+ n_mels: 80
185
+ normalize: global_mvn
186
+ normalize_conf:
187
+ stats_file: exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/feats_stats.npz
188
+ tts: fastspeech2
189
+ tts_conf:
190
+ adim: 384
191
+ aheads: 2
192
+ elayers: 4
193
+ eunits: 1536
194
+ dlayers: 4
195
+ dunits: 1536
196
+ positionwise_layer_type: conv1d
197
+ positionwise_conv_kernel_size: 3
198
+ duration_predictor_layers: 2
199
+ duration_predictor_chans: 256
200
+ duration_predictor_kernel_size: 3
201
+ postnet_layers: 5
202
+ postnet_filts: 5
203
+ postnet_chans: 256
204
+ use_masking: true
205
+ use_scaled_pos_enc: true
206
+ encoder_normalize_before: true
207
+ decoder_normalize_before: true
208
+ reduction_factor: 1
209
+ init_type: xavier_uniform
210
+ init_enc_alpha: 1.0
211
+ init_dec_alpha: 1.0
212
+ transformer_enc_dropout_rate: 0.2
213
+ transformer_enc_positional_dropout_rate: 0.2
214
+ transformer_enc_attn_dropout_rate: 0.2
215
+ transformer_dec_dropout_rate: 0.2
216
+ transformer_dec_positional_dropout_rate: 0.2
217
+ transformer_dec_attn_dropout_rate: 0.2
218
+ pitch_predictor_layers: 5
219
+ pitch_predictor_chans: 256
220
+ pitch_predictor_kernel_size: 5
221
+ pitch_predictor_dropout: 0.5
222
+ pitch_embed_kernel_size: 1
223
+ pitch_embed_dropout: 0.0
224
+ stop_gradient_from_pitch_predictor: true
225
+ energy_predictor_layers: 2
226
+ energy_predictor_chans: 256
227
+ energy_predictor_kernel_size: 3
228
+ energy_predictor_dropout: 0.5
229
+ energy_embed_kernel_size: 1
230
+ energy_embed_dropout: 0.0
231
+ stop_gradient_from_energy_predictor: false
232
+ pitch_extract: dio
233
+ pitch_extract_conf:
234
+ fs: 22050
235
+ n_fft: 1024
236
+ hop_length: 256
237
+ f0max: 400
238
+ f0min: 80
239
+ reduction_factor: 1
240
+ pitch_normalize: global_mvn
241
+ pitch_normalize_conf:
242
+ stats_file: exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/pitch_stats.npz
243
+ energy_extract: energy
244
+ energy_extract_conf:
245
+ fs: 22050
246
+ n_fft: 1024
247
+ hop_length: 256
248
+ win_length: null
249
+ reduction_factor: 1
250
+ energy_normalize: global_mvn
251
+ energy_normalize_conf:
252
+ stats_file: exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/energy_stats.npz
253
+ required:
254
+ - output_dir
255
+ - token_list
256
+ version: 0.10.6
257
+ distributed: false