Roh committed on
Commit
293dc90
1 Parent(s): 50fc09d

add model files

Browse files
exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/train/feats_stats.npz ADDED
Binary file (1.4 kB). View file
exp/tts_train_fastspeech2_raw_phn_tacotron_g2p_en_no_space/1000epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ccf306f3b99d88ad09b840d004fea7bc3ed4ef7440952b2198d853f6ceab6cc
3
+ size 207206157
exp/tts_train_fastspeech2_raw_phn_tacotron_g2p_en_no_space/config.yaml ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_fastspeech.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/tts_train_fastspeech2_raw_phn_tacotron_g2p_en_no_space
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ cudnn_enabled: true
21
+ cudnn_benchmark: false
22
+ cudnn_deterministic: true
23
+ collect_stats: false
24
+ write_collected_feats: false
25
+ max_epoch: 1000
26
+ patience: null
27
+ val_scheduler_criterion:
28
+ - valid
29
+ - loss
30
+ early_stopping_criterion:
31
+ - valid
32
+ - loss
33
+ - min
34
+ best_model_criterion:
35
+ - - valid
36
+ - loss
37
+ - min
38
+ - - train
39
+ - loss
40
+ - min
41
+ keep_nbest_models: 5
42
+ grad_clip: 1.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 6
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: false
50
+ log_interval: null
51
+ pretrain_path: []
52
+ pretrain_key: []
53
+ num_iters_per_epoch: 500
54
+ batch_size: 20
55
+ valid_batch_size: null
56
+ batch_bins: 800000
57
+ valid_batch_bins: null
58
+ train_shape_file:
59
+ - exp/tts_train_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/text_shape.phn
60
+ - exp/tts_train_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/speech_shape
61
+ valid_shape_file:
62
+ - exp/tts_train_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/text_shape.phn
63
+ - exp/tts_train_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/speech_shape
64
+ batch_type: numel
65
+ valid_batch_type: null
66
+ fold_length:
67
+ - 150
68
+ - 204800
69
+ sort_in_batch: descending
70
+ sort_batch: descending
71
+ multiple_iterator: false
72
+ chunk_length: 500
73
+ chunk_shift_ratio: 0.5
74
+ num_cache_chunks: 1024
75
+ train_data_path_and_name_and_type:
76
+ - - dump/raw/tr_no_dev/text
77
+ - text
78
+ - text
79
+ - - exp/tts_train_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/tr_no_dev/durations
80
+ - durations
81
+ - text_int
82
+ - - dump/raw/tr_no_dev/wav.scp
83
+ - speech
84
+ - sound
85
+ valid_data_path_and_name_and_type:
86
+ - - dump/raw/dev/text
87
+ - text
88
+ - text
89
+ - - exp/tts_train_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/dev/durations
90
+ - durations
91
+ - text_int
92
+ - - dump/raw/dev/wav.scp
93
+ - speech
94
+ - sound
95
+ allow_variable_data_keys: false
96
+ max_cache_size: 0.0
97
+ max_cache_fd: 32
98
+ valid_max_cache_size: null
99
+ optim: adam
100
+ optim_conf:
101
+ lr: 1.0
102
+ scheduler: noamlr
103
+ scheduler_conf:
104
+ model_size: 384
105
+ warmup_steps: 4000
106
+ token_list:
107
+ - <blank>
108
+ - <unk>
109
+ - AH0
110
+ - T
111
+ - N
112
+ - S
113
+ - R
114
+ - D
115
+ - L
116
+ - K
117
+ - IH1
118
+ - M
119
+ - EH1
120
+ - Z
121
+ - DH
122
+ - UW1
123
+ - AE1
124
+ - IH0
125
+ - AY1
126
+ - AH1
127
+ - W
128
+ - .
129
+ - P
130
+ - F
131
+ - IY1
132
+ - V
133
+ - ER0
134
+ - AA1
135
+ - B
136
+ - AO1
137
+ - HH
138
+ - EY1
139
+ - IY0
140
+ - ','
141
+ - Y
142
+ - NG
143
+ - OW1
144
+ - G
145
+ - AW1
146
+ - TH
147
+ - SH
148
+ - UH1
149
+ - '?'
150
+ - ER1
151
+ - JH
152
+ - CH
153
+ - OW0
154
+ - OW2
155
+ - EH2
156
+ - IH2
157
+ - EY2
158
+ - AA2
159
+ - AE2
160
+ - AY2
161
+ - ''''
162
+ - OY1
163
+ - UW0
164
+ - '!'
165
+ - AO2
166
+ - EH0
167
+ - ZH
168
+ - AH2
169
+ - AE0
170
+ - UW2
171
+ - AA0
172
+ - AY0
173
+ - IY2
174
+ - AW2
175
+ - AO0
176
+ - EY0
177
+ - ER2
178
+ - UH2
179
+ - '...'
180
+ - AW0
181
+ - UH0
182
+ - OY2
183
+ - <sos/eos>
184
+ odim: null
185
+ model_conf: {}
186
+ use_preprocessor: true
187
+ token_type: phn
188
+ bpemodel: null
189
+ non_linguistic_symbols: null
190
+ cleaner: tacotron
191
+ g2p: g2p_en_no_space
192
+ feats_extract: fbank
193
+ feats_extract_conf:
194
+ fs: 22050
195
+ fmin: 80
196
+ fmax: 7600
197
+ n_mels: 80
198
+ hop_length: 256
199
+ n_fft: 1024
200
+ win_length: null
201
+ normalize: global_mvn
202
+ normalize_conf:
203
+ stats_file: exp/tts_train_raw_phn_tacotron_g2p_en_no_space/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/feats_stats.npz
204
+ tts: fastspeech
205
+ tts_conf:
206
+ adim: 384
207
+ aheads: 2
208
+ elayers: 6
209
+ eunits: 1536
210
+ dlayers: 6
211
+ dunits: 1536
212
+ positionwise_layer_type: conv1d
213
+ positionwise_conv_kernel_size: 3
214
+ duration_predictor_layers: 2
215
+ duration_predictor_chans: 384
216
+ duration_predictor_kernel_size: 3
217
+ postnet_layers: 5
218
+ postnet_filts: 5
219
+ postnet_chans: 256
220
+ use_masking: true
221
+ use_scaled_pos_enc: true
222
+ encoder_normalize_before: true
223
+ decoder_normalize_before: true
224
+ reduction_factor: 1
225
+ init_type: xavier_uniform
226
+ init_enc_alpha: 1.0
227
+ init_dec_alpha: 1.0
228
+ transformer_enc_dropout_rate: 0.1
229
+ transformer_enc_positional_dropout_rate: 0.1
230
+ transformer_enc_attn_dropout_rate: 0.1
231
+ transformer_dec_dropout_rate: 0.1
232
+ transformer_dec_positional_dropout_rate: 0.1
233
+ transformer_dec_attn_dropout_rate: 0.1
234
+ pitch_extract: null
235
+ pitch_extract_conf: {}
236
+ pitch_normalize: null
237
+ pitch_normalize_conf: {}
238
+ energy_extract: null
239
+ energy_extract_conf: {}
240
+ energy_normalize: null
241
+ energy_normalize_conf: {}
242
+ required:
243
+ - output_dir
244
+ - token_list
245
+ distributed: false