soumi-maiti committed on
Commit
f0bec87
1 Parent(s): cd558fc

Add model files

Browse files
exp/tts_finetune/25epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4aab3cb5e447c1474cfc1bdf4a7b20388d0b3bc1d9c229a5e5d45735d93b5a4
3
+ size 132551583
exp/tts_finetune/config.yaml ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/finetune_train_transformer_sid.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/tts_finetune
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 100
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ - - train
41
+ - loss
42
+ - min
43
+ keep_nbest_models: 5
44
+ nbest_averaging_interval: 0
45
+ grad_clip: 1.0
46
+ grad_clip_type: 2.0
47
+ grad_noise: false
48
+ accum_grad: 3
49
+ no_forward_run: false
50
+ resume: true
51
+ train_dtype: float32
52
+ use_amp: false
53
+ log_interval: null
54
+ use_matplotlib: true
55
+ use_tensorboard: true
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ pretrain_path: null
64
+ init_param:
65
+ - /ocean/projects/cis210027p/smaiti/espnet1/egs2/cmu_indic/tts1/../tts1_pre_fine/exp/tts_train_transformer_sid_raw_phn_none/train.loss.ave_5best.pth
66
+ ignore_init_mismatch: false
67
+ freeze_param: []
68
+ num_iters_per_epoch: 1000
69
+ batch_size: 20
70
+ valid_batch_size: null
71
+ batch_bins: 6000000
72
+ valid_batch_bins: null
73
+ train_shape_file:
74
+ - exp/tts_stats_raw_phn_none/train/text_shape.phn
75
+ - exp/tts_stats_raw_phn_none/train/speech_shape
76
+ valid_shape_file:
77
+ - exp/tts_stats_raw_phn_none/valid/text_shape.phn
78
+ - exp/tts_stats_raw_phn_none/valid/speech_shape
79
+ batch_type: numel
80
+ valid_batch_type: null
81
+ fold_length:
82
+ - 150
83
+ - 204800
84
+ sort_in_batch: descending
85
+ sort_batch: descending
86
+ multiple_iterator: false
87
+ chunk_length: 500
88
+ chunk_shift_ratio: 0.5
89
+ num_cache_chunks: 1024
90
+ train_data_path_and_name_and_type:
91
+ - - dump/raw/hin_ab_train_no_dev/text
92
+ - text
93
+ - text
94
+ - - dump/raw/hin_ab_train_no_dev/wav.scp
95
+ - speech
96
+ - sound
97
+ - - dump/raw/hin_ab_train_no_dev/utt2sid
98
+ - sids
99
+ - text_int
100
+ valid_data_path_and_name_and_type:
101
+ - - dump/raw/hin_ab_dev/text
102
+ - text
103
+ - text
104
+ - - dump/raw/hin_ab_dev/wav.scp
105
+ - speech
106
+ - sound
107
+ - - dump/raw/hin_ab_dev/utt2sid
108
+ - sids
109
+ - text_int
110
+ allow_variable_data_keys: false
111
+ max_cache_size: 0.0
112
+ max_cache_fd: 32
113
+ valid_max_cache_size: null
114
+ optim: adam
115
+ optim_conf:
116
+ lr: 0.002
117
+ scheduler: noamlr
118
+ scheduler_conf:
119
+ model_size: 512
120
+ warmup_steps: 8000
121
+ token_list:
122
+ - <blank>
123
+ - <unk>
124
+ - '@'
125
+ - n
126
+ - I
127
+ - a
128
+ - k
129
+ - m
130
+ - r
131
+ - l
132
+ - s
133
+ - e
134
+ - t
135
+ - i
136
+ - j
137
+ - 'a:'
138
+ - p
139
+ - d
140
+ - U
141
+ - 'i:'
142
+ - u
143
+ - o
144
+ - w
145
+ - t_d
146
+ - r\
147
+ - g
148
+ - h\
149
+ - b
150
+ - O
151
+ - P
152
+ - A
153
+ - d_d
154
+ - '{'
155
+ - v
156
+ - tS
157
+ - z
158
+ - E
159
+ - h
160
+ - V
161
+ - dZ
162
+ - D
163
+ - r\=
164
+ - N
165
+ - S
166
+ - d`
167
+ - t`
168
+ - f
169
+ - 'e:'
170
+ - 'o:'
171
+ - 'u:'
172
+ - '4'
173
+ - n`
174
+ - b_t
175
+ - k_h
176
+ - '{:'
177
+ - s`
178
+ - J
179
+ - n_d
180
+ - d_d_t
181
+ - T
182
+ - t_h
183
+ - ts\
184
+ - s\
185
+ - l`
186
+ - 'O:'
187
+ - t`_h
188
+ - l_d
189
+ - r`
190
+ - s_d
191
+ - dz\
192
+ - d_t
193
+ - t_d_h
194
+ - g_t
195
+ - p_h
196
+ - A~
197
+ - dz
198
+ - tS_h
199
+ - e_^
200
+ - '@~'
201
+ - dZ_t
202
+ - 'tS:'
203
+ - 'a~:'
204
+ - u~
205
+ - 'u~:'
206
+ - ts\_h
207
+ - r=
208
+ - Z
209
+ - o~
210
+ - 'k:'
211
+ - 't_d:'
212
+ - O~
213
+ - dz\_t
214
+ - I~
215
+ - r`_0
216
+ - i~
217
+ - d`_t
218
+ - U~
219
+ - r\`
220
+ - p\
221
+ - e~
222
+ - E~
223
+ - 't`:'
224
+ - 'd`:'
225
+ - x
226
+ - 'g:'
227
+ - 'l:'
228
+ - 'dZ:'
229
+ - 's:'
230
+ - a~
231
+ - q
232
+ - 'p:'
233
+ - N_t
234
+ - 'd_d:'
235
+ - O_t
236
+ - r_t
237
+ - G
238
+ - e_t
239
+ - a_t
240
+ - i_t
241
+ - u_t
242
+ - 'b:'
243
+ - 'S:'
244
+ - 'n:'
245
+ - <sos/eos>
246
+ odim: null
247
+ model_conf: {}
248
+ use_preprocessor: true
249
+ token_type: phn
250
+ bpemodel: null
251
+ non_linguistic_symbols: null
252
+ cleaner: null
253
+ g2p: null
254
+ feats_extract: fbank
255
+ feats_extract_conf:
256
+ n_fft: 1024
257
+ hop_length: 256
258
+ win_length: null
259
+ fs: 16000
260
+ fmin: 80
261
+ fmax: 7600
262
+ n_mels: 80
263
+ normalize: global_mvn
264
+ normalize_conf:
265
+ stats_file: exp/tts_stats_raw_phn_none/train/feats_stats.npz
266
+ tts: transformer
267
+ tts_conf:
268
+ spks: 10
269
+ embed_dim: 0
270
+ eprenet_conv_layers: 0
271
+ eprenet_conv_filts: 0
272
+ eprenet_conv_chans: 0
273
+ dprenet_layers: 2
274
+ dprenet_units: 256
275
+ adim: 512
276
+ aheads: 8
277
+ elayers: 6
278
+ eunits: 1024
279
+ dlayers: 6
280
+ dunits: 1024
281
+ positionwise_layer_type: conv1d
282
+ positionwise_conv_kernel_size: 1
283
+ postnet_layers: 5
284
+ postnet_filts: 5
285
+ postnet_chans: 256
286
+ use_masking: true
287
+ bce_pos_weight: 5.0
288
+ use_scaled_pos_enc: true
289
+ encoder_normalize_before: true
290
+ decoder_normalize_before: true
291
+ reduction_factor: 1
292
+ init_type: xavier_uniform
293
+ init_enc_alpha: 1.0
294
+ init_dec_alpha: 1.0
295
+ eprenet_dropout_rate: 0.0
296
+ dprenet_dropout_rate: 0.5
297
+ postnet_dropout_rate: 0.5
298
+ transformer_enc_dropout_rate: 0.1
299
+ transformer_enc_positional_dropout_rate: 0.1
300
+ transformer_enc_attn_dropout_rate: 0.1
301
+ transformer_dec_dropout_rate: 0.1
302
+ transformer_dec_positional_dropout_rate: 0.1
303
+ transformer_dec_attn_dropout_rate: 0.1
304
+ transformer_enc_dec_attn_dropout_rate: 0.1
305
+ use_guided_attn_loss: true
306
+ num_heads_applied_guided_attn: 2
307
+ num_layers_applied_guided_attn: 2
308
+ modules_applied_guided_attn:
309
+ - encoder-decoder
310
+ guided_attn_loss_sigma: 0.4
311
+ guided_attn_loss_lambda: 10.0
312
+ pitch_extract: null
313
+ pitch_extract_conf: {}
314
+ pitch_normalize: null
315
+ pitch_normalize_conf: {}
316
+ energy_extract: null
317
+ energy_extract_conf: {}
318
+ energy_normalize: null
319
+ energy_normalize_conf: {}
320
+ required:
321
+ - output_dir
322
+ - token_list
323
+ version: 0.10.7a1
324
+ distributed: false
exp/tts_finetune/images/backward_time.png ADDED
exp/tts_finetune/images/bce_loss.png ADDED
exp/tts_finetune/images/decoder_alpha.png ADDED
exp/tts_finetune/images/enc_dec_attn_loss.png ADDED
exp/tts_finetune/images/encoder_alpha.png ADDED
exp/tts_finetune/images/forward_time.png ADDED
exp/tts_finetune/images/gpu_max_cached_mem_GB.png ADDED
exp/tts_finetune/images/iter_time.png ADDED
exp/tts_finetune/images/l1_loss.png ADDED
exp/tts_finetune/images/l2_loss.png ADDED
exp/tts_finetune/images/loss.png ADDED
exp/tts_finetune/images/optim0_lr0.png ADDED
exp/tts_finetune/images/optim_step_time.png ADDED
exp/tts_finetune/images/train_time.png ADDED
exp/tts_finetune/valid.loss.best.pth ADDED
@@ -0,0 +1 @@
 
1
+ 25epoch.pth