viks66 committed on
Commit
2f0f94b
1 Parent(s): 4f8835a

add te tts espnet

Browse files
.gitattributes CHANGED
@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zstandard filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zstandard filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
32
+ exp/tts_train_transformer_raw_char/valid.loss.best.pth filter=lfs diff=lfs merge=lfs -text
exp/tts_stats_raw_char/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40f39b43df421f5078c34ad9d8f21a44e0e82a8dc42c164601ad1b060427c582
3
+ size 1402
exp/tts_train_transformer_raw_char/config.yaml ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_transformer.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/tts_train_transformer_raw_char
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 3000
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ - - train
41
+ - loss
42
+ - min
43
+ keep_nbest_models: 5
44
+ nbest_averaging_interval: 0
45
+ grad_clip: 1.0
46
+ grad_clip_type: 2.0
47
+ grad_noise: false
48
+ accum_grad: 2
49
+ no_forward_run: false
50
+ resume: true
51
+ train_dtype: float32
52
+ use_amp: false
53
+ log_interval: null
54
+ use_matplotlib: true
55
+ use_tensorboard: true
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ pretrain_path: null
64
+ init_param: []
65
+ ignore_init_mismatch: false
66
+ freeze_param: []
67
+ num_iters_per_epoch: null
68
+ batch_size: 20
69
+ valid_batch_size: null
70
+ batch_bins: 700000
71
+ valid_batch_bins: null
72
+ train_shape_file:
73
+ - exp/tts_stats_raw_char/train/text_shape.char
74
+ - exp/tts_stats_raw_char/train/speech_shape
75
+ valid_shape_file:
76
+ - exp/tts_stats_raw_char/valid/text_shape.char
77
+ - exp/tts_stats_raw_char/valid/speech_shape
78
+ batch_type: numel
79
+ valid_batch_type: null
80
+ fold_length:
81
+ - 150
82
+ - 204800
83
+ sort_in_batch: descending
84
+ sort_batch: descending
85
+ multiple_iterator: false
86
+ chunk_length: 500
87
+ chunk_shift_ratio: 0.5
88
+ num_cache_chunks: 1024
89
+ train_data_path_and_name_and_type:
90
+ - - dump/raw/train/text
91
+ - text
92
+ - text
93
+ - - dump/raw/train/wav.scp
94
+ - speech
95
+ - sound
96
+ valid_data_path_and_name_and_type:
97
+ - - dump/raw/dev/text
98
+ - text
99
+ - text
100
+ - - dump/raw/dev/wav.scp
101
+ - speech
102
+ - sound
103
+ allow_variable_data_keys: false
104
+ max_cache_size: 0.0
105
+ max_cache_fd: 32
106
+ valid_max_cache_size: null
107
+ optim: adam
108
+ optim_conf:
109
+ lr: 1.0
110
+ scheduler: noamlr
111
+ scheduler_conf:
112
+ model_size: 512
113
+ warmup_steps: 8000
114
+ token_list:
115
+ - <blank>
116
+ - <unk>
117
+ - <space>
118
+ - ్
119
+ - ి
120
+ - ా
121
+ - ర
122
+ - ు
123
+ - న
124
+ - ల
125
+ - ం
126
+ - క
127
+ - స
128
+ - త
129
+ - ప
130
+ - వ
131
+ - మ
132
+ - ట
133
+ - య
134
+ - ద
135
+ - ో
136
+ - ే
137
+ - చ
138
+ - డ
139
+ - గ
140
+ - ీ
141
+ - ె
142
+ - ','
143
+ - జ
144
+ - బ
145
+ - అ
146
+ - .
147
+ - ూ
148
+ - ై
149
+ - ష
150
+ - శ
151
+ - ొ
152
+ - హ
153
+ - ధ
154
+ - ఆ
155
+ - ఎ
156
+ - భ
157
+ - ఉ
158
+ - ఫ
159
+ - ణ
160
+ - ఇ
161
+ - ‌
162
+ - థ
163
+ - ళ
164
+ - ఖ
165
+ - ఈ
166
+ - ఒ
167
+ - ృ
168
+ - ఏ
169
+ - ౌ
170
+ - '"'
171
+ - ఓ
172
+ - ఐ
173
+ - ఘ
174
+ - ''''
175
+ - ఛ
176
+ - ఠ
177
+ - '1'
178
+ - ':'
179
+ - '?'
180
+ - '0'
181
+ - ఢ
182
+ - ఞ
183
+ - '2'
184
+ - '9'
185
+ - ఊ
186
+ - '5'
187
+ - '6'
188
+ - '3'
189
+ - ఔ
190
+ - ఝ
191
+ - '8'
192
+ - '4'
193
+ - '7'
194
+ - ఱ
195
+ - ఋ
196
+ - ఙ
197
+ - ఁ
198
+ - ౦
199
+ - ।
200
+ - ౕ
201
+ - ’
202
+ - ౩
203
+ - ‘
204
+ - ౖ
205
+ - “
206
+ - ”
207
+ - ః
208
+ - +
209
+ - (
210
+ - )
211
+ - /
212
+ - ‍
213
+ - '-'
214
+ - '%'
215
+ - ౄ
216
+ - <sos/eos>
217
+ odim: null
218
+ model_conf: {}
219
+ use_preprocessor: true
220
+ token_type: char
221
+ bpemodel: null
222
+ non_linguistic_symbols: null
223
+ cleaner: null
224
+ g2p: null
225
+ feats_extract: fbank
226
+ feats_extract_conf:
227
+ n_fft: 1024
228
+ hop_length: 256
229
+ win_length: null
230
+ fs: 22050
231
+ fmin: 0
232
+ fmax: 8000
233
+ n_mels: 80
234
+ normalize: global_mvn
235
+ normalize_conf:
236
+ stats_file: exp/tts_stats_raw_char/train/feats_stats.npz
237
+ tts: transformer
238
+ tts_conf:
239
+ embed_dim: 0
240
+ eprenet_conv_layers: 0
241
+ eprenet_conv_filts: 0
242
+ eprenet_conv_chans: 0
243
+ dprenet_layers: 2
244
+ dprenet_units: 256
245
+ adim: 512
246
+ aheads: 8
247
+ elayers: 6
248
+ eunits: 1024
249
+ dlayers: 6
250
+ dunits: 1024
251
+ positionwise_layer_type: conv1d
252
+ positionwise_conv_kernel_size: 1
253
+ postnet_layers: 5
254
+ postnet_filts: 5
255
+ postnet_chans: 256
256
+ use_masking: true
257
+ bce_pos_weight: 5.0
258
+ use_scaled_pos_enc: true
259
+ encoder_normalize_before: true
260
+ decoder_normalize_before: true
261
+ reduction_factor: 1
262
+ init_type: xavier_uniform
263
+ init_enc_alpha: 1.0
264
+ init_dec_alpha: 1.0
265
+ eprenet_dropout_rate: 0.0
266
+ dprenet_dropout_rate: 0.5
267
+ postnet_dropout_rate: 0.5
268
+ transformer_enc_dropout_rate: 0.1
269
+ transformer_enc_positional_dropout_rate: 0.1
270
+ transformer_enc_attn_dropout_rate: 0.1
271
+ transformer_dec_dropout_rate: 0.1
272
+ transformer_dec_positional_dropout_rate: 0.1
273
+ transformer_dec_attn_dropout_rate: 0.1
274
+ transformer_enc_dec_attn_dropout_rate: 0.1
275
+ use_guided_attn_loss: true
276
+ num_heads_applied_guided_attn: 2
277
+ num_layers_applied_guided_attn: 2
278
+ modules_applied_guided_attn:
279
+ - encoder-decoder
280
+ guided_attn_loss_sigma: 0.4
281
+ guided_attn_loss_lambda: 10.0
282
+ pitch_extract: null
283
+ pitch_extract_conf: {}
284
+ pitch_normalize: null
285
+ pitch_normalize_conf: {}
286
+ energy_extract: null
287
+ energy_extract_conf: {}
288
+ energy_normalize: null
289
+ energy_normalize_conf: {}
290
+ required:
291
+ - output_dir
292
+ - token_list
293
+ version: 0.10.6a1
294
+ distributed: false
exp/tts_train_transformer_raw_char/valid.loss.best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4eb94ea74cf7413bd2fa7052888502d0e20a17a94118875483c0c6fe62274250
3
+ size 132478179