ftshijt committed on
Commit
787ca85
1 Parent(s): 95a2f2a

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +389 -0
README.md ADDED
@@ -0,0 +1,389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This model was trained by ftshijt using the aishell3/tts1 recipe in <a href="https://github.com/espnet/espnet/">ESPnet</a>.
2
+ <p>&nbsp;</p>
3
+ <ul>
4
+ <li><strong>Python API</strong><pre><code class="language-python">See https://github.com/espnet/espnet_model_zoo</code></pre></li>
5
+ <li><strong>Evaluate in the recipe</strong><pre>
6
+ <code class="language-bash">
7
+ See the ESPnet repository for how to use pre-trained models.
8
+ </code></pre></li>
9
+ <li><strong>Config</strong><pre><code>config: conf/train.yaml
10
+ print_config: false
11
+ log_level: INFO
12
+ dry_run: false
13
+ iterator_type: sequence
14
+ output_dir: exp/tts_train_raw_phn_pypinyin_g2p_phone
15
+ ngpu: 1
16
+ seed: 0
17
+ num_workers: 1
18
+ num_att_plot: 3
19
+ dist_backend: nccl
20
+ dist_init_method: env://
21
+ dist_world_size: null
22
+ dist_rank: null
23
+ local_rank: 0
24
+ dist_master_addr: null
25
+ dist_master_port: null
26
+ dist_launcher: null
27
+ multiprocessing_distributed: false
28
+ unused_parameters: false
29
+ sharded_ddp: false
30
+ cudnn_enabled: true
31
+ cudnn_benchmark: false
32
+ cudnn_deterministic: true
33
+ collect_stats: false
34
+ write_collected_feats: false
35
+ max_epoch: 500
36
+ patience: null
37
+ val_scheduler_criterion:
38
+ - valid
39
+ - loss
40
+ early_stopping_criterion:
41
+ - valid
42
+ - loss
43
+ - min
44
+ best_model_criterion:
45
+ - - valid
46
+ - loss
47
+ - min
48
+ - - train
49
+ - loss
50
+ - min
51
+ keep_nbest_models: 5
52
+ grad_clip: 1.0
53
+ grad_clip_type: 2.0
54
+ grad_noise: false
55
+ accum_grad: 1
56
+ no_forward_run: false
57
+ resume: true
58
+ train_dtype: float32
59
+ use_amp: false
60
+ log_interval: null
61
+ use_tensorboard: true
62
+ use_wandb: false
63
+ wandb_project: null
64
+ wandb_id: null
65
+ wandb_entity: null
66
+ wandb_name: null
67
+ wandb_model_log_interval: -1
68
+ detect_anomaly: false
69
+ pretrain_path: null
70
+ init_param: []
71
+ ignore_init_mismatch: false
72
+ freeze_param: []
73
+ num_iters_per_epoch: 500
74
+ batch_size: 20
75
+ valid_batch_size: null
76
+ batch_bins: 3750000
77
+ valid_batch_bins: null
78
+ train_shape_file:
79
+ - exp/tts_stats_raw_phn_pypinyin_g2p_phone/train/text_shape.phn
80
+ - exp/tts_stats_raw_phn_pypinyin_g2p_phone/train/speech_shape
81
+ valid_shape_file:
82
+ - exp/tts_stats_raw_phn_pypinyin_g2p_phone/valid/text_shape.phn
83
+ - exp/tts_stats_raw_phn_pypinyin_g2p_phone/valid/speech_shape
84
+ batch_type: numel
85
+ valid_batch_type: null
86
+ fold_length:
87
+ - 150
88
+ - 240000
89
+ sort_in_batch: descending
90
+ sort_batch: descending
91
+ multiple_iterator: false
92
+ chunk_length: 500
93
+ chunk_shift_ratio: 0.5
94
+ num_cache_chunks: 1024
95
+ train_data_path_and_name_and_type:
96
+ - - dump/raw/train_no_dev/text
97
+ - text
98
+ - text
99
+ - - dump/raw/train_no_dev/wav.scp
100
+ - speech
101
+ - sound
102
+ - - dump/xvector/train_no_dev/xvector.scp
103
+ - spembs
104
+ - kaldi_ark
105
+ valid_data_path_and_name_and_type:
106
+ - - dump/raw/dev/text
107
+ - text
108
+ - text
109
+ - - dump/raw/dev/wav.scp
110
+ - speech
111
+ - sound
112
+ - - dump/xvector/dev/xvector.scp
113
+ - spembs
114
+ - kaldi_ark
115
+ allow_variable_data_keys: false
116
+ max_cache_size: 0.0
117
+ max_cache_fd: 32
118
+ valid_max_cache_size: null
119
+ optim: adam
120
+ optim_conf:
121
+ lr: 0.001
122
+ eps: 1.0e-06
123
+ weight_decay: 0.0
124
+ scheduler: null
125
+ scheduler_conf: {}
126
+ token_list:
127
+ - <blank>
128
+ - <unk>
129
+ - ''
130
+ - d
131
+ - sh
132
+ - j
133
+ - i4
134
+ - zh
135
+ - l
136
+ - x
137
+ - e
138
+ - b
139
+ - g
140
+ - i1
141
+ - h
142
+ - q
143
+ - m
144
+ - u4
145
+ - t
146
+ - z
147
+ - ch
148
+ - i3
149
+ - i2
150
+ - f
151
+ - s
152
+ - n
153
+ - r
154
+ - ian4
155
+ - e4
156
+ - ong1
157
+ - en2
158
+ - ai4
159
+ - k
160
+ - ing2
161
+ - a1
162
+ - iou3
163
+ - uo3
164
+ - ao4
165
+ - u3
166
+ - ui4
167
+ - p
168
+ - e2
169
+ - an1
170
+ - eng2
171
+ - c
172
+ - in1
173
+ - ai2
174
+ - an4
175
+ - ian2
176
+ - ing1
177
+ - ai3
178
+ - ang4
179
+ - ao3
180
+ - ian1
181
+ - uo4
182
+ - ian3
183
+ - iao4
184
+ - ang1
185
+ - u2
186
+ - ü4
187
+ - u1
188
+ - a4
189
+ - eng1
190
+ - ing4
191
+ - üan2
192
+ - ie4
193
+ - en1
194
+ - iu4
195
+ - uei4
196
+ - ou4
197
+ - er4
198
+ - e1
199
+ - ei4
200
+ - an3
201
+ - ong2
202
+ - uo2
203
+ - ang3
204
+ - ou1
205
+ - ou3
206
+ - ong4
207
+ - eng4
208
+ - an2
209
+ - iang4
210
+ - a3
211
+ - iang1
212
+ - ia1
213
+ - iao1
214
+ - uan4
215
+ - ia4
216
+ - iu3
217
+ - ang2
218
+ - uo1
219
+ - ei3
220
+ - e3
221
+ - in4
222
+ - iang3
223
+ - ü1
224
+ - uan1
225
+ - en3
226
+ - iao3
227
+ - ie3
228
+ - ao1
229
+ - ai1
230
+ - ü2
231
+ - ing3
232
+ - er2
233
+ - ü3
234
+ - uan3
235
+ - üe4
236
+ - in3
237
+ - en
238
+ - ei2
239
+ - üe2
240
+ - ie2
241
+ - en4
242
+ - ua4
243
+ - in2
244
+ - iu2
245
+ - uan2
246
+ - a2
247
+ - ie1
248
+ - ou2
249
+ - ui1
250
+ - iang2
251
+ - ong3
252
+ - i
253
+ - uang3
254
+ - eng3
255
+ - ün4
256
+ - uang4
257
+ - uai4
258
+ - iong4
259
+ - v3
260
+ - iou2
261
+ - ui2
262
+ - un1
263
+ - üan4
264
+ - uang1
265
+ - ei1
266
+ - uang2
267
+ - o2
268
+ - a
269
+ - ao2
270
+ - iao2
271
+ - ui3
272
+ - un4
273
+ - o1
274
+ - ua2
275
+ - un2
276
+ - uen2
277
+ - iu1
278
+ - v4
279
+ - ua1
280
+ - uei1
281
+ - üan3
282
+ - ün1
283
+ - üe1
284
+ - ün2
285
+ - uen4
286
+ - uei3
287
+ - uei2
288
+ - un3
289
+ - iou4
290
+ - o4
291
+ - er3
292
+ - uen1
293
+ - iong3
294
+ - iou1
295
+ - ia3
296
+ - üan1
297
+ - ia2
298
+ - iong1
299
+ - üe3
300
+ - uen3
301
+ - ve4
302
+ - iong2
303
+ - uai2
304
+ - uai1
305
+ - ua3
306
+ - ün3
307
+ - er
308
+ - uai3
309
+ - ia
310
+ - o3
311
+ - v2
312
+ - o
313
+ - ueng1
314
+ - ei
315
+ - '2'
316
+ - ua
317
+ - io1
318
+ - <sos/eos>
319
+ odim: null
320
+ model_conf: {}
321
+ use_preprocessor: true
322
+ token_type: phn
323
+ bpemodel: null
324
+ non_linguistic_symbols: null
325
+ cleaner: null
326
+ g2p: pypinyin_g2p_phone
327
+ feats_extract: fbank
328
+ feats_extract_conf:
329
+ n_fft: 2048
330
+ hop_length: 300
331
+ win_length: 1200
332
+ fs: 24000
333
+ fmin: 80
334
+ fmax: 7600
335
+ n_mels: 80
336
+ normalize: global_mvn
337
+ normalize_conf:
338
+ stats_file: exp/tts_stats_raw_phn_pypinyin_g2p_phone/train/feats_stats.npz
339
+ tts: tacotron2
340
+ tts_conf:
341
+ embed_dim: 512
342
+ elayers: 1
343
+ eunits: 512
344
+ econv_layers: 3
345
+ econv_chans: 512
346
+ econv_filts: 5
347
+ atype: location
348
+ adim: 512
349
+ aconv_chans: 32
350
+ aconv_filts: 15
351
+ cumulate_att_w: true
352
+ dlayers: 2
353
+ dunits: 1024
354
+ prenet_layers: 2
355
+ prenet_units: 256
356
+ postnet_layers: 5
357
+ postnet_chans: 512
358
+ postnet_filts: 5
359
+ output_activation: null
360
+ use_batch_norm: true
361
+ use_concate: true
362
+ use_residual: false
363
+ spk_embed_dim: 512
364
+ spk_embed_integration_type: add
365
+ use_gst: true
366
+ gst_heads: 4
367
+ gst_tokens: 16
368
+ dropout_rate: 0.5
369
+ zoneout_rate: 0.1
370
+ reduction_factor: 1
371
+ use_masking: true
372
+ bce_pos_weight: 10.0
373
+ use_guided_attn_loss: true
374
+ guided_attn_loss_sigma: 0.4
375
+ guided_attn_loss_lambda: 1.0
376
+ pitch_extract: null
377
+ pitch_extract_conf: {}
378
+ pitch_normalize: null
379
+ pitch_normalize_conf: {}
380
+ energy_extract: null
381
+ energy_extract_conf: {}
382
+ energy_normalize: null
383
+ energy_normalize_conf: {}
384
+ required:
385
+ - output_dir
386
+ - token_list
387
+ version: 0.10.2a1
388
+ distributed: false</code></pre></li>
389
+ </ul>