julien-c HF staff committed on
Commit
03a7def
1 Parent(s): 2063d56

initial import from https://zenodo.org/record/3969118

Browse files
.gitattributes CHANGED
@@ -6,3 +6,4 @@
6
  *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
  *.ot filter=lfs diff=lfs merge=lfs -text
8
  *.onnx filter=lfs diff=lfs merge=lfs -text
 
 
6
  *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
  *.ot filter=lfs diff=lfs merge=lfs -text
8
  *.onnx filter=lfs diff=lfs merge=lfs -text
9
+ *.pth filter=lfs diff=lfs merge=lfs -text
exp/tts_stats_raw_phn_pypinyin_g2p_phone/train/feats_stats.npz ADDED
Binary file (1.4 kB). View file
 
exp/tts_train_tacotron2_raw_phn_pypinyin_g2p_phone/199epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10844464e4febdfeaae598b185b99ae6c46e2322f4ad93fcc63f20a13972a611
3
+ size 107274977
exp/tts_train_tacotron2_raw_phn_pypinyin_g2p_phone/config.yaml ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/tts_train_raw_phn_pypinyin_g2p_phone
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ cudnn_enabled: true
21
+ cudnn_benchmark: false
22
+ cudnn_deterministic: true
23
+ collect_stats: false
24
+ write_collected_feats: false
25
+ max_epoch: 200
26
+ patience: null
27
+ val_scheduler_criterion:
28
+ - valid
29
+ - loss
30
+ early_stopping_criterion:
31
+ - valid
32
+ - loss
33
+ - min
34
+ best_model_criterion:
35
+ - - valid
36
+ - loss
37
+ - min
38
+ - - train
39
+ - loss
40
+ - min
41
+ keep_nbest_models: 5
42
+ grad_clip: 1.0
43
+ grad_noise: false
44
+ accum_grad: 1
45
+ no_forward_run: false
46
+ resume: true
47
+ train_dtype: float32
48
+ log_interval: null
49
+ pretrain_path: []
50
+ pretrain_key: []
51
+ num_iters_per_epoch: null
52
+ batch_size: 20
53
+ valid_batch_size: null
54
+ batch_bins: 3750000
55
+ valid_batch_bins: null
56
+ train_shape_file:
57
+ - exp/tts_stats_raw_phn_pypinyin_g2p_phone/train/text_shape.phn
58
+ - exp/tts_stats_raw_phn_pypinyin_g2p_phone/train/speech_shape
59
+ valid_shape_file:
60
+ - exp/tts_stats_raw_phn_pypinyin_g2p_phone/valid/text_shape.phn
61
+ - exp/tts_stats_raw_phn_pypinyin_g2p_phone/valid/speech_shape
62
+ batch_type: numel
63
+ valid_batch_type: null
64
+ fold_length:
65
+ - 150
66
+ - 240000
67
+ sort_in_batch: descending
68
+ sort_batch: descending
69
+ multiple_iterator: false
70
+ chunk_length: 500
71
+ chunk_shift_ratio: 0.5
72
+ num_cache_chunks: 1024
73
+ train_data_path_and_name_and_type:
74
+ - - dump/raw/tr_no_dev/text
75
+ - text
76
+ - text
77
+ - - dump/raw/tr_no_dev/wav.scp
78
+ - speech
79
+ - sound
80
+ valid_data_path_and_name_and_type:
81
+ - - dump/raw/dev/text
82
+ - text
83
+ - text
84
+ - - dump/raw/dev/wav.scp
85
+ - speech
86
+ - sound
87
+ allow_variable_data_keys: false
88
+ max_cache_size: 0.0
89
+ valid_max_cache_size: null
90
+ optim: adam
91
+ optim_conf:
92
+ lr: 0.001
93
+ eps: 1.0e-06
94
+ weight_decay: 0.0
95
+ scheduler: null
96
+ scheduler_conf: {}
97
+ token_list:
98
+ - <blank>
99
+ - <unk>
100
+ - "\uFF30"
101
+ - "\uFF22"
102
+ - "\xFC"
103
+ - an
104
+ - ueng3
105
+ - '2'
106
+ - uen
107
+ - ei
108
+ - ua
109
+ - ao
110
+ - u
111
+ - ueng4
112
+ - uo
113
+ - ang
114
+ - ou
115
+ - v2
116
+ - ueng1
117
+ - o
118
+ - io1
119
+ - "\xFCn3"
120
+ - er
121
+ - ve4
122
+ - o3
123
+ - uai2
124
+ - uen3
125
+ - uen1
126
+ - uai3
127
+ - "\xFCe3"
128
+ - iou1
129
+ - iong2
130
+ - ia2
131
+ - uai1
132
+ - iong1
133
+ - "\xFCan1"
134
+ - "\xFCe1"
135
+ - v4
136
+ - ua3
137
+ - ia
138
+ - iong3
139
+ - uei3
140
+ - ua2
141
+ - ia3
142
+ - uei1
143
+ - o1
144
+ - o4
145
+ - "\xFCn2"
146
+ - un2
147
+ - er3
148
+ - "\xFCn1"
149
+ - uen4
150
+ - un3
151
+ - iu1
152
+ - "\xFCn4"
153
+ - uen2
154
+ - "\xFCan3"
155
+ - un4
156
+ - "\xFCan4"
157
+ - iu3
158
+ - ua1
159
+ - uei2
160
+ - "\uFF01"
161
+ - iou4
162
+ - iou2
163
+ - er4
164
+ - o2
165
+ - ei1
166
+ - iao2
167
+ - uang4
168
+ - "\xFC1"
169
+ - ui2
170
+ - v3
171
+ - uang2
172
+ - iong4
173
+ - un1
174
+ - ui1
175
+ - ua4
176
+ - ao2
177
+ - en
178
+ - a
179
+ - iu2
180
+ - uang1
181
+ - uang3
182
+ - "\xFCe2"
183
+ - in3
184
+ - "\uFF1F"
185
+ - uai4
186
+ - "\xFCe4"
187
+ - uan2
188
+ - ou2
189
+ - eng3
190
+ - ui3
191
+ - uan4
192
+ - a2
193
+ - ie2
194
+ - ong3
195
+ - iang2
196
+ - ie1
197
+ - in4
198
+ - iao1
199
+ - e1
200
+ - in2
201
+ - en4
202
+ - uan3
203
+ - "\xFC2"
204
+ - ing3
205
+ - i
206
+ - ei2
207
+ - ei3
208
+ - iang1
209
+ - er2
210
+ - ia4
211
+ - uo2
212
+ - "\xFC3"
213
+ - uan1
214
+ - ia1
215
+ - e3
216
+ - ong4
217
+ - ie4
218
+ - ai1
219
+ - en3
220
+ - iang3
221
+ - eng4
222
+ - iang4
223
+ - ao1
224
+ - ou1
225
+ - ang2
226
+ - ai3
227
+ - iu4
228
+ - "\xFCan2"
229
+ - ang3
230
+ - en1
231
+ - ong2
232
+ - uei4
233
+ - ei4
234
+ - iao3
235
+ - "\xFC4"
236
+ - an2
237
+ - ing4
238
+ - an3
239
+ - a3
240
+ - ie3
241
+ - an1
242
+ - ian3
243
+ - uo1
244
+ - ing1
245
+ - ou4
246
+ - ian1
247
+ - ou3
248
+ - eng1
249
+ - ang1
250
+ - in1
251
+ - a4
252
+ - eng2
253
+ - uo4
254
+ - u1
255
+ - ang4
256
+ - iou3
257
+ - iao4
258
+ - ian2
259
+ - u2
260
+ - ui4
261
+ - e2
262
+ - en2
263
+ - u3
264
+ - ing2
265
+ - ao4
266
+ - ong1
267
+ - an4
268
+ - ai2
269
+ - ao3
270
+ - uo3
271
+ - ian4
272
+ - p
273
+ - c
274
+ - a1
275
+ - ai4
276
+ - e4
277
+ - s
278
+ - k
279
+ - r
280
+ - i2
281
+ - f
282
+ - n
283
+ - u4
284
+ - ch
285
+ - i3
286
+ - i1
287
+ - q
288
+ - z
289
+ - m
290
+ - t
291
+ - g
292
+ - b
293
+ - e
294
+ - h
295
+ - i4
296
+ - x
297
+ - "\uFF0C"
298
+ - zh
299
+ - "\u3002"
300
+ - l
301
+ - j
302
+ - sh
303
+ - d
304
+ - <sos/eos>
305
+ odim: null
306
+ model_conf: {}
307
+ use_preprocessor: true
308
+ token_type: phn
309
+ bpemodel: null
310
+ non_linguistic_symbols: null
311
+ cleaner: null
312
+ g2p: pypinyin_g2p_phone
313
+ feats_extract: fbank
314
+ feats_extract_conf:
315
+ fs: 24000
316
+ fmin: 80
317
+ fmax: 7600
318
+ n_mels: 80
319
+ hop_length: 300
320
+ n_fft: 2048
321
+ win_length: 1200
322
+ normalize: global_mvn
323
+ normalize_conf:
324
+ stats_file: exp/tts_stats_raw_phn_pypinyin_g2p_phone/train/feats_stats.npz
325
+ tts: tacotron2
326
+ tts_conf:
327
+ embed_dim: 512
328
+ elayers: 1
329
+ eunits: 512
330
+ econv_layers: 3
331
+ econv_chans: 512
332
+ econv_filts: 5
333
+ atype: location
334
+ adim: 512
335
+ aconv_chans: 32
336
+ aconv_filts: 15
337
+ cumulate_att_w: true
338
+ dlayers: 2
339
+ dunits: 1024
340
+ prenet_layers: 2
341
+ prenet_units: 256
342
+ postnet_layers: 5
343
+ postnet_chans: 512
344
+ postnet_filts: 5
345
+ output_activation: null
346
+ use_batch_norm: true
347
+ use_concate: true
348
+ use_residual: false
349
+ dropout_rate: 0.5
350
+ zoneout_rate: 0.1
351
+ reduction_factor: 1
352
+ spk_embed_dim: null
353
+ use_masking: true
354
+ bce_pos_weight: 5.0
355
+ use_guided_attn_loss: true
356
+ guided_attn_loss_sigma: 0.4
357
+ guided_attn_loss_lambda: 1.0
358
+ required:
359
+ - output_dir
360
+ - token_list
361
+ distributed: false
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: 0.8.0
2
+ files:
3
+ model_file: exp/tts_train_tacotron2_raw_phn_pypinyin_g2p_phone/199epoch.pth
4
+ python: "3.7.3 (default, Mar 27 2019, 22:11:17) \n[GCC 7.3.0]"
5
+ timestamp: 1596252269.687669
6
+ torch: 1.5.1
7
+ yaml_files:
8
+ train_config: exp/tts_train_tacotron2_raw_phn_pypinyin_g2p_phone/config.yaml