ftshijt committed on
Commit
b5f92fe
1 Parent(s): 58c64e7

Update model

Browse files
Files changed (33) hide show
  1. README.md +491 -1
  2. dump/xvectorespnet/dev/spk_espnet.ark +0 -0
  3. dump/xvectorespnet/dev/spk_espnet.scp +108 -0
  4. dump/xvectorespnet/eval1/spk_espnet.ark +0 -0
  5. dump/xvectorespnet/eval1/spk_espnet.scp +108 -0
  6. dump/xvectorespnet/tr_no_dev/spk_espnet.ark +0 -0
  7. dump/xvectorespnet/tr_no_dev/spk_espnet.scp +108 -0
  8. exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/train/feats_stats.npz +3 -0
  9. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/460epoch.pth +3 -0
  10. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/config.yaml +410 -0
  11. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/discriminator_backward_time.png +0 -0
  12. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/discriminator_fake_loss.png +0 -0
  13. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/discriminator_forward_time.png +0 -0
  14. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/discriminator_loss.png +0 -0
  15. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/discriminator_optim_step_time.png +0 -0
  16. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/discriminator_real_loss.png +0 -0
  17. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/discriminator_train_time.png +0 -0
  18. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_adv_loss.png +0 -0
  19. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_backward_time.png +0 -0
  20. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_dur_loss.png +0 -0
  21. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_feat_match_loss.png +0 -0
  22. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_forward_time.png +0 -0
  23. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_kl_loss.png +0 -0
  24. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_loss.png +0 -0
  25. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_mel_loss.png +0 -0
  26. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_optim_step_time.png +0 -0
  27. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_train_time.png +0 -0
  28. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/gpu_max_cached_mem_GB.png +0 -0
  29. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/iter_time.png +0 -0
  30. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/optim0_lr0.png +0 -0
  31. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/optim1_lr0.png +0 -0
  32. exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/train_time.png +0 -0
  33. meta.yaml +8 -0
README.md CHANGED
@@ -1,3 +1,493 @@
1
  ---
2
- license: apache-2.0
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - text-to-speech
6
+ language: en
7
+ datasets:
8
+ - vctk
9
+ license: cc-by-4.0
10
  ---
11
+
12
+ ## ESPnet2 TTS model
13
+
14
+ ### `espnet/espnet_tts_vctk_espnet_spk_voxceleb12_rawnet`
15
+
16
+ This model was trained by ftshijt using the vctk recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout 3e25d8421ef23c9c2d162c1732ca0503cc471257
26
+ pip install -e .
27
+ cd egs2/vctk/tts1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model espnet/espnet_tts_vctk_espnet_spk_voxceleb12_rawnet
29
+ ```
30
+
31
+
32
+
33
+ ## TTS config
34
+
35
+ <details><summary>expand</summary>
36
+
37
+ ```
38
+ config: conf/tuning/train_xvector_vits.yaml
39
+ print_config: false
40
+ log_level: INFO
41
+ drop_last_iter: false
42
+ dry_run: false
43
+ iterator_type: sequence
44
+ valid_iterator_type: null
45
+ output_dir: exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space
46
+ ngpu: 1
47
+ seed: 777
48
+ num_workers: 4
49
+ num_att_plot: 3
50
+ dist_backend: nccl
51
+ dist_init_method: env://
52
+ dist_world_size: null
53
+ dist_rank: null
54
+ local_rank: 0
55
+ dist_master_addr: null
56
+ dist_master_port: null
57
+ dist_launcher: null
58
+ multiprocessing_distributed: false
59
+ unused_parameters: true
60
+ sharded_ddp: false
61
+ cudnn_enabled: true
62
+ cudnn_benchmark: false
63
+ cudnn_deterministic: false
64
+ collect_stats: false
65
+ write_collected_feats: false
66
+ max_epoch: 1000
67
+ patience: null
68
+ val_scheduler_criterion:
69
+ - valid
70
+ - loss
71
+ early_stopping_criterion:
72
+ - valid
73
+ - loss
74
+ - min
75
+ best_model_criterion:
76
+ - - train
77
+ - total_count
78
+ - max
79
+ keep_nbest_models: 10
80
+ nbest_averaging_interval: 0
81
+ grad_clip: -1
82
+ grad_clip_type: 2.0
83
+ grad_noise: false
84
+ accum_grad: 1
85
+ no_forward_run: false
86
+ resume: true
87
+ train_dtype: float32
88
+ use_amp: false
89
+ log_interval: 50
90
+ use_matplotlib: true
91
+ use_tensorboard: true
92
+ create_graph_in_tensorboard: false
93
+ use_wandb: false
94
+ wandb_project: null
95
+ wandb_id: null
96
+ wandb_entity: null
97
+ wandb_name: null
98
+ wandb_model_log_interval: -1
99
+ detect_anomaly: false
100
+ use_lora: false
101
+ save_lora_only: true
102
+ lora_conf: {}
103
+ pretrain_path: null
104
+ init_param: []
105
+ ignore_init_mismatch: false
106
+ freeze_param: []
107
+ num_iters_per_epoch: 1000
108
+ batch_size: 20
109
+ valid_batch_size: null
110
+ batch_bins: 500000
111
+ valid_batch_bins: null
112
+ train_shape_file:
113
+ - exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/train/text_shape.phn
114
+ - exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/train/speech_shape
115
+ valid_shape_file:
116
+ - exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/valid/text_shape.phn
117
+ - exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/valid/speech_shape
118
+ batch_type: numel
119
+ valid_batch_type: null
120
+ fold_length:
121
+ - 150
122
+ - 240000
123
+ sort_in_batch: descending
124
+ shuffle_within_batch: false
125
+ sort_batch: descending
126
+ multiple_iterator: false
127
+ chunk_length: 500
128
+ chunk_shift_ratio: 0.5
129
+ num_cache_chunks: 1024
130
+ chunk_excluded_key_prefixes: []
131
+ chunk_default_fs: null
132
+ train_data_path_and_name_and_type:
133
+ - - dump/raw/tr_no_dev/text
134
+ - text
135
+ - text
136
+ - - dump/raw/tr_no_dev/wav.scp
137
+ - speech
138
+ - sound
139
+ - - dump/xvector/tr_no_dev/xvector.scp
140
+ - spembs
141
+ - kaldi_ark
142
+ valid_data_path_and_name_and_type:
143
+ - - dump/raw/dev/text
144
+ - text
145
+ - text
146
+ - - dump/raw/dev/wav.scp
147
+ - speech
148
+ - sound
149
+ - - dump/xvector/dev/xvector.scp
150
+ - spembs
151
+ - kaldi_ark
152
+ allow_variable_data_keys: false
153
+ max_cache_size: 0.0
154
+ max_cache_fd: 32
155
+ allow_multi_rates: false
156
+ valid_max_cache_size: null
157
+ exclude_weight_decay: false
158
+ exclude_weight_decay_conf: {}
159
+ optim: adamw
160
+ optim_conf:
161
+ lr: 0.0002
162
+ betas:
163
+ - 0.8
164
+ - 0.99
165
+ eps: 1.0e-09
166
+ weight_decay: 0.0
167
+ scheduler: exponentiallr
168
+ scheduler_conf:
169
+ gamma: 0.999875
170
+ optim2: adamw
171
+ optim2_conf:
172
+ lr: 0.0002
173
+ betas:
174
+ - 0.8
175
+ - 0.99
176
+ eps: 1.0e-09
177
+ weight_decay: 0.0
178
+ scheduler2: exponentiallr
179
+ scheduler2_conf:
180
+ gamma: 0.999875
181
+ generator_first: false
182
+ token_list:
183
+ - <blank>
184
+ - <unk>
185
+ - AH0
186
+ - T
187
+ - N
188
+ - S
189
+ - R
190
+ - IH1
191
+ - D
192
+ - L
193
+ - .
194
+ - Z
195
+ - DH
196
+ - K
197
+ - W
198
+ - M
199
+ - AE1
200
+ - EH1
201
+ - AA1
202
+ - IH0
203
+ - IY1
204
+ - AH1
205
+ - B
206
+ - P
207
+ - V
208
+ - ER0
209
+ - F
210
+ - HH
211
+ - AY1
212
+ - EY1
213
+ - UW1
214
+ - IY0
215
+ - AO1
216
+ - OW1
217
+ - G
218
+ - ','
219
+ - NG
220
+ - SH
221
+ - Y
222
+ - JH
223
+ - AW1
224
+ - UH1
225
+ - TH
226
+ - ER1
227
+ - CH
228
+ - '?'
229
+ - OW0
230
+ - OW2
231
+ - EH2
232
+ - EY2
233
+ - UW0
234
+ - IH2
235
+ - OY1
236
+ - AY2
237
+ - ZH
238
+ - AW2
239
+ - EH0
240
+ - IY2
241
+ - AA2
242
+ - AE0
243
+ - AH2
244
+ - AE2
245
+ - AO0
246
+ - AO2
247
+ - AY0
248
+ - UW2
249
+ - UH2
250
+ - AA0
251
+ - AW0
252
+ - EY0
253
+ - '!'
254
+ - UH0
255
+ - ER2
256
+ - OY2
257
+ - ''''
258
+ - OY0
259
+ - <sos/eos>
260
+ odim: null
261
+ model_conf: {}
262
+ use_preprocessor: true
263
+ token_type: phn
264
+ bpemodel: null
265
+ non_linguistic_symbols: null
266
+ cleaner: tacotron
267
+ g2p: g2p_en_no_space
268
+ feats_extract: fbank
269
+ feats_extract_conf:
270
+ n_fft: 2048
271
+ hop_length: 300
272
+ win_length: 1200
273
+ fs: 24000
274
+ fmin: 80
275
+ fmax: 7600
276
+ n_mels: 80
277
+ normalize: global_mvn
278
+ normalize_conf:
279
+ stats_file: exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/train/feats_stats.npz
280
+ tts: vits
281
+ tts_conf:
282
+ generator_type: vits_generator
283
+ generator_params:
284
+ hidden_channels: 192
285
+ spks: -1
286
+ spk_embed_dim: 192
287
+ global_channels: 256
288
+ segment_size: 32
289
+ text_encoder_attention_heads: 2
290
+ text_encoder_ffn_expand: 4
291
+ text_encoder_blocks: 6
292
+ text_encoder_positionwise_layer_type: conv1d
293
+ text_encoder_positionwise_conv_kernel_size: 3
294
+ text_encoder_positional_encoding_layer_type: rel_pos
295
+ text_encoder_self_attention_layer_type: rel_selfattn
296
+ text_encoder_activation_type: swish
297
+ text_encoder_normalize_before: true
298
+ text_encoder_dropout_rate: 0.1
299
+ text_encoder_positional_dropout_rate: 0.0
300
+ text_encoder_attention_dropout_rate: 0.1
301
+ use_macaron_style_in_text_encoder: true
302
+ use_conformer_conv_in_text_encoder: false
303
+ text_encoder_conformer_kernel_size: -1
304
+ decoder_kernel_size: 7
305
+ decoder_channels: 512
306
+ decoder_upsample_scales:
307
+ - 5
308
+ - 5
309
+ - 4
310
+ - 3
311
+ decoder_upsample_kernel_sizes:
312
+ - 10
313
+ - 10
314
+ - 8
315
+ - 6
316
+ decoder_resblock_kernel_sizes:
317
+ - 3
318
+ - 7
319
+ - 11
320
+ decoder_resblock_dilations:
321
+ - - 1
322
+ - 3
323
+ - 5
324
+ - - 1
325
+ - 3
326
+ - 5
327
+ - - 1
328
+ - 3
329
+ - 5
330
+ use_weight_norm_in_decoder: true
331
+ posterior_encoder_kernel_size: 5
332
+ posterior_encoder_layers: 16
333
+ posterior_encoder_stacks: 1
334
+ posterior_encoder_base_dilation: 1
335
+ posterior_encoder_dropout_rate: 0.0
336
+ use_weight_norm_in_posterior_encoder: true
337
+ flow_flows: 4
338
+ flow_kernel_size: 5
339
+ flow_base_dilation: 1
340
+ flow_layers: 4
341
+ flow_dropout_rate: 0.0
342
+ use_weight_norm_in_flow: true
343
+ use_only_mean_in_flow: true
344
+ stochastic_duration_predictor_kernel_size: 3
345
+ stochastic_duration_predictor_dropout_rate: 0.5
346
+ stochastic_duration_predictor_flows: 4
347
+ stochastic_duration_predictor_dds_conv_layers: 3
348
+ vocabs: 77
349
+ aux_channels: 80
350
+ discriminator_type: hifigan_multi_scale_multi_period_discriminator
351
+ discriminator_params:
352
+ scales: 1
353
+ scale_downsample_pooling: AvgPool1d
354
+ scale_downsample_pooling_params:
355
+ kernel_size: 4
356
+ stride: 2
357
+ padding: 2
358
+ scale_discriminator_params:
359
+ in_channels: 1
360
+ out_channels: 1
361
+ kernel_sizes:
362
+ - 15
363
+ - 41
364
+ - 5
365
+ - 3
366
+ channels: 128
367
+ max_downsample_channels: 1024
368
+ max_groups: 16
369
+ bias: true
370
+ downsample_scales:
371
+ - 2
372
+ - 2
373
+ - 4
374
+ - 4
375
+ - 1
376
+ nonlinear_activation: LeakyReLU
377
+ nonlinear_activation_params:
378
+ negative_slope: 0.1
379
+ use_weight_norm: true
380
+ use_spectral_norm: false
381
+ follow_official_norm: false
382
+ periods:
383
+ - 2
384
+ - 3
385
+ - 5
386
+ - 7
387
+ - 11
388
+ period_discriminator_params:
389
+ in_channels: 1
390
+ out_channels: 1
391
+ kernel_sizes:
392
+ - 5
393
+ - 3
394
+ channels: 32
395
+ downsample_scales:
396
+ - 3
397
+ - 3
398
+ - 3
399
+ - 3
400
+ - 1
401
+ max_downsample_channels: 1024
402
+ bias: true
403
+ nonlinear_activation: LeakyReLU
404
+ nonlinear_activation_params:
405
+ negative_slope: 0.1
406
+ use_weight_norm: true
407
+ use_spectral_norm: false
408
+ generator_adv_loss_params:
409
+ average_by_discriminators: false
410
+ loss_type: mse
411
+ discriminator_adv_loss_params:
412
+ average_by_discriminators: false
413
+ loss_type: mse
414
+ feat_match_loss_params:
415
+ average_by_discriminators: false
416
+ average_by_layers: false
417
+ include_final_outputs: true
418
+ mel_loss_params:
419
+ fs: 24000
420
+ n_fft: 2048
421
+ hop_length: 300
422
+ win_length: null
423
+ window: hann
424
+ n_mels: 80
425
+ fmin: 0
426
+ fmax: null
427
+ log_base: null
428
+ lambda_adv: 1.0
429
+ lambda_mel: 45.0
430
+ lambda_feat_match: 2.0
431
+ lambda_dur: 1.0
432
+ lambda_kl: 1.0
433
+ sampling_rate: 24000
434
+ cache_generator_outputs: true
435
+ pitch_extract: null
436
+ pitch_extract_conf: {}
437
+ pitch_normalize: null
438
+ pitch_normalize_conf: {}
439
+ energy_extract: null
440
+ energy_extract_conf: {}
441
+ energy_normalize: null
442
+ energy_normalize_conf: {}
443
+ required:
444
+ - output_dir
445
+ - token_list
446
+ version: '202310'
447
+ distributed: false
448
+ ```
449
+
450
+ </details>
451
+
452
+
453
+
454
+ ### Citing ESPnet
455
+
456
+ ```bibtex
457
+ @inproceedings{watanabe2018espnet,
458
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
459
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
460
+ year={2018},
461
+ booktitle={Proceedings of Interspeech},
462
+ pages={2207--2211},
463
+ doi={10.21437/Interspeech.2018-1456},
464
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
465
+ }
466
+
467
+
468
+
469
+
470
+ @inproceedings{hayashi2020espnet,
471
+ title={{ESPnet-TTS}: Unified, reproducible, and integratable open source end-to-end text-to-speech toolkit},
472
+ author={Hayashi, Tomoki and Yamamoto, Ryuichi and Inoue, Katsuki and Yoshimura, Takenori and Watanabe, Shinji and Toda, Tomoki and Takeda, Kazuya and Zhang, Yu and Tan, Xu},
473
+ booktitle={Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
474
+ pages={7654--7658},
475
+ year={2020},
476
+ organization={IEEE}
477
+ }
478
+
479
+
480
+ ```
481
+
482
+ or arXiv:
483
+
484
+ ```bibtex
485
+ @misc{watanabe2018espnet,
486
+ title={ESPnet: End-to-End Speech Processing Toolkit},
487
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
488
+ year={2018},
489
+ eprint={1804.00015},
490
+ archivePrefix={arXiv},
491
+ primaryClass={cs.CL}
492
+ }
493
+ ```
dump/xvectorespnet/dev/spk_espnet.ark ADDED
Binary file (84.6 kB). View file
 
dump/xvectorespnet/dev/spk_espnet.scp ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ p225 dump/xvectorespnet/dev/spk_xvector.ark:5
2
+ p226 dump/xvectorespnet/dev/spk_xvector.ark:788
3
+ p227 dump/xvectorespnet/dev/spk_xvector.ark:1571
4
+ p228 dump/xvectorespnet/dev/spk_xvector.ark:2354
5
+ p229 dump/xvectorespnet/dev/spk_xvector.ark:3137
6
+ p230 dump/xvectorespnet/dev/spk_xvector.ark:3920
7
+ p231 dump/xvectorespnet/dev/spk_xvector.ark:4703
8
+ p232 dump/xvectorespnet/dev/spk_xvector.ark:5486
9
+ p233 dump/xvectorespnet/dev/spk_xvector.ark:6269
10
+ p234 dump/xvectorespnet/dev/spk_xvector.ark:7052
11
+ p236 dump/xvectorespnet/dev/spk_xvector.ark:7835
12
+ p237 dump/xvectorespnet/dev/spk_xvector.ark:8618
13
+ p238 dump/xvectorespnet/dev/spk_xvector.ark:9401
14
+ p239 dump/xvectorespnet/dev/spk_xvector.ark:10184
15
+ p240 dump/xvectorespnet/dev/spk_xvector.ark:10967
16
+ p241 dump/xvectorespnet/dev/spk_xvector.ark:11750
17
+ p243 dump/xvectorespnet/dev/spk_xvector.ark:12533
18
+ p244 dump/xvectorespnet/dev/spk_xvector.ark:13316
19
+ p245 dump/xvectorespnet/dev/spk_xvector.ark:14099
20
+ p246 dump/xvectorespnet/dev/spk_xvector.ark:14882
21
+ p247 dump/xvectorespnet/dev/spk_xvector.ark:15665
22
+ p248 dump/xvectorespnet/dev/spk_xvector.ark:16448
23
+ p249 dump/xvectorespnet/dev/spk_xvector.ark:17231
24
+ p250 dump/xvectorespnet/dev/spk_xvector.ark:18014
25
+ p251 dump/xvectorespnet/dev/spk_xvector.ark:18797
26
+ p252 dump/xvectorespnet/dev/spk_xvector.ark:19580
27
+ p253 dump/xvectorespnet/dev/spk_xvector.ark:20363
28
+ p254 dump/xvectorespnet/dev/spk_xvector.ark:21146
29
+ p255 dump/xvectorespnet/dev/spk_xvector.ark:21929
30
+ p256 dump/xvectorespnet/dev/spk_xvector.ark:22712
31
+ p257 dump/xvectorespnet/dev/spk_xvector.ark:23495
32
+ p258 dump/xvectorespnet/dev/spk_xvector.ark:24278
33
+ p259 dump/xvectorespnet/dev/spk_xvector.ark:25061
34
+ p260 dump/xvectorespnet/dev/spk_xvector.ark:25844
35
+ p261 dump/xvectorespnet/dev/spk_xvector.ark:26627
36
+ p262 dump/xvectorespnet/dev/spk_xvector.ark:27410
37
+ p263 dump/xvectorespnet/dev/spk_xvector.ark:28193
38
+ p264 dump/xvectorespnet/dev/spk_xvector.ark:28976
39
+ p265 dump/xvectorespnet/dev/spk_xvector.ark:29759
40
+ p266 dump/xvectorespnet/dev/spk_xvector.ark:30542
41
+ p267 dump/xvectorespnet/dev/spk_xvector.ark:31325
42
+ p268 dump/xvectorespnet/dev/spk_xvector.ark:32108
43
+ p269 dump/xvectorespnet/dev/spk_xvector.ark:32891
44
+ p270 dump/xvectorespnet/dev/spk_xvector.ark:33674
45
+ p271 dump/xvectorespnet/dev/spk_xvector.ark:34457
46
+ p272 dump/xvectorespnet/dev/spk_xvector.ark:35240
47
+ p273 dump/xvectorespnet/dev/spk_xvector.ark:36023
48
+ p274 dump/xvectorespnet/dev/spk_xvector.ark:36806
49
+ p275 dump/xvectorespnet/dev/spk_xvector.ark:37589
50
+ p276 dump/xvectorespnet/dev/spk_xvector.ark:38372
51
+ p277 dump/xvectorespnet/dev/spk_xvector.ark:39155
52
+ p278 dump/xvectorespnet/dev/spk_xvector.ark:39938
53
+ p279 dump/xvectorespnet/dev/spk_xvector.ark:40721
54
+ p280 dump/xvectorespnet/dev/spk_xvector.ark:41504
55
+ p281 dump/xvectorespnet/dev/spk_xvector.ark:42287
56
+ p282 dump/xvectorespnet/dev/spk_xvector.ark:43070
57
+ p283 dump/xvectorespnet/dev/spk_xvector.ark:43853
58
+ p284 dump/xvectorespnet/dev/spk_xvector.ark:44636
59
+ p285 dump/xvectorespnet/dev/spk_xvector.ark:45419
60
+ p286 dump/xvectorespnet/dev/spk_xvector.ark:46202
61
+ p287 dump/xvectorespnet/dev/spk_xvector.ark:46985
62
+ p288 dump/xvectorespnet/dev/spk_xvector.ark:47768
63
+ p292 dump/xvectorespnet/dev/spk_xvector.ark:48551
64
+ p293 dump/xvectorespnet/dev/spk_xvector.ark:49334
65
+ p294 dump/xvectorespnet/dev/spk_xvector.ark:50117
66
+ p295 dump/xvectorespnet/dev/spk_xvector.ark:50900
67
+ p297 dump/xvectorespnet/dev/spk_xvector.ark:51683
68
+ p298 dump/xvectorespnet/dev/spk_xvector.ark:52466
69
+ p299 dump/xvectorespnet/dev/spk_xvector.ark:53249
70
+ p300 dump/xvectorespnet/dev/spk_xvector.ark:54032
71
+ p301 dump/xvectorespnet/dev/spk_xvector.ark:54815
72
+ p302 dump/xvectorespnet/dev/spk_xvector.ark:55598
73
+ p303 dump/xvectorespnet/dev/spk_xvector.ark:56381
74
+ p304 dump/xvectorespnet/dev/spk_xvector.ark:57164
75
+ p305 dump/xvectorespnet/dev/spk_xvector.ark:57947
76
+ p306 dump/xvectorespnet/dev/spk_xvector.ark:58730
77
+ p307 dump/xvectorespnet/dev/spk_xvector.ark:59513
78
+ p308 dump/xvectorespnet/dev/spk_xvector.ark:60296
79
+ p310 dump/xvectorespnet/dev/spk_xvector.ark:61079
80
+ p311 dump/xvectorespnet/dev/spk_xvector.ark:61862
81
+ p312 dump/xvectorespnet/dev/spk_xvector.ark:62645
82
+ p313 dump/xvectorespnet/dev/spk_xvector.ark:63428
83
+ p314 dump/xvectorespnet/dev/spk_xvector.ark:64211
84
+ p316 dump/xvectorespnet/dev/spk_xvector.ark:64994
85
+ p317 dump/xvectorespnet/dev/spk_xvector.ark:65777
86
+ p318 dump/xvectorespnet/dev/spk_xvector.ark:66560
87
+ p323 dump/xvectorespnet/dev/spk_xvector.ark:67343
88
+ p326 dump/xvectorespnet/dev/spk_xvector.ark:68126
89
+ p329 dump/xvectorespnet/dev/spk_xvector.ark:68909
90
+ p330 dump/xvectorespnet/dev/spk_xvector.ark:69692
91
+ p333 dump/xvectorespnet/dev/spk_xvector.ark:70475
92
+ p334 dump/xvectorespnet/dev/spk_xvector.ark:71258
93
+ p335 dump/xvectorespnet/dev/spk_xvector.ark:72041
94
+ p336 dump/xvectorespnet/dev/spk_xvector.ark:72824
95
+ p339 dump/xvectorespnet/dev/spk_xvector.ark:73607
96
+ p340 dump/xvectorespnet/dev/spk_xvector.ark:74390
97
+ p341 dump/xvectorespnet/dev/spk_xvector.ark:75173
98
+ p343 dump/xvectorespnet/dev/spk_xvector.ark:75956
99
+ p345 dump/xvectorespnet/dev/spk_xvector.ark:76739
100
+ p347 dump/xvectorespnet/dev/spk_xvector.ark:77522
101
+ p351 dump/xvectorespnet/dev/spk_xvector.ark:78305
102
+ p360 dump/xvectorespnet/dev/spk_xvector.ark:79088
103
+ p361 dump/xvectorespnet/dev/spk_xvector.ark:79871
104
+ p362 dump/xvectorespnet/dev/spk_xvector.ark:80654
105
+ p363 dump/xvectorespnet/dev/spk_xvector.ark:81437
106
+ p364 dump/xvectorespnet/dev/spk_xvector.ark:82220
107
+ p374 dump/xvectorespnet/dev/spk_xvector.ark:83003
108
+ p376 dump/xvectorespnet/dev/spk_xvector.ark:83786
dump/xvectorespnet/eval1/spk_espnet.ark ADDED
Binary file (84.6 kB). View file
 
dump/xvectorespnet/eval1/spk_espnet.scp ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ p225 dump/xvectorespnet/eval1/spk_xvector.ark:5
2
+ p226 dump/xvectorespnet/eval1/spk_xvector.ark:788
3
+ p227 dump/xvectorespnet/eval1/spk_xvector.ark:1571
4
+ p228 dump/xvectorespnet/eval1/spk_xvector.ark:2354
5
+ p229 dump/xvectorespnet/eval1/spk_xvector.ark:3137
6
+ p230 dump/xvectorespnet/eval1/spk_xvector.ark:3920
7
+ p231 dump/xvectorespnet/eval1/spk_xvector.ark:4703
8
+ p232 dump/xvectorespnet/eval1/spk_xvector.ark:5486
9
+ p233 dump/xvectorespnet/eval1/spk_xvector.ark:6269
10
+ p234 dump/xvectorespnet/eval1/spk_xvector.ark:7052
11
+ p236 dump/xvectorespnet/eval1/spk_xvector.ark:7835
12
+ p237 dump/xvectorespnet/eval1/spk_xvector.ark:8618
13
+ p238 dump/xvectorespnet/eval1/spk_xvector.ark:9401
14
+ p239 dump/xvectorespnet/eval1/spk_xvector.ark:10184
15
+ p240 dump/xvectorespnet/eval1/spk_xvector.ark:10967
16
+ p241 dump/xvectorespnet/eval1/spk_xvector.ark:11750
17
+ p243 dump/xvectorespnet/eval1/spk_xvector.ark:12533
18
+ p244 dump/xvectorespnet/eval1/spk_xvector.ark:13316
19
+ p245 dump/xvectorespnet/eval1/spk_xvector.ark:14099
20
+ p246 dump/xvectorespnet/eval1/spk_xvector.ark:14882
21
+ p247 dump/xvectorespnet/eval1/spk_xvector.ark:15665
22
+ p248 dump/xvectorespnet/eval1/spk_xvector.ark:16448
23
+ p249 dump/xvectorespnet/eval1/spk_xvector.ark:17231
24
+ p250 dump/xvectorespnet/eval1/spk_xvector.ark:18014
25
+ p251 dump/xvectorespnet/eval1/spk_xvector.ark:18797
26
+ p252 dump/xvectorespnet/eval1/spk_xvector.ark:19580
27
+ p253 dump/xvectorespnet/eval1/spk_xvector.ark:20363
28
+ p254 dump/xvectorespnet/eval1/spk_xvector.ark:21146
29
+ p255 dump/xvectorespnet/eval1/spk_xvector.ark:21929
30
+ p256 dump/xvectorespnet/eval1/spk_xvector.ark:22712
31
+ p257 dump/xvectorespnet/eval1/spk_xvector.ark:23495
32
+ p258 dump/xvectorespnet/eval1/spk_xvector.ark:24278
33
+ p259 dump/xvectorespnet/eval1/spk_xvector.ark:25061
34
+ p260 dump/xvectorespnet/eval1/spk_xvector.ark:25844
35
+ p261 dump/xvectorespnet/eval1/spk_xvector.ark:26627
36
+ p262 dump/xvectorespnet/eval1/spk_xvector.ark:27410
37
+ p263 dump/xvectorespnet/eval1/spk_xvector.ark:28193
38
+ p264 dump/xvectorespnet/eval1/spk_xvector.ark:28976
39
+ p265 dump/xvectorespnet/eval1/spk_xvector.ark:29759
40
+ p266 dump/xvectorespnet/eval1/spk_xvector.ark:30542
41
+ p267 dump/xvectorespnet/eval1/spk_xvector.ark:31325
42
+ p268 dump/xvectorespnet/eval1/spk_xvector.ark:32108
43
+ p269 dump/xvectorespnet/eval1/spk_xvector.ark:32891
44
+ p270 dump/xvectorespnet/eval1/spk_xvector.ark:33674
45
+ p271 dump/xvectorespnet/eval1/spk_xvector.ark:34457
46
+ p272 dump/xvectorespnet/eval1/spk_xvector.ark:35240
47
+ p273 dump/xvectorespnet/eval1/spk_xvector.ark:36023
48
+ p274 dump/xvectorespnet/eval1/spk_xvector.ark:36806
49
+ p275 dump/xvectorespnet/eval1/spk_xvector.ark:37589
50
+ p276 dump/xvectorespnet/eval1/spk_xvector.ark:38372
51
+ p277 dump/xvectorespnet/eval1/spk_xvector.ark:39155
52
+ p278 dump/xvectorespnet/eval1/spk_xvector.ark:39938
53
+ p279 dump/xvectorespnet/eval1/spk_xvector.ark:40721
54
+ p280 dump/xvectorespnet/eval1/spk_xvector.ark:41504
55
+ p281 dump/xvectorespnet/eval1/spk_xvector.ark:42287
56
+ p282 dump/xvectorespnet/eval1/spk_xvector.ark:43070
57
+ p283 dump/xvectorespnet/eval1/spk_xvector.ark:43853
58
+ p284 dump/xvectorespnet/eval1/spk_xvector.ark:44636
59
+ p285 dump/xvectorespnet/eval1/spk_xvector.ark:45419
60
+ p286 dump/xvectorespnet/eval1/spk_xvector.ark:46202
61
+ p287 dump/xvectorespnet/eval1/spk_xvector.ark:46985
62
+ p288 dump/xvectorespnet/eval1/spk_xvector.ark:47768
63
+ p292 dump/xvectorespnet/eval1/spk_xvector.ark:48551
64
+ p293 dump/xvectorespnet/eval1/spk_xvector.ark:49334
65
+ p294 dump/xvectorespnet/eval1/spk_xvector.ark:50117
66
+ p295 dump/xvectorespnet/eval1/spk_xvector.ark:50900
67
+ p297 dump/xvectorespnet/eval1/spk_xvector.ark:51683
68
+ p298 dump/xvectorespnet/eval1/spk_xvector.ark:52466
69
+ p299 dump/xvectorespnet/eval1/spk_xvector.ark:53249
70
+ p300 dump/xvectorespnet/eval1/spk_xvector.ark:54032
71
+ p301 dump/xvectorespnet/eval1/spk_xvector.ark:54815
72
+ p302 dump/xvectorespnet/eval1/spk_xvector.ark:55598
73
+ p303 dump/xvectorespnet/eval1/spk_xvector.ark:56381
74
+ p304 dump/xvectorespnet/eval1/spk_xvector.ark:57164
75
+ p305 dump/xvectorespnet/eval1/spk_xvector.ark:57947
76
+ p306 dump/xvectorespnet/eval1/spk_xvector.ark:58730
77
+ p307 dump/xvectorespnet/eval1/spk_xvector.ark:59513
78
+ p308 dump/xvectorespnet/eval1/spk_xvector.ark:60296
79
+ p310 dump/xvectorespnet/eval1/spk_xvector.ark:61079
80
+ p311 dump/xvectorespnet/eval1/spk_xvector.ark:61862
81
+ p312 dump/xvectorespnet/eval1/spk_xvector.ark:62645
82
+ p313 dump/xvectorespnet/eval1/spk_xvector.ark:63428
83
+ p314 dump/xvectorespnet/eval1/spk_xvector.ark:64211
84
+ p316 dump/xvectorespnet/eval1/spk_xvector.ark:64994
85
+ p317 dump/xvectorespnet/eval1/spk_xvector.ark:65777
86
+ p318 dump/xvectorespnet/eval1/spk_xvector.ark:66560
87
+ p323 dump/xvectorespnet/eval1/spk_xvector.ark:67343
88
+ p326 dump/xvectorespnet/eval1/spk_xvector.ark:68126
89
+ p329 dump/xvectorespnet/eval1/spk_xvector.ark:68909
90
+ p330 dump/xvectorespnet/eval1/spk_xvector.ark:69692
91
+ p333 dump/xvectorespnet/eval1/spk_xvector.ark:70475
92
+ p334 dump/xvectorespnet/eval1/spk_xvector.ark:71258
93
+ p335 dump/xvectorespnet/eval1/spk_xvector.ark:72041
94
+ p336 dump/xvectorespnet/eval1/spk_xvector.ark:72824
95
+ p339 dump/xvectorespnet/eval1/spk_xvector.ark:73607
96
+ p340 dump/xvectorespnet/eval1/spk_xvector.ark:74390
97
+ p341 dump/xvectorespnet/eval1/spk_xvector.ark:75173
98
+ p343 dump/xvectorespnet/eval1/spk_xvector.ark:75956
99
+ p345 dump/xvectorespnet/eval1/spk_xvector.ark:76739
100
+ p347 dump/xvectorespnet/eval1/spk_xvector.ark:77522
101
+ p351 dump/xvectorespnet/eval1/spk_xvector.ark:78305
102
+ p360 dump/xvectorespnet/eval1/spk_xvector.ark:79088
103
+ p361 dump/xvectorespnet/eval1/spk_xvector.ark:79871
104
+ p362 dump/xvectorespnet/eval1/spk_xvector.ark:80654
105
+ p363 dump/xvectorespnet/eval1/spk_xvector.ark:81437
106
+ p364 dump/xvectorespnet/eval1/spk_xvector.ark:82220
107
+ p374 dump/xvectorespnet/eval1/spk_xvector.ark:83003
108
+ p376 dump/xvectorespnet/eval1/spk_xvector.ark:83786
dump/xvectorespnet/tr_no_dev/spk_espnet.ark ADDED
Binary file (84.6 kB). View file
 
dump/xvectorespnet/tr_no_dev/spk_espnet.scp ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ p225 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:5
2
+ p226 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:788
3
+ p227 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:1571
4
+ p228 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:2354
5
+ p229 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:3137
6
+ p230 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:3920
7
+ p231 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:4703
8
+ p232 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:5486
9
+ p233 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:6269
10
+ p234 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:7052
11
+ p236 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:7835
12
+ p237 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:8618
13
+ p238 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:9401
14
+ p239 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:10184
15
+ p240 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:10967
16
+ p241 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:11750
17
+ p243 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:12533
18
+ p244 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:13316
19
+ p245 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:14099
20
+ p246 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:14882
21
+ p247 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:15665
22
+ p248 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:16448
23
+ p249 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:17231
24
+ p250 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:18014
25
+ p251 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:18797
26
+ p252 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:19580
27
+ p253 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:20363
28
+ p254 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:21146
29
+ p255 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:21929
30
+ p256 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:22712
31
+ p257 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:23495
32
+ p258 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:24278
33
+ p259 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:25061
34
+ p260 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:25844
35
+ p261 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:26627
36
+ p262 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:27410
37
+ p263 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:28193
38
+ p264 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:28976
39
+ p265 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:29759
40
+ p266 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:30542
41
+ p267 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:31325
42
+ p268 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:32108
43
+ p269 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:32891
44
+ p270 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:33674
45
+ p271 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:34457
46
+ p272 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:35240
47
+ p273 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:36023
48
+ p274 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:36806
49
+ p275 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:37589
50
+ p276 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:38372
51
+ p277 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:39155
52
+ p278 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:39938
53
+ p279 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:40721
54
+ p280 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:41504
55
+ p281 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:42287
56
+ p282 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:43070
57
+ p283 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:43853
58
+ p284 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:44636
59
+ p285 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:45419
60
+ p286 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:46202
61
+ p287 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:46985
62
+ p288 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:47768
63
+ p292 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:48551
64
+ p293 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:49334
65
+ p294 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:50117
66
+ p295 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:50900
67
+ p297 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:51683
68
+ p298 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:52466
69
+ p299 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:53249
70
+ p300 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:54032
71
+ p301 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:54815
72
+ p302 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:55598
73
+ p303 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:56381
74
+ p304 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:57164
75
+ p305 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:57947
76
+ p306 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:58730
77
+ p307 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:59513
78
+ p308 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:60296
79
+ p310 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:61079
80
+ p311 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:61862
81
+ p312 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:62645
82
+ p313 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:63428
83
+ p314 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:64211
84
+ p316 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:64994
85
+ p317 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:65777
86
+ p318 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:66560
87
+ p323 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:67343
88
+ p326 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:68126
89
+ p329 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:68909
90
+ p330 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:69692
91
+ p333 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:70475
92
+ p334 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:71258
93
+ p335 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:72041
94
+ p336 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:72824
95
+ p339 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:73607
96
+ p340 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:74390
97
+ p341 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:75173
98
+ p343 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:75956
99
+ p345 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:76739
100
+ p347 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:77522
101
+ p351 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:78305
102
+ p360 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:79088
103
+ p361 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:79871
104
+ p362 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:80654
105
+ p363 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:81437
106
+ p364 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:82220
107
+ p374 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:83003
108
+ p376 dump/xvectorespnet/tr_no_dev/spk_xvector.ark:83786
exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/train/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fc3efee0ed9c90607b65a80c2016a36198aded0b677b09ad2da807f3230cc00
3
+ size 1402
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/460epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e205f3fde6a8ed2f5247ffa2c89fab0f6fdd005d14c6fa3c67fc3aad769a3a23
3
+ size 382531214
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/config.yaml ADDED
@@ -0,0 +1,410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_xvector_vits.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space
9
+ ngpu: 1
10
+ seed: 777
11
+ num_workers: 4
12
+ num_att_plot: 3
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: true
23
+ sharded_ddp: false
24
+ cudnn_enabled: true
25
+ cudnn_benchmark: false
26
+ cudnn_deterministic: false
27
+ collect_stats: false
28
+ write_collected_feats: false
29
+ max_epoch: 1000
30
+ patience: null
31
+ val_scheduler_criterion:
32
+ - valid
33
+ - loss
34
+ early_stopping_criterion:
35
+ - valid
36
+ - loss
37
+ - min
38
+ best_model_criterion:
39
+ - - train
40
+ - total_count
41
+ - max
42
+ keep_nbest_models: 10
43
+ nbest_averaging_interval: 0
44
+ grad_clip: -1
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 1
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: false
52
+ log_interval: 50
53
+ use_matplotlib: true
54
+ use_tensorboard: true
55
+ create_graph_in_tensorboard: false
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ use_lora: false
64
+ save_lora_only: true
65
+ lora_conf: {}
66
+ pretrain_path: null
67
+ init_param: []
68
+ ignore_init_mismatch: false
69
+ freeze_param: []
70
+ num_iters_per_epoch: 1000
71
+ batch_size: 20
72
+ valid_batch_size: null
73
+ batch_bins: 500000
74
+ valid_batch_bins: null
75
+ train_shape_file:
76
+ - exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/train/text_shape.phn
77
+ - exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/train/speech_shape
78
+ valid_shape_file:
79
+ - exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/valid/text_shape.phn
80
+ - exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/valid/speech_shape
81
+ batch_type: numel
82
+ valid_batch_type: null
83
+ fold_length:
84
+ - 150
85
+ - 240000
86
+ sort_in_batch: descending
87
+ shuffle_within_batch: false
88
+ sort_batch: descending
89
+ multiple_iterator: false
90
+ chunk_length: 500
91
+ chunk_shift_ratio: 0.5
92
+ num_cache_chunks: 1024
93
+ chunk_excluded_key_prefixes: []
94
+ chunk_default_fs: null
95
+ train_data_path_and_name_and_type:
96
+ - - dump/raw/tr_no_dev/text
97
+ - text
98
+ - text
99
+ - - dump/raw/tr_no_dev/wav.scp
100
+ - speech
101
+ - sound
102
+ - - dump/xvector/tr_no_dev/xvector.scp
103
+ - spembs
104
+ - kaldi_ark
105
+ valid_data_path_and_name_and_type:
106
+ - - dump/raw/dev/text
107
+ - text
108
+ - text
109
+ - - dump/raw/dev/wav.scp
110
+ - speech
111
+ - sound
112
+ - - dump/xvector/dev/xvector.scp
113
+ - spembs
114
+ - kaldi_ark
115
+ allow_variable_data_keys: false
116
+ max_cache_size: 0.0
117
+ max_cache_fd: 32
118
+ allow_multi_rates: false
119
+ valid_max_cache_size: null
120
+ exclude_weight_decay: false
121
+ exclude_weight_decay_conf: {}
122
+ optim: adamw
123
+ optim_conf:
124
+ lr: 0.0002
125
+ betas:
126
+ - 0.8
127
+ - 0.99
128
+ eps: 1.0e-09
129
+ weight_decay: 0.0
130
+ scheduler: exponentiallr
131
+ scheduler_conf:
132
+ gamma: 0.999875
133
+ optim2: adamw
134
+ optim2_conf:
135
+ lr: 0.0002
136
+ betas:
137
+ - 0.8
138
+ - 0.99
139
+ eps: 1.0e-09
140
+ weight_decay: 0.0
141
+ scheduler2: exponentiallr
142
+ scheduler2_conf:
143
+ gamma: 0.999875
144
+ generator_first: false
145
+ token_list:
146
+ - <blank>
147
+ - <unk>
148
+ - AH0
149
+ - T
150
+ - N
151
+ - S
152
+ - R
153
+ - IH1
154
+ - D
155
+ - L
156
+ - .
157
+ - Z
158
+ - DH
159
+ - K
160
+ - W
161
+ - M
162
+ - AE1
163
+ - EH1
164
+ - AA1
165
+ - IH0
166
+ - IY1
167
+ - AH1
168
+ - B
169
+ - P
170
+ - V
171
+ - ER0
172
+ - F
173
+ - HH
174
+ - AY1
175
+ - EY1
176
+ - UW1
177
+ - IY0
178
+ - AO1
179
+ - OW1
180
+ - G
181
+ - ','
182
+ - NG
183
+ - SH
184
+ - Y
185
+ - JH
186
+ - AW1
187
+ - UH1
188
+ - TH
189
+ - ER1
190
+ - CH
191
+ - '?'
192
+ - OW0
193
+ - OW2
194
+ - EH2
195
+ - EY2
196
+ - UW0
197
+ - IH2
198
+ - OY1
199
+ - AY2
200
+ - ZH
201
+ - AW2
202
+ - EH0
203
+ - IY2
204
+ - AA2
205
+ - AE0
206
+ - AH2
207
+ - AE2
208
+ - AO0
209
+ - AO2
210
+ - AY0
211
+ - UW2
212
+ - UH2
213
+ - AA0
214
+ - AW0
215
+ - EY0
216
+ - '!'
217
+ - UH0
218
+ - ER2
219
+ - OY2
220
+ - ''''
221
+ - OY0
222
+ - <sos/eos>
223
+ odim: null
224
+ model_conf: {}
225
+ use_preprocessor: true
226
+ token_type: phn
227
+ bpemodel: null
228
+ non_linguistic_symbols: null
229
+ cleaner: tacotron
230
+ g2p: g2p_en_no_space
231
+ feats_extract: fbank
232
+ feats_extract_conf:
233
+ n_fft: 2048
234
+ hop_length: 300
235
+ win_length: 1200
236
+ fs: 24000
237
+ fmin: 80
238
+ fmax: 7600
239
+ n_mels: 80
240
+ normalize: global_mvn
241
+ normalize_conf:
242
+ stats_file: exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/train/feats_stats.npz
243
+ tts: vits
244
+ tts_conf:
245
+ generator_type: vits_generator
246
+ generator_params:
247
+ hidden_channels: 192
248
+ spks: -1
249
+ spk_embed_dim: 192
250
+ global_channels: 256
251
+ segment_size: 32
252
+ text_encoder_attention_heads: 2
253
+ text_encoder_ffn_expand: 4
254
+ text_encoder_blocks: 6
255
+ text_encoder_positionwise_layer_type: conv1d
256
+ text_encoder_positionwise_conv_kernel_size: 3
257
+ text_encoder_positional_encoding_layer_type: rel_pos
258
+ text_encoder_self_attention_layer_type: rel_selfattn
259
+ text_encoder_activation_type: swish
260
+ text_encoder_normalize_before: true
261
+ text_encoder_dropout_rate: 0.1
262
+ text_encoder_positional_dropout_rate: 0.0
263
+ text_encoder_attention_dropout_rate: 0.1
264
+ use_macaron_style_in_text_encoder: true
265
+ use_conformer_conv_in_text_encoder: false
266
+ text_encoder_conformer_kernel_size: -1
267
+ decoder_kernel_size: 7
268
+ decoder_channels: 512
269
+ decoder_upsample_scales:
270
+ - 5
271
+ - 5
272
+ - 4
273
+ - 3
274
+ decoder_upsample_kernel_sizes:
275
+ - 10
276
+ - 10
277
+ - 8
278
+ - 6
279
+ decoder_resblock_kernel_sizes:
280
+ - 3
281
+ - 7
282
+ - 11
283
+ decoder_resblock_dilations:
284
+ - - 1
285
+ - 3
286
+ - 5
287
+ - - 1
288
+ - 3
289
+ - 5
290
+ - - 1
291
+ - 3
292
+ - 5
293
+ use_weight_norm_in_decoder: true
294
+ posterior_encoder_kernel_size: 5
295
+ posterior_encoder_layers: 16
296
+ posterior_encoder_stacks: 1
297
+ posterior_encoder_base_dilation: 1
298
+ posterior_encoder_dropout_rate: 0.0
299
+ use_weight_norm_in_posterior_encoder: true
300
+ flow_flows: 4
301
+ flow_kernel_size: 5
302
+ flow_base_dilation: 1
303
+ flow_layers: 4
304
+ flow_dropout_rate: 0.0
305
+ use_weight_norm_in_flow: true
306
+ use_only_mean_in_flow: true
307
+ stochastic_duration_predictor_kernel_size: 3
308
+ stochastic_duration_predictor_dropout_rate: 0.5
309
+ stochastic_duration_predictor_flows: 4
310
+ stochastic_duration_predictor_dds_conv_layers: 3
311
+ vocabs: 77
312
+ aux_channels: 80
313
+ discriminator_type: hifigan_multi_scale_multi_period_discriminator
314
+ discriminator_params:
315
+ scales: 1
316
+ scale_downsample_pooling: AvgPool1d
317
+ scale_downsample_pooling_params:
318
+ kernel_size: 4
319
+ stride: 2
320
+ padding: 2
321
+ scale_discriminator_params:
322
+ in_channels: 1
323
+ out_channels: 1
324
+ kernel_sizes:
325
+ - 15
326
+ - 41
327
+ - 5
328
+ - 3
329
+ channels: 128
330
+ max_downsample_channels: 1024
331
+ max_groups: 16
332
+ bias: true
333
+ downsample_scales:
334
+ - 2
335
+ - 2
336
+ - 4
337
+ - 4
338
+ - 1
339
+ nonlinear_activation: LeakyReLU
340
+ nonlinear_activation_params:
341
+ negative_slope: 0.1
342
+ use_weight_norm: true
343
+ use_spectral_norm: false
344
+ follow_official_norm: false
345
+ periods:
346
+ - 2
347
+ - 3
348
+ - 5
349
+ - 7
350
+ - 11
351
+ period_discriminator_params:
352
+ in_channels: 1
353
+ out_channels: 1
354
+ kernel_sizes:
355
+ - 5
356
+ - 3
357
+ channels: 32
358
+ downsample_scales:
359
+ - 3
360
+ - 3
361
+ - 3
362
+ - 3
363
+ - 1
364
+ max_downsample_channels: 1024
365
+ bias: true
366
+ nonlinear_activation: LeakyReLU
367
+ nonlinear_activation_params:
368
+ negative_slope: 0.1
369
+ use_weight_norm: true
370
+ use_spectral_norm: false
371
+ generator_adv_loss_params:
372
+ average_by_discriminators: false
373
+ loss_type: mse
374
+ discriminator_adv_loss_params:
375
+ average_by_discriminators: false
376
+ loss_type: mse
377
+ feat_match_loss_params:
378
+ average_by_discriminators: false
379
+ average_by_layers: false
380
+ include_final_outputs: true
381
+ mel_loss_params:
382
+ fs: 24000
383
+ n_fft: 2048
384
+ hop_length: 300
385
+ win_length: null
386
+ window: hann
387
+ n_mels: 80
388
+ fmin: 0
389
+ fmax: null
390
+ log_base: null
391
+ lambda_adv: 1.0
392
+ lambda_mel: 45.0
393
+ lambda_feat_match: 2.0
394
+ lambda_dur: 1.0
395
+ lambda_kl: 1.0
396
+ sampling_rate: 24000
397
+ cache_generator_outputs: true
398
+ pitch_extract: null
399
+ pitch_extract_conf: {}
400
+ pitch_normalize: null
401
+ pitch_normalize_conf: {}
402
+ energy_extract: null
403
+ energy_extract_conf: {}
404
+ energy_normalize: null
405
+ energy_normalize_conf: {}
406
+ required:
407
+ - output_dir
408
+ - token_list
409
+ version: '202310'
410
+ distributed: false
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/discriminator_backward_time.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/discriminator_fake_loss.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/discriminator_forward_time.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/discriminator_loss.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/discriminator_optim_step_time.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/discriminator_real_loss.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/discriminator_train_time.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_adv_loss.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_backward_time.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_dur_loss.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_feat_match_loss.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_forward_time.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_kl_loss.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_loss.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_mel_loss.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_optim_step_time.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/generator_train_time.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/gpu_max_cached_mem_GB.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/iter_time.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/optim0_lr0.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/optim1_lr0.png ADDED
exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/images/train_time.png ADDED
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202310'
2
+ files:
3
+ model_file: exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/460epoch.pth
4
+ python: "3.9.18 (main, Sep 11 2023, 13:41:44) \n[GCC 11.2.0]"
5
+ timestamp: 1713542459.980234
6
+ torch: 2.1.0+cu118
7
+ yaml_files:
8
+ train_config: exp/tts_train_xvector_vits_raw_phn_tacotron_g2p_en_no_space/config.yaml