Siddhant committed on
Commit
7b563d4
1 Parent(s): 87e7277

import from zenodo

Browse files
Files changed (24) hide show
  1. README.md +50 -0
  2. dump/xvector/dev/spk_xvector.ark +0 -0
  3. dump/xvector/dev/spk_xvector.scp +108 -0
  4. dump/xvector/eval1/spk_xvector.ark +0 -0
  5. dump/xvector/eval1/spk_xvector.scp +108 -0
  6. dump/xvector/tr_no_dev/spk_xvector.ark +0 -0
  7. dump/xvector/tr_no_dev/spk_xvector.scp +108 -0
  8. exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/train/feats_stats.npz +0 -0
  9. exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/config.yaml +271 -0
  10. exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/backward_time.png +0 -0
  11. exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/bce_loss.png +0 -0
  12. exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/decoder_alpha.png +0 -0
  13. exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/enc_dec_attn_loss.png +0 -0
  14. exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/encoder_alpha.png +0 -0
  15. exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/forward_time.png +0 -0
  16. exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/iter_time.png +0 -0
  17. exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/l1_loss.png +0 -0
  18. exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/l2_loss.png +0 -0
  19. exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/loss.png +0 -0
  20. exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/lr_0.png +0 -0
  21. exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/optim_step_time.png +0 -0
  22. exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/train_time.png +0 -0
  23. exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/train.loss.ave_5best.pth +3 -0
  24. meta.yaml +8 -0
README.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - text-to-speech
6
+ language: en
7
+ datasets:
8
+ - vctk
9
+ license: cc-by-4.0
10
+ ---
11
+ ## Example ESPnet2 TTS model
12
+ ### `kan-bayashi/vctk_gst+xvector_transformer`
13
+ ♻️ Imported from https://zenodo.org/record/4393277/
14
+
15
+ This model was trained by kan-bayashi using the vctk/tts1 recipe in [espnet](https://github.com/espnet/espnet/).
16
+ ### Demo: How to use in ESPnet2
17
+ ```python
18
+ # coming soon
19
+ ```
20
+ ### Citing ESPnet
21
+ ```bibtex
22
+ @inproceedings{watanabe2018espnet,
23
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson {Enrique Yalta Soplin} and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
24
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
25
+ year={2018},
26
+ booktitle={Proceedings of Interspeech},
27
+ pages={2207--2211},
28
+ doi={10.21437/Interspeech.2018-1456},
29
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
30
+ }
31
+ @inproceedings{hayashi2020espnet,
32
+ title={{ESPnet-TTS}: Unified, reproducible, and integratable open source end-to-end text-to-speech toolkit},
33
+ author={Hayashi, Tomoki and Yamamoto, Ryuichi and Inoue, Katsuki and Yoshimura, Takenori and Watanabe, Shinji and Toda, Tomoki and Takeda, Kazuya and Zhang, Yu and Tan, Xu},
34
+ booktitle={Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
35
+ pages={7654--7658},
36
+ year={2020},
37
+ organization={IEEE}
38
+ }
39
+ ```
40
+ or arXiv:
41
+ ```bibtex
42
+ @misc{watanabe2018espnet,
43
+ title={ESPnet: End-to-End Speech Processing Toolkit},
44
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Enrique Yalta Soplin and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
45
+ year={2018},
46
+ eprint={1804.00015},
47
+ archivePrefix={arXiv},
48
+ primaryClass={cs.CL}
49
+ }
50
+ ```
dump/xvector/dev/spk_xvector.ark ADDED
Binary file (223 kB). View file
 
dump/xvector/dev/spk_xvector.scp ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ p225 dump/xvector/dev/spk_xvector.ark:5
2
+ p226 dump/xvector/dev/spk_xvector.ark:2068
3
+ p227 dump/xvector/dev/spk_xvector.ark:4131
4
+ p228 dump/xvector/dev/spk_xvector.ark:6194
5
+ p229 dump/xvector/dev/spk_xvector.ark:8257
6
+ p230 dump/xvector/dev/spk_xvector.ark:10320
7
+ p231 dump/xvector/dev/spk_xvector.ark:12383
8
+ p232 dump/xvector/dev/spk_xvector.ark:14446
9
+ p233 dump/xvector/dev/spk_xvector.ark:16509
10
+ p234 dump/xvector/dev/spk_xvector.ark:18572
11
+ p236 dump/xvector/dev/spk_xvector.ark:20635
12
+ p237 dump/xvector/dev/spk_xvector.ark:22698
13
+ p238 dump/xvector/dev/spk_xvector.ark:24761
14
+ p239 dump/xvector/dev/spk_xvector.ark:26824
15
+ p240 dump/xvector/dev/spk_xvector.ark:28887
16
+ p241 dump/xvector/dev/spk_xvector.ark:30950
17
+ p243 dump/xvector/dev/spk_xvector.ark:33013
18
+ p244 dump/xvector/dev/spk_xvector.ark:35076
19
+ p245 dump/xvector/dev/spk_xvector.ark:37139
20
+ p246 dump/xvector/dev/spk_xvector.ark:39202
21
+ p247 dump/xvector/dev/spk_xvector.ark:41265
22
+ p248 dump/xvector/dev/spk_xvector.ark:43328
23
+ p249 dump/xvector/dev/spk_xvector.ark:45391
24
+ p250 dump/xvector/dev/spk_xvector.ark:47454
25
+ p251 dump/xvector/dev/spk_xvector.ark:49517
26
+ p252 dump/xvector/dev/spk_xvector.ark:51580
27
+ p253 dump/xvector/dev/spk_xvector.ark:53643
28
+ p254 dump/xvector/dev/spk_xvector.ark:55706
29
+ p255 dump/xvector/dev/spk_xvector.ark:57769
30
+ p256 dump/xvector/dev/spk_xvector.ark:59832
31
+ p257 dump/xvector/dev/spk_xvector.ark:61895
32
+ p258 dump/xvector/dev/spk_xvector.ark:63958
33
+ p259 dump/xvector/dev/spk_xvector.ark:66021
34
+ p260 dump/xvector/dev/spk_xvector.ark:68084
35
+ p261 dump/xvector/dev/spk_xvector.ark:70147
36
+ p262 dump/xvector/dev/spk_xvector.ark:72210
37
+ p263 dump/xvector/dev/spk_xvector.ark:74273
38
+ p264 dump/xvector/dev/spk_xvector.ark:76336
39
+ p265 dump/xvector/dev/spk_xvector.ark:78399
40
+ p266 dump/xvector/dev/spk_xvector.ark:80462
41
+ p267 dump/xvector/dev/spk_xvector.ark:82525
42
+ p268 dump/xvector/dev/spk_xvector.ark:84588
43
+ p269 dump/xvector/dev/spk_xvector.ark:86651
44
+ p270 dump/xvector/dev/spk_xvector.ark:88714
45
+ p271 dump/xvector/dev/spk_xvector.ark:90777
46
+ p272 dump/xvector/dev/spk_xvector.ark:92840
47
+ p273 dump/xvector/dev/spk_xvector.ark:94903
48
+ p274 dump/xvector/dev/spk_xvector.ark:96966
49
+ p275 dump/xvector/dev/spk_xvector.ark:99029
50
+ p276 dump/xvector/dev/spk_xvector.ark:101092
51
+ p277 dump/xvector/dev/spk_xvector.ark:103155
52
+ p278 dump/xvector/dev/spk_xvector.ark:105218
53
+ p279 dump/xvector/dev/spk_xvector.ark:107281
54
+ p280 dump/xvector/dev/spk_xvector.ark:109344
55
+ p281 dump/xvector/dev/spk_xvector.ark:111407
56
+ p282 dump/xvector/dev/spk_xvector.ark:113470
57
+ p283 dump/xvector/dev/spk_xvector.ark:115533
58
+ p284 dump/xvector/dev/spk_xvector.ark:117596
59
+ p285 dump/xvector/dev/spk_xvector.ark:119659
60
+ p286 dump/xvector/dev/spk_xvector.ark:121722
61
+ p287 dump/xvector/dev/spk_xvector.ark:123785
62
+ p288 dump/xvector/dev/spk_xvector.ark:125848
63
+ p292 dump/xvector/dev/spk_xvector.ark:127911
64
+ p293 dump/xvector/dev/spk_xvector.ark:129974
65
+ p294 dump/xvector/dev/spk_xvector.ark:132037
66
+ p295 dump/xvector/dev/spk_xvector.ark:134100
67
+ p297 dump/xvector/dev/spk_xvector.ark:136163
68
+ p298 dump/xvector/dev/spk_xvector.ark:138226
69
+ p299 dump/xvector/dev/spk_xvector.ark:140289
70
+ p300 dump/xvector/dev/spk_xvector.ark:142352
71
+ p301 dump/xvector/dev/spk_xvector.ark:144415
72
+ p302 dump/xvector/dev/spk_xvector.ark:146478
73
+ p303 dump/xvector/dev/spk_xvector.ark:148541
74
+ p304 dump/xvector/dev/spk_xvector.ark:150604
75
+ p305 dump/xvector/dev/spk_xvector.ark:152667
76
+ p306 dump/xvector/dev/spk_xvector.ark:154730
77
+ p307 dump/xvector/dev/spk_xvector.ark:156793
78
+ p308 dump/xvector/dev/spk_xvector.ark:158856
79
+ p310 dump/xvector/dev/spk_xvector.ark:160919
80
+ p311 dump/xvector/dev/spk_xvector.ark:162982
81
+ p312 dump/xvector/dev/spk_xvector.ark:165045
82
+ p313 dump/xvector/dev/spk_xvector.ark:167108
83
+ p314 dump/xvector/dev/spk_xvector.ark:169171
84
+ p316 dump/xvector/dev/spk_xvector.ark:171234
85
+ p317 dump/xvector/dev/spk_xvector.ark:173297
86
+ p318 dump/xvector/dev/spk_xvector.ark:175360
87
+ p323 dump/xvector/dev/spk_xvector.ark:177423
88
+ p326 dump/xvector/dev/spk_xvector.ark:179486
89
+ p329 dump/xvector/dev/spk_xvector.ark:181549
90
+ p330 dump/xvector/dev/spk_xvector.ark:183612
91
+ p333 dump/xvector/dev/spk_xvector.ark:185675
92
+ p334 dump/xvector/dev/spk_xvector.ark:187738
93
+ p335 dump/xvector/dev/spk_xvector.ark:189801
94
+ p336 dump/xvector/dev/spk_xvector.ark:191864
95
+ p339 dump/xvector/dev/spk_xvector.ark:193927
96
+ p340 dump/xvector/dev/spk_xvector.ark:195990
97
+ p341 dump/xvector/dev/spk_xvector.ark:198053
98
+ p343 dump/xvector/dev/spk_xvector.ark:200116
99
+ p345 dump/xvector/dev/spk_xvector.ark:202179
100
+ p347 dump/xvector/dev/spk_xvector.ark:204242
101
+ p351 dump/xvector/dev/spk_xvector.ark:206305
102
+ p360 dump/xvector/dev/spk_xvector.ark:208368
103
+ p361 dump/xvector/dev/spk_xvector.ark:210431
104
+ p362 dump/xvector/dev/spk_xvector.ark:212494
105
+ p363 dump/xvector/dev/spk_xvector.ark:214557
106
+ p364 dump/xvector/dev/spk_xvector.ark:216620
107
+ p374 dump/xvector/dev/spk_xvector.ark:218683
108
+ p376 dump/xvector/dev/spk_xvector.ark:220746
dump/xvector/eval1/spk_xvector.ark ADDED
Binary file (223 kB). View file
 
dump/xvector/eval1/spk_xvector.scp ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ p225 dump/xvector/eval1/spk_xvector.ark:5
2
+ p226 dump/xvector/eval1/spk_xvector.ark:2068
3
+ p227 dump/xvector/eval1/spk_xvector.ark:4131
4
+ p228 dump/xvector/eval1/spk_xvector.ark:6194
5
+ p229 dump/xvector/eval1/spk_xvector.ark:8257
6
+ p230 dump/xvector/eval1/spk_xvector.ark:10320
7
+ p231 dump/xvector/eval1/spk_xvector.ark:12383
8
+ p232 dump/xvector/eval1/spk_xvector.ark:14446
9
+ p233 dump/xvector/eval1/spk_xvector.ark:16509
10
+ p234 dump/xvector/eval1/spk_xvector.ark:18572
11
+ p236 dump/xvector/eval1/spk_xvector.ark:20635
12
+ p237 dump/xvector/eval1/spk_xvector.ark:22698
13
+ p238 dump/xvector/eval1/spk_xvector.ark:24761
14
+ p239 dump/xvector/eval1/spk_xvector.ark:26824
15
+ p240 dump/xvector/eval1/spk_xvector.ark:28887
16
+ p241 dump/xvector/eval1/spk_xvector.ark:30950
17
+ p243 dump/xvector/eval1/spk_xvector.ark:33013
18
+ p244 dump/xvector/eval1/spk_xvector.ark:35076
19
+ p245 dump/xvector/eval1/spk_xvector.ark:37139
20
+ p246 dump/xvector/eval1/spk_xvector.ark:39202
21
+ p247 dump/xvector/eval1/spk_xvector.ark:41265
22
+ p248 dump/xvector/eval1/spk_xvector.ark:43328
23
+ p249 dump/xvector/eval1/spk_xvector.ark:45391
24
+ p250 dump/xvector/eval1/spk_xvector.ark:47454
25
+ p251 dump/xvector/eval1/spk_xvector.ark:49517
26
+ p252 dump/xvector/eval1/spk_xvector.ark:51580
27
+ p253 dump/xvector/eval1/spk_xvector.ark:53643
28
+ p254 dump/xvector/eval1/spk_xvector.ark:55706
29
+ p255 dump/xvector/eval1/spk_xvector.ark:57769
30
+ p256 dump/xvector/eval1/spk_xvector.ark:59832
31
+ p257 dump/xvector/eval1/spk_xvector.ark:61895
32
+ p258 dump/xvector/eval1/spk_xvector.ark:63958
33
+ p259 dump/xvector/eval1/spk_xvector.ark:66021
34
+ p260 dump/xvector/eval1/spk_xvector.ark:68084
35
+ p261 dump/xvector/eval1/spk_xvector.ark:70147
36
+ p262 dump/xvector/eval1/spk_xvector.ark:72210
37
+ p263 dump/xvector/eval1/spk_xvector.ark:74273
38
+ p264 dump/xvector/eval1/spk_xvector.ark:76336
39
+ p265 dump/xvector/eval1/spk_xvector.ark:78399
40
+ p266 dump/xvector/eval1/spk_xvector.ark:80462
41
+ p267 dump/xvector/eval1/spk_xvector.ark:82525
42
+ p268 dump/xvector/eval1/spk_xvector.ark:84588
43
+ p269 dump/xvector/eval1/spk_xvector.ark:86651
44
+ p270 dump/xvector/eval1/spk_xvector.ark:88714
45
+ p271 dump/xvector/eval1/spk_xvector.ark:90777
46
+ p272 dump/xvector/eval1/spk_xvector.ark:92840
47
+ p273 dump/xvector/eval1/spk_xvector.ark:94903
48
+ p274 dump/xvector/eval1/spk_xvector.ark:96966
49
+ p275 dump/xvector/eval1/spk_xvector.ark:99029
50
+ p276 dump/xvector/eval1/spk_xvector.ark:101092
51
+ p277 dump/xvector/eval1/spk_xvector.ark:103155
52
+ p278 dump/xvector/eval1/spk_xvector.ark:105218
53
+ p279 dump/xvector/eval1/spk_xvector.ark:107281
54
+ p280 dump/xvector/eval1/spk_xvector.ark:109344
55
+ p281 dump/xvector/eval1/spk_xvector.ark:111407
56
+ p282 dump/xvector/eval1/spk_xvector.ark:113470
57
+ p283 dump/xvector/eval1/spk_xvector.ark:115533
58
+ p284 dump/xvector/eval1/spk_xvector.ark:117596
59
+ p285 dump/xvector/eval1/spk_xvector.ark:119659
60
+ p286 dump/xvector/eval1/spk_xvector.ark:121722
61
+ p287 dump/xvector/eval1/spk_xvector.ark:123785
62
+ p288 dump/xvector/eval1/spk_xvector.ark:125848
63
+ p292 dump/xvector/eval1/spk_xvector.ark:127911
64
+ p293 dump/xvector/eval1/spk_xvector.ark:129974
65
+ p294 dump/xvector/eval1/spk_xvector.ark:132037
66
+ p295 dump/xvector/eval1/spk_xvector.ark:134100
67
+ p297 dump/xvector/eval1/spk_xvector.ark:136163
68
+ p298 dump/xvector/eval1/spk_xvector.ark:138226
69
+ p299 dump/xvector/eval1/spk_xvector.ark:140289
70
+ p300 dump/xvector/eval1/spk_xvector.ark:142352
71
+ p301 dump/xvector/eval1/spk_xvector.ark:144415
72
+ p302 dump/xvector/eval1/spk_xvector.ark:146478
73
+ p303 dump/xvector/eval1/spk_xvector.ark:148541
74
+ p304 dump/xvector/eval1/spk_xvector.ark:150604
75
+ p305 dump/xvector/eval1/spk_xvector.ark:152667
76
+ p306 dump/xvector/eval1/spk_xvector.ark:154730
77
+ p307 dump/xvector/eval1/spk_xvector.ark:156793
78
+ p308 dump/xvector/eval1/spk_xvector.ark:158856
79
+ p310 dump/xvector/eval1/spk_xvector.ark:160919
80
+ p311 dump/xvector/eval1/spk_xvector.ark:162982
81
+ p312 dump/xvector/eval1/spk_xvector.ark:165045
82
+ p313 dump/xvector/eval1/spk_xvector.ark:167108
83
+ p314 dump/xvector/eval1/spk_xvector.ark:169171
84
+ p316 dump/xvector/eval1/spk_xvector.ark:171234
85
+ p317 dump/xvector/eval1/spk_xvector.ark:173297
86
+ p318 dump/xvector/eval1/spk_xvector.ark:175360
87
+ p323 dump/xvector/eval1/spk_xvector.ark:177423
88
+ p326 dump/xvector/eval1/spk_xvector.ark:179486
89
+ p329 dump/xvector/eval1/spk_xvector.ark:181549
90
+ p330 dump/xvector/eval1/spk_xvector.ark:183612
91
+ p333 dump/xvector/eval1/spk_xvector.ark:185675
92
+ p334 dump/xvector/eval1/spk_xvector.ark:187738
93
+ p335 dump/xvector/eval1/spk_xvector.ark:189801
94
+ p336 dump/xvector/eval1/spk_xvector.ark:191864
95
+ p339 dump/xvector/eval1/spk_xvector.ark:193927
96
+ p340 dump/xvector/eval1/spk_xvector.ark:195990
97
+ p341 dump/xvector/eval1/spk_xvector.ark:198053
98
+ p343 dump/xvector/eval1/spk_xvector.ark:200116
99
+ p345 dump/xvector/eval1/spk_xvector.ark:202179
100
+ p347 dump/xvector/eval1/spk_xvector.ark:204242
101
+ p351 dump/xvector/eval1/spk_xvector.ark:206305
102
+ p360 dump/xvector/eval1/spk_xvector.ark:208368
103
+ p361 dump/xvector/eval1/spk_xvector.ark:210431
104
+ p362 dump/xvector/eval1/spk_xvector.ark:212494
105
+ p363 dump/xvector/eval1/spk_xvector.ark:214557
106
+ p364 dump/xvector/eval1/spk_xvector.ark:216620
107
+ p374 dump/xvector/eval1/spk_xvector.ark:218683
108
+ p376 dump/xvector/eval1/spk_xvector.ark:220746
dump/xvector/tr_no_dev/spk_xvector.ark ADDED
Binary file (223 kB). View file
 
dump/xvector/tr_no_dev/spk_xvector.scp ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ p225 dump/xvector/tr_no_dev/spk_xvector.ark:5
2
+ p226 dump/xvector/tr_no_dev/spk_xvector.ark:2068
3
+ p227 dump/xvector/tr_no_dev/spk_xvector.ark:4131
4
+ p228 dump/xvector/tr_no_dev/spk_xvector.ark:6194
5
+ p229 dump/xvector/tr_no_dev/spk_xvector.ark:8257
6
+ p230 dump/xvector/tr_no_dev/spk_xvector.ark:10320
7
+ p231 dump/xvector/tr_no_dev/spk_xvector.ark:12383
8
+ p232 dump/xvector/tr_no_dev/spk_xvector.ark:14446
9
+ p233 dump/xvector/tr_no_dev/spk_xvector.ark:16509
10
+ p234 dump/xvector/tr_no_dev/spk_xvector.ark:18572
11
+ p236 dump/xvector/tr_no_dev/spk_xvector.ark:20635
12
+ p237 dump/xvector/tr_no_dev/spk_xvector.ark:22698
13
+ p238 dump/xvector/tr_no_dev/spk_xvector.ark:24761
14
+ p239 dump/xvector/tr_no_dev/spk_xvector.ark:26824
15
+ p240 dump/xvector/tr_no_dev/spk_xvector.ark:28887
16
+ p241 dump/xvector/tr_no_dev/spk_xvector.ark:30950
17
+ p243 dump/xvector/tr_no_dev/spk_xvector.ark:33013
18
+ p244 dump/xvector/tr_no_dev/spk_xvector.ark:35076
19
+ p245 dump/xvector/tr_no_dev/spk_xvector.ark:37139
20
+ p246 dump/xvector/tr_no_dev/spk_xvector.ark:39202
21
+ p247 dump/xvector/tr_no_dev/spk_xvector.ark:41265
22
+ p248 dump/xvector/tr_no_dev/spk_xvector.ark:43328
23
+ p249 dump/xvector/tr_no_dev/spk_xvector.ark:45391
24
+ p250 dump/xvector/tr_no_dev/spk_xvector.ark:47454
25
+ p251 dump/xvector/tr_no_dev/spk_xvector.ark:49517
26
+ p252 dump/xvector/tr_no_dev/spk_xvector.ark:51580
27
+ p253 dump/xvector/tr_no_dev/spk_xvector.ark:53643
28
+ p254 dump/xvector/tr_no_dev/spk_xvector.ark:55706
29
+ p255 dump/xvector/tr_no_dev/spk_xvector.ark:57769
30
+ p256 dump/xvector/tr_no_dev/spk_xvector.ark:59832
31
+ p257 dump/xvector/tr_no_dev/spk_xvector.ark:61895
32
+ p258 dump/xvector/tr_no_dev/spk_xvector.ark:63958
33
+ p259 dump/xvector/tr_no_dev/spk_xvector.ark:66021
34
+ p260 dump/xvector/tr_no_dev/spk_xvector.ark:68084
35
+ p261 dump/xvector/tr_no_dev/spk_xvector.ark:70147
36
+ p262 dump/xvector/tr_no_dev/spk_xvector.ark:72210
37
+ p263 dump/xvector/tr_no_dev/spk_xvector.ark:74273
38
+ p264 dump/xvector/tr_no_dev/spk_xvector.ark:76336
39
+ p265 dump/xvector/tr_no_dev/spk_xvector.ark:78399
40
+ p266 dump/xvector/tr_no_dev/spk_xvector.ark:80462
41
+ p267 dump/xvector/tr_no_dev/spk_xvector.ark:82525
42
+ p268 dump/xvector/tr_no_dev/spk_xvector.ark:84588
43
+ p269 dump/xvector/tr_no_dev/spk_xvector.ark:86651
44
+ p270 dump/xvector/tr_no_dev/spk_xvector.ark:88714
45
+ p271 dump/xvector/tr_no_dev/spk_xvector.ark:90777
46
+ p272 dump/xvector/tr_no_dev/spk_xvector.ark:92840
47
+ p273 dump/xvector/tr_no_dev/spk_xvector.ark:94903
48
+ p274 dump/xvector/tr_no_dev/spk_xvector.ark:96966
49
+ p275 dump/xvector/tr_no_dev/spk_xvector.ark:99029
50
+ p276 dump/xvector/tr_no_dev/spk_xvector.ark:101092
51
+ p277 dump/xvector/tr_no_dev/spk_xvector.ark:103155
52
+ p278 dump/xvector/tr_no_dev/spk_xvector.ark:105218
53
+ p279 dump/xvector/tr_no_dev/spk_xvector.ark:107281
54
+ p280 dump/xvector/tr_no_dev/spk_xvector.ark:109344
55
+ p281 dump/xvector/tr_no_dev/spk_xvector.ark:111407
56
+ p282 dump/xvector/tr_no_dev/spk_xvector.ark:113470
57
+ p283 dump/xvector/tr_no_dev/spk_xvector.ark:115533
58
+ p284 dump/xvector/tr_no_dev/spk_xvector.ark:117596
59
+ p285 dump/xvector/tr_no_dev/spk_xvector.ark:119659
60
+ p286 dump/xvector/tr_no_dev/spk_xvector.ark:121722
61
+ p287 dump/xvector/tr_no_dev/spk_xvector.ark:123785
62
+ p288 dump/xvector/tr_no_dev/spk_xvector.ark:125848
63
+ p292 dump/xvector/tr_no_dev/spk_xvector.ark:127911
64
+ p293 dump/xvector/tr_no_dev/spk_xvector.ark:129974
65
+ p294 dump/xvector/tr_no_dev/spk_xvector.ark:132037
66
+ p295 dump/xvector/tr_no_dev/spk_xvector.ark:134100
67
+ p297 dump/xvector/tr_no_dev/spk_xvector.ark:136163
68
+ p298 dump/xvector/tr_no_dev/spk_xvector.ark:138226
69
+ p299 dump/xvector/tr_no_dev/spk_xvector.ark:140289
70
+ p300 dump/xvector/tr_no_dev/spk_xvector.ark:142352
71
+ p301 dump/xvector/tr_no_dev/spk_xvector.ark:144415
72
+ p302 dump/xvector/tr_no_dev/spk_xvector.ark:146478
73
+ p303 dump/xvector/tr_no_dev/spk_xvector.ark:148541
74
+ p304 dump/xvector/tr_no_dev/spk_xvector.ark:150604
75
+ p305 dump/xvector/tr_no_dev/spk_xvector.ark:152667
76
+ p306 dump/xvector/tr_no_dev/spk_xvector.ark:154730
77
+ p307 dump/xvector/tr_no_dev/spk_xvector.ark:156793
78
+ p308 dump/xvector/tr_no_dev/spk_xvector.ark:158856
79
+ p310 dump/xvector/tr_no_dev/spk_xvector.ark:160919
80
+ p311 dump/xvector/tr_no_dev/spk_xvector.ark:162982
81
+ p312 dump/xvector/tr_no_dev/spk_xvector.ark:165045
82
+ p313 dump/xvector/tr_no_dev/spk_xvector.ark:167108
83
+ p314 dump/xvector/tr_no_dev/spk_xvector.ark:169171
84
+ p316 dump/xvector/tr_no_dev/spk_xvector.ark:171234
85
+ p317 dump/xvector/tr_no_dev/spk_xvector.ark:173297
86
+ p318 dump/xvector/tr_no_dev/spk_xvector.ark:175360
87
+ p323 dump/xvector/tr_no_dev/spk_xvector.ark:177423
88
+ p326 dump/xvector/tr_no_dev/spk_xvector.ark:179486
89
+ p329 dump/xvector/tr_no_dev/spk_xvector.ark:181549
90
+ p330 dump/xvector/tr_no_dev/spk_xvector.ark:183612
91
+ p333 dump/xvector/tr_no_dev/spk_xvector.ark:185675
92
+ p334 dump/xvector/tr_no_dev/spk_xvector.ark:187738
93
+ p335 dump/xvector/tr_no_dev/spk_xvector.ark:189801
94
+ p336 dump/xvector/tr_no_dev/spk_xvector.ark:191864
95
+ p339 dump/xvector/tr_no_dev/spk_xvector.ark:193927
96
+ p340 dump/xvector/tr_no_dev/spk_xvector.ark:195990
97
+ p341 dump/xvector/tr_no_dev/spk_xvector.ark:198053
98
+ p343 dump/xvector/tr_no_dev/spk_xvector.ark:200116
99
+ p345 dump/xvector/tr_no_dev/spk_xvector.ark:202179
100
+ p347 dump/xvector/tr_no_dev/spk_xvector.ark:204242
101
+ p351 dump/xvector/tr_no_dev/spk_xvector.ark:206305
102
+ p360 dump/xvector/tr_no_dev/spk_xvector.ark:208368
103
+ p361 dump/xvector/tr_no_dev/spk_xvector.ark:210431
104
+ p362 dump/xvector/tr_no_dev/spk_xvector.ark:212494
105
+ p363 dump/xvector/tr_no_dev/spk_xvector.ark:214557
106
+ p364 dump/xvector/tr_no_dev/spk_xvector.ark:216620
107
+ p374 dump/xvector/tr_no_dev/spk_xvector.ark:218683
108
+ p376 dump/xvector/tr_no_dev/spk_xvector.ark:220746
exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/train/feats_stats.npz ADDED
Binary file (1.4 kB). View file
 
exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/config.yaml ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_gst+xvector_transformer.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: 4
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: localhost
17
+ dist_master_port: 46493
18
+ dist_launcher: null
19
+ multiprocessing_distributed: true
20
+ cudnn_enabled: true
21
+ cudnn_benchmark: false
22
+ cudnn_deterministic: true
23
+ collect_stats: false
24
+ write_collected_feats: false
25
+ max_epoch: 500
26
+ patience: null
27
+ val_scheduler_criterion:
28
+ - valid
29
+ - loss
30
+ early_stopping_criterion:
31
+ - valid
32
+ - loss
33
+ - min
34
+ best_model_criterion:
35
+ - - valid
36
+ - loss
37
+ - min
38
+ - - train
39
+ - loss
40
+ - min
41
+ keep_nbest_models: 5
42
+ grad_clip: 1.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 2
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: false
50
+ log_interval: null
51
+ unused_parameters: false
52
+ use_tensorboard: true
53
+ use_wandb: false
54
+ wandb_project: null
55
+ wandb_id: null
56
+ pretrain_path: null
57
+ init_param: []
58
+ freeze_param: []
59
+ num_iters_per_epoch: 1000
60
+ batch_size: 20
61
+ valid_batch_size: null
62
+ batch_bins: 9000000
63
+ valid_batch_bins: null
64
+ train_shape_file:
65
+ - exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/train/text_shape.phn
66
+ - exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/train/speech_shape
67
+ valid_shape_file:
68
+ - exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/valid/text_shape.phn
69
+ - exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/valid/speech_shape
70
+ batch_type: numel
71
+ valid_batch_type: null
72
+ fold_length:
73
+ - 150
74
+ - 240000
75
+ sort_in_batch: descending
76
+ sort_batch: descending
77
+ multiple_iterator: false
78
+ chunk_length: 500
79
+ chunk_shift_ratio: 0.5
80
+ num_cache_chunks: 1024
81
+ train_data_path_and_name_and_type:
82
+ - - dump/raw/tr_no_dev/text
83
+ - text
84
+ - text
85
+ - - dump/raw/tr_no_dev/wav.scp
86
+ - speech
87
+ - sound
88
+ - - dump/xvector/tr_no_dev/xvector.scp
89
+ - spembs
90
+ - kaldi_ark
91
+ valid_data_path_and_name_and_type:
92
+ - - dump/raw/dev/text
93
+ - text
94
+ - text
95
+ - - dump/raw/dev/wav.scp
96
+ - speech
97
+ - sound
98
+ - - dump/xvector/dev/xvector.scp
99
+ - spembs
100
+ - kaldi_ark
101
+ allow_variable_data_keys: false
102
+ max_cache_size: 0.0
103
+ max_cache_fd: 32
104
+ valid_max_cache_size: null
105
+ optim: adam
106
+ optim_conf:
107
+ lr: 1.0
108
+ scheduler: noamlr
109
+ scheduler_conf:
110
+ model_size: 512
111
+ warmup_steps: 12000
112
+ token_list:
113
+ - <blank>
114
+ - <unk>
115
+ - AH0
116
+ - T
117
+ - N
118
+ - S
119
+ - R
120
+ - IH1
121
+ - D
122
+ - L
123
+ - .
124
+ - Z
125
+ - DH
126
+ - K
127
+ - W
128
+ - M
129
+ - AE1
130
+ - EH1
131
+ - AA1
132
+ - IH0
133
+ - IY1
134
+ - AH1
135
+ - B
136
+ - P
137
+ - V
138
+ - ER0
139
+ - F
140
+ - HH
141
+ - AY1
142
+ - EY1
143
+ - UW1
144
+ - IY0
145
+ - AO1
146
+ - OW1
147
+ - G
148
+ - ','
149
+ - NG
150
+ - SH
151
+ - Y
152
+ - JH
153
+ - AW1
154
+ - UH1
155
+ - TH
156
+ - ER1
157
+ - CH
158
+ - '?'
159
+ - OW0
160
+ - OW2
161
+ - EH2
162
+ - EY2
163
+ - UW0
164
+ - IH2
165
+ - OY1
166
+ - AY2
167
+ - ZH
168
+ - AW2
169
+ - EH0
170
+ - IY2
171
+ - AA2
172
+ - AE0
173
+ - AH2
174
+ - AE2
175
+ - AO0
176
+ - AO2
177
+ - AY0
178
+ - UW2
179
+ - UH2
180
+ - AA0
181
+ - AW0
182
+ - EY0
183
+ - '!'
184
+ - UH0
185
+ - ER2
186
+ - OY2
187
+ - ''''
188
+ - OY0
189
+ - <sos/eos>
190
+ odim: null
191
+ model_conf: {}
192
+ use_preprocessor: true
193
+ token_type: phn
194
+ bpemodel: null
195
+ non_linguistic_symbols: null
196
+ cleaner: tacotron
197
+ g2p: g2p_en_no_space
198
+ feats_extract: fbank
199
+ feats_extract_conf:
200
+ fs: 24000
201
+ fmin: 80
202
+ fmax: 7600
203
+ n_mels: 80
204
+ hop_length: 300
205
+ n_fft: 2048
206
+ win_length: 1200
207
+ normalize: global_mvn
208
+ normalize_conf:
209
+ stats_file: exp/tts_stats_raw_phn_tacotron_g2p_en_no_space/train/feats_stats.npz
210
+ tts: transformer
211
+ tts_conf:
212
+ embed_dim: 0
213
+ eprenet_conv_layers: 0
214
+ eprenet_conv_filts: 0
215
+ eprenet_conv_chans: 0
216
+ dprenet_layers: 2
217
+ dprenet_units: 256
218
+ adim: 512
219
+ aheads: 8
220
+ elayers: 6
221
+ eunits: 1024
222
+ dlayers: 6
223
+ dunits: 1024
224
+ positionwise_layer_type: conv1d
225
+ positionwise_conv_kernel_size: 1
226
+ postnet_layers: 5
227
+ postnet_filts: 5
228
+ postnet_chans: 256
229
+ spk_embed_dim: 512
230
+ spk_embed_integration_type: add
231
+ use_gst: true
232
+ gst_heads: 4
233
+ gst_tokens: 16
234
+ use_masking: true
235
+ bce_pos_weight: 5.0
236
+ use_scaled_pos_enc: true
237
+ encoder_normalize_before: true
238
+ decoder_normalize_before: true
239
+ reduction_factor: 1
240
+ init_type: xavier_uniform
241
+ init_enc_alpha: 1.0
242
+ init_dec_alpha: 1.0
243
+ eprenet_dropout_rate: 0.0
244
+ dprenet_dropout_rate: 0.5
245
+ postnet_dropout_rate: 0.5
246
+ transformer_enc_dropout_rate: 0.1
247
+ transformer_enc_positional_dropout_rate: 0.1
248
+ transformer_enc_attn_dropout_rate: 0.1
249
+ transformer_dec_dropout_rate: 0.1
250
+ transformer_dec_positional_dropout_rate: 0.1
251
+ transformer_dec_attn_dropout_rate: 0.1
252
+ transformer_enc_dec_attn_dropout_rate: 0.1
253
+ use_guided_attn_loss: true
254
+ num_heads_applied_guided_attn: 2
255
+ num_layers_applied_guided_attn: 2
256
+ modules_applied_guided_attn:
257
+ - encoder-decoder
258
+ guided_attn_loss_sigma: 0.4
259
+ guided_attn_loss_lambda: 10.0
260
+ pitch_extract: null
261
+ pitch_extract_conf: {}
262
+ pitch_normalize: null
263
+ pitch_normalize_conf: {}
264
+ energy_extract: null
265
+ energy_extract_conf: {}
266
+ energy_normalize: null
267
+ energy_normalize_conf: {}
268
+ required:
269
+ - output_dir
270
+ - token_list
271
+ distributed: true
exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/backward_time.png ADDED
exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/bce_loss.png ADDED
exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/decoder_alpha.png ADDED
exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/enc_dec_attn_loss.png ADDED
exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/encoder_alpha.png ADDED
exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/forward_time.png ADDED
exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/iter_time.png ADDED
exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/l1_loss.png ADDED
exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/l2_loss.png ADDED
exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/loss.png ADDED
exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/lr_0.png ADDED
exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/optim_step_time.png ADDED
exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/images/train_time.png ADDED
exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/train.loss.ave_5best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f28cdd90a174c8241118db93389021484d1548ccf429d8cb7ef154a68c2dc3e5
3
+ size 137197841
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: 0.8.0
2
+ files:
3
+ model_file: exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/train.loss.ave_5best.pth
4
+ python: "3.7.3 (default, Mar 27 2019, 22:11:17) \n[GCC 7.3.0]"
5
+ timestamp: 1608877602.794003
6
+ torch: 1.5.1
7
+ yaml_files:
8
+ train_config: exp/tts_train_gst+xvector_transformer_raw_phn_tacotron_g2p_en_no_space/config.yaml