txya900619 committed on
Commit
e523ebb
1 Parent(s): eb3e9a0

upload: model pth and configs

Browse files
Files changed (5) hide show
  1. config.json +983 -0
  2. language_ids.json +7 -0
  3. model.pth +3 -0
  4. speaker_embs.pth +3 -0
  5. speakers.pth +3 -0
config.json ADDED
@@ -0,0 +1,983 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_path": "/home/wayne/TTS/recipes/hat-tts/vits/results",
3
+ "logger_uri": null,
4
+ "run_name": "vits_hat_tts",
5
+ "project_name": "vits_hat_tts",
6
+ "run_description": "\ud83d\udc38Coqui trainer run.",
7
+ "print_step": 100,
8
+ "plot_step": 100,
9
+ "model_param_stats": false,
10
+ "wandb_entity": null,
11
+ "dashboard_logger": "wandb",
12
+ "save_on_interrupt": true,
13
+ "log_model_step": 10000,
14
+ "save_step": 10000,
15
+ "save_n_checkpoints": 5,
16
+ "save_checkpoints": true,
17
+ "save_all_best": false,
18
+ "save_best_after": 50,
19
+ "target_loss": "loss_1",
20
+ "print_eval": true,
21
+ "test_delay_epochs": -1,
22
+ "run_eval": true,
23
+ "run_eval_steps": null,
24
+ "distributed_backend": "nccl",
25
+ "distributed_url": "tcp://localhost:54321",
26
+ "mixed_precision": true,
27
+ "precision": "fp16",
28
+ "epochs": 500,
29
+ "batch_size": 54,
30
+ "eval_batch_size": 128,
31
+ "grad_clip": [
32
+ 5.0,
33
+ 5.0
34
+ ],
35
+ "scheduler_after_epoch": true,
36
+ "lr": 0.001,
37
+ "optimizer": "AdamW",
38
+ "optimizer_params": {
39
+ "betas": [
40
+ 0.8,
41
+ 0.99
42
+ ],
43
+ "eps": 1e-09,
44
+ "weight_decay": 0.01
45
+ },
46
+ "lr_scheduler": null,
47
+ "lr_scheduler_params": {},
48
+ "use_grad_scaler": false,
49
+ "allow_tf32": false,
50
+ "cudnn_enable": true,
51
+ "cudnn_deterministic": false,
52
+ "cudnn_benchmark": false,
53
+ "training_seed": 54321,
54
+ "model": "vits",
55
+ "num_loader_workers": 16,
56
+ "num_eval_loader_workers": 16,
57
+ "use_noise_augment": false,
58
+ "audio": {
59
+ "fft_size": 1024,
60
+ "sample_rate": 22050,
61
+ "win_length": 1024,
62
+ "hop_length": 256,
63
+ "num_mels": 80,
64
+ "mel_fmin": 0,
65
+ "mel_fmax": null
66
+ },
67
+ "use_phonemes": false,
68
+ "phonemizer": null,
69
+ "phoneme_language": null,
70
+ "compute_input_seq_cache": true,
71
+ "text_cleaner": null,
72
+ "enable_eos_bos_chars": false,
73
+ "test_sentences_file": "",
74
+ "phoneme_cache_path": null,
75
+ "characters": {
76
+ "characters_class": "TTS.tts.models.vits.VitsCharacters",
77
+ "vocab_dict": null,
78
+ "pad": "_",
79
+ "eos": "&",
80
+ "bos": "*",
81
+ "blank": null,
82
+ "characters": [
83
+ "11",
84
+ "113",
85
+ "2",
86
+ "21",
87
+ "24",
88
+ "31",
89
+ "33",
90
+ "35",
91
+ "43",
92
+ "5",
93
+ "53",
94
+ "54",
95
+ "55",
96
+ "a",
97
+ "b",
98
+ "d",
99
+ "e",
100
+ "f",
101
+ "h",
102
+ "i",
103
+ "j",
104
+ "k",
105
+ "l",
106
+ "m",
107
+ "n",
108
+ "o",
109
+ "p",
110
+ "s",
111
+ "t",
112
+ "u",
113
+ "v",
114
+ "w",
115
+ "z",
116
+ "~",
117
+ "\u00e6",
118
+ "\u00f0",
119
+ "\u014b",
120
+ "\u0251",
121
+ "\u0254",
122
+ "\u0255",
123
+ "\u0259",
124
+ "\u025a",
125
+ "\u025b",
126
+ "\u025d",
127
+ "\u0261",
128
+ "\u0268",
129
+ "\u026a",
130
+ "\u0279",
131
+ "\u0282",
132
+ "\u0283",
133
+ "\u0288",
134
+ "\u028a",
135
+ "\u028c",
136
+ "\u0290",
137
+ "\u0292",
138
+ "\u02b0",
139
+ "\u0329",
140
+ "\u0361",
141
+ "\u03b8"
142
+ ],
143
+ "punctuations": "\uff0c ",
144
+ "phonemes": null,
145
+ "is_unique": true,
146
+ "is_sorted": true
147
+ },
148
+ "add_blank": true,
149
+ "batch_group_size": 2,
150
+ "loss_masking": null,
151
+ "min_audio_len": 1,
152
+ "max_audio_len": 661500,
153
+ "min_text_len": 1,
154
+ "max_text_len": 9007199254740991,
155
+ "compute_f0": false,
156
+ "compute_energy": false,
157
+ "compute_linear_spec": true,
158
+ "precompute_num_workers": 0,
159
+ "start_by_longest": false,
160
+ "shuffle": false,
161
+ "drop_last": false,
162
+ "datasets": [
163
+ {
164
+ "formatter": "hat_tts",
165
+ "dataset_name": "hat_tts_sixian",
166
+ "path": "/home/wayne/Corpora/hat_tts",
167
+ "meta_file_train": "sixian_concat.json",
168
+ "ignored_speakers": null,
169
+ "language": "sixian",
170
+ "phonemizer": "",
171
+ "meta_file_val": "",
172
+ "meta_file_attn_mask": ""
173
+ },
174
+ {
175
+ "formatter": "hat_tts",
176
+ "dataset_name": "hat_tts_hailu",
177
+ "path": "/home/wayne/Corpora/hat_tts",
178
+ "meta_file_train": "hailu_concat.json",
179
+ "ignored_speakers": null,
180
+ "language": "hailu",
181
+ "phonemizer": "",
182
+ "meta_file_val": "",
183
+ "meta_file_attn_mask": ""
184
+ },
185
+ {
186
+ "formatter": "hakka_tts_general",
187
+ "dataset_name": "hac_vocab_dapu_e",
188
+ "path": "/home/wayne/Corpora/hac_vocab",
189
+ "meta_file_train": "dapu_e_concat.json",
190
+ "ignored_speakers": null,
191
+ "language": "dapu",
192
+ "phonemizer": "",
193
+ "meta_file_val": "",
194
+ "meta_file_attn_mask": ""
195
+ },
196
+ {
197
+ "formatter": "hakka_tts_general",
198
+ "dataset_name": "hac_vocab_hailu_e",
199
+ "path": "/home/wayne/Corpora/hac_vocab",
200
+ "meta_file_train": "hailu_e_concat.json",
201
+ "ignored_speakers": null,
202
+ "language": "hailu",
203
+ "phonemizer": "",
204
+ "meta_file_val": "",
205
+ "meta_file_attn_mask": ""
206
+ },
207
+ {
208
+ "formatter": "hakka_tts_general",
209
+ "dataset_name": "hac_vocab_raoping_e",
210
+ "path": "/home/wayne/Corpora/hac_vocab",
211
+ "meta_file_train": "raoping_e_concat.json",
212
+ "ignored_speakers": null,
213
+ "language": "raoping",
214
+ "phonemizer": "",
215
+ "meta_file_val": "",
216
+ "meta_file_attn_mask": ""
217
+ },
218
+ {
219
+ "formatter": "hakka_tts_general",
220
+ "dataset_name": "hac_vocab_sixian_e",
221
+ "path": "/home/wayne/Corpora/hac_vocab",
222
+ "meta_file_train": "sixian_e_concat.json",
223
+ "ignored_speakers": null,
224
+ "language": "sixian",
225
+ "phonemizer": "",
226
+ "meta_file_val": "",
227
+ "meta_file_attn_mask": ""
228
+ },
229
+ {
230
+ "formatter": "hakka_tts_general",
231
+ "dataset_name": "hac_vocab_zhaoan_e",
232
+ "path": "/home/wayne/Corpora/hac_vocab",
233
+ "meta_file_train": "zhaoan_e_concat.json",
234
+ "ignored_speakers": null,
235
+ "language": "zhaoan",
236
+ "phonemizer": "",
237
+ "meta_file_val": "",
238
+ "meta_file_attn_mask": ""
239
+ },
240
+ {
241
+ "formatter": "hat_tts",
242
+ "dataset_name": "hakkaradio_news_dapu",
243
+ "path": "/home/wayne/Corpora/hakkaradio_news",
244
+ "meta_file_train": "dapu_concat.json",
245
+ "ignored_speakers": null,
246
+ "language": "dapu",
247
+ "phonemizer": "",
248
+ "meta_file_val": "",
249
+ "meta_file_attn_mask": ""
250
+ },
251
+ {
252
+ "formatter": "hat_tts",
253
+ "dataset_name": "hakkaradio_news_hailu",
254
+ "path": "/home/wayne/Corpora/hakkaradio_news",
255
+ "meta_file_train": "hailu_concat.json",
256
+ "ignored_speakers": null,
257
+ "language": "hailu",
258
+ "phonemizer": "",
259
+ "meta_file_val": "",
260
+ "meta_file_attn_mask": ""
261
+ },
262
+ {
263
+ "formatter": "hat_tts",
264
+ "dataset_name": "hakkaradio_news_raoping",
265
+ "path": "/home/wayne/Corpora/hakkaradio_news",
266
+ "meta_file_train": "raoping_concat.json",
267
+ "ignored_speakers": null,
268
+ "language": "raoping",
269
+ "phonemizer": "",
270
+ "meta_file_val": "",
271
+ "meta_file_attn_mask": ""
272
+ },
273
+ {
274
+ "formatter": "hat_tts",
275
+ "dataset_name": "hakkaradio_news_sixian",
276
+ "path": "/home/wayne/Corpora/hakkaradio_news",
277
+ "meta_file_train": "sixian_concat.json",
278
+ "ignored_speakers": null,
279
+ "language": "sixian",
280
+ "phonemizer": "",
281
+ "meta_file_val": "",
282
+ "meta_file_attn_mask": ""
283
+ },
284
+ {
285
+ "formatter": "hat_tts",
286
+ "dataset_name": "hakkaradio_news_zhaoan",
287
+ "path": "/home/wayne/Corpora/hakkaradio_news",
288
+ "meta_file_train": "zhaoan_concat.json",
289
+ "ignored_speakers": null,
290
+ "language": "zhaoan",
291
+ "phonemizer": "",
292
+ "meta_file_val": "",
293
+ "meta_file_attn_mask": ""
294
+ }
295
+ ],
296
+ "test_sentences": [
297
+ [
298
+ [
299
+ "t",
300
+ "\u02b0",
301
+ "u",
302
+ "\u014b",
303
+ "11",
304
+ "h",
305
+ "o",
306
+ "k",
307
+ "5"
308
+ ],
309
+ "XF",
310
+ null,
311
+ "sixian"
312
+ ],
313
+ [
314
+ [
315
+ "p",
316
+ "e",
317
+ "t",
318
+ "2",
319
+ "p",
320
+ "\u02b0",
321
+ "u",
322
+ "55"
323
+ ],
324
+ "XF",
325
+ null,
326
+ "sixian"
327
+ ],
328
+ [
329
+ [
330
+ "v",
331
+ "u",
332
+ "k",
333
+ "2",
334
+ "h",
335
+ "a",
336
+ "24"
337
+ ],
338
+ "XF",
339
+ null,
340
+ "sixian"
341
+ ],
342
+ [
343
+ [
344
+ "k",
345
+ "\u02b0",
346
+ "o",
347
+ "n",
348
+ "55",
349
+ "t",
350
+ "e",
351
+ "t",
352
+ "2",
353
+ " ",
354
+ "t",
355
+ "o",
356
+ "55"
357
+ ],
358
+ "XF",
359
+ null,
360
+ "sixian"
361
+ ],
362
+ [
363
+ [
364
+ "\u014b",
365
+ "a",
366
+ "24",
367
+ "\u0255",
368
+ "i",
369
+ "n",
370
+ "24",
371
+ "s",
372
+ "a",
373
+ "\u014b",
374
+ "24",
375
+ " ",
376
+ "t",
377
+ "o",
378
+ "31",
379
+ "\u0255",
380
+ "i",
381
+ "n",
382
+ "24",
383
+ "s",
384
+ "a",
385
+ "m",
386
+ "24",
387
+ " ",
388
+ "\uff0c",
389
+ " ",
390
+ "h",
391
+ "i",
392
+ "55",
393
+ " ",
394
+ "t",
395
+ "\u02b0",
396
+ "o",
397
+ "i",
398
+ "11",
399
+ "p",
400
+ "e",
401
+ "t",
402
+ "2",
403
+ "s",
404
+ "\u0268",
405
+ "55",
406
+ " ",
407
+ "k",
408
+ "e",
409
+ "55",
410
+ " ",
411
+ "k",
412
+ "u",
413
+ "e",
414
+ "t",
415
+ "2",
416
+ "k",
417
+ "a",
418
+ "24",
419
+ " ",
420
+ "i",
421
+ "m",
422
+ "24",
423
+ "l",
424
+ "o",
425
+ "k",
426
+ "5",
427
+ "t",
428
+ "\u02b0",
429
+ "a",
430
+ "\u014b",
431
+ "24",
432
+ " ",
433
+ "\uff0c",
434
+ " ",
435
+ "t",
436
+ "\u02b0",
437
+ "a",
438
+ "\u014b",
439
+ "24",
440
+ "i",
441
+ "m",
442
+ "24",
443
+ "\u014b",
444
+ "o",
445
+ "k",
446
+ "5",
447
+ " ",
448
+ "f",
449
+ "i",
450
+ "55"
451
+ ],
452
+ "XF",
453
+ null,
454
+ "sixian"
455
+ ],
456
+ [
457
+ [
458
+ "t",
459
+ "\u02b0",
460
+ "u",
461
+ "\u014b",
462
+ "55",
463
+ "h",
464
+ "o",
465
+ "k",
466
+ "2"
467
+ ],
468
+ "HF",
469
+ null,
470
+ "hailu"
471
+ ],
472
+ [
473
+ [
474
+ "p",
475
+ "e",
476
+ "t",
477
+ "5",
478
+ "p",
479
+ "\u02b0",
480
+ "u",
481
+ "33"
482
+ ],
483
+ "HF",
484
+ null,
485
+ "hailu"
486
+ ],
487
+ [
488
+ [
489
+ "v",
490
+ "u",
491
+ "k",
492
+ "5",
493
+ "h",
494
+ "a",
495
+ "53"
496
+ ],
497
+ "HF",
498
+ null,
499
+ "hailu"
500
+ ],
501
+ [
502
+ [
503
+ "k",
504
+ "\u02b0",
505
+ "o",
506
+ "n",
507
+ "11",
508
+ "t",
509
+ "e",
510
+ "t",
511
+ "5",
512
+ " ",
513
+ "t",
514
+ "o",
515
+ "11"
516
+ ],
517
+ "HF",
518
+ null,
519
+ "hailu"
520
+ ],
521
+ [
522
+ [
523
+ "n",
524
+ "a",
525
+ "m",
526
+ "55",
527
+ "\u014b",
528
+ "\u0329",
529
+ "24",
530
+ " ",
531
+ "p",
532
+ "\u02b0",
533
+ "i",
534
+ "n",
535
+ "55",
536
+ "t",
537
+ "e",
538
+ "n",
539
+ "24",
540
+ " ",
541
+ "k",
542
+ "a",
543
+ "i",
544
+ "11",
545
+ " ",
546
+ "\u0282",
547
+ "i",
548
+ "55",
549
+ "t",
550
+ "\u02b0",
551
+ "o",
552
+ "i",
553
+ "33",
554
+ " ",
555
+ "\uff0c",
556
+ " ",
557
+ "p",
558
+ "\u02b0",
559
+ "i",
560
+ "a",
561
+ "\u014b",
562
+ "55",
563
+ "p",
564
+ "\u02b0",
565
+ "i",
566
+ "a",
567
+ "\u014b",
568
+ "55",
569
+ " ",
570
+ "t",
571
+ "\u0361",
572
+ "s",
573
+ "o",
574
+ "11",
575
+ "t",
576
+ "e",
577
+ "t",
578
+ "5",
579
+ " ",
580
+ "\u0282",
581
+ "i",
582
+ "u",
583
+ "33",
584
+ " ",
585
+ "k",
586
+ "a",
587
+ "u",
588
+ "11",
589
+ "\u0290",
590
+ "u",
591
+ "k",
592
+ "2"
593
+ ],
594
+ "HF",
595
+ null,
596
+ "hailu"
597
+ ],
598
+ [
599
+ [
600
+ "k",
601
+ "o",
602
+ "53",
603
+ " ",
604
+ "p",
605
+ "\u02b0",
606
+ "i",
607
+ "e",
608
+ "t",
609
+ "21",
610
+ " ",
611
+ "t",
612
+ "i",
613
+ "a",
614
+ "m",
615
+ "31",
616
+ "\u014b",
617
+ "u",
618
+ "a",
619
+ "i",
620
+ "53",
621
+ "\u0288",
622
+ "\u0282",
623
+ "u",
624
+ "\u014b",
625
+ "33",
626
+ " ",
627
+ "k",
628
+ "i",
629
+ "u",
630
+ "31",
631
+ " ",
632
+ "s",
633
+ "\u0268",
634
+ "33",
635
+ " ",
636
+ "l",
637
+ "o",
638
+ "i",
639
+ "113",
640
+ " ",
641
+ "t",
642
+ "o",
643
+ "53",
644
+ " ",
645
+ "t",
646
+ "\u02b0",
647
+ "u",
648
+ "\u014b",
649
+ "113",
650
+ "f",
651
+ "a",
652
+ "33",
653
+ " ",
654
+ "l",
655
+ "i",
656
+ "m",
657
+ "113",
658
+ " ",
659
+ "h",
660
+ "a",
661
+ "33"
662
+ ],
663
+ "\u5b8b\u6db5\u8473",
664
+ null,
665
+ "dapu"
666
+ ],
667
+ [
668
+ [
669
+ "a",
670
+ "11",
671
+ " ",
672
+ "n",
673
+ "a",
674
+ "i",
675
+ "11",
676
+ " ",
677
+ "\uff0c",
678
+ " ",
679
+ "\u014b",
680
+ "i",
681
+ "55",
682
+ " ",
683
+ "k",
684
+ "i",
685
+ "a",
686
+ "k",
687
+ "2",
688
+ "k",
689
+ "i",
690
+ "a",
691
+ "k",
692
+ "2",
693
+ " ",
694
+ "l",
695
+ "o",
696
+ "i",
697
+ "55",
698
+ " ",
699
+ "k",
700
+ "\u02b0",
701
+ "a",
702
+ "n",
703
+ "24",
704
+ " ",
705
+ "s",
706
+ "i",
707
+ "n",
708
+ "11",
709
+ "v",
710
+ "u",
711
+ "n",
712
+ "55",
713
+ " ",
714
+ "p",
715
+ "o",
716
+ "53",
717
+ "t",
718
+ "\u02b0",
719
+ "o",
720
+ "11",
721
+ " ",
722
+ "\uff0c",
723
+ " ",
724
+ "s",
725
+ "\u0268",
726
+ "11",
727
+ " ",
728
+ "h",
729
+ "e",
730
+ "24",
731
+ " ",
732
+ "n",
733
+ "i",
734
+ "55",
735
+ " ",
736
+ "\u0288",
737
+ "\u0282",
738
+ "a",
739
+ "n",
740
+ "11",
741
+ " ",
742
+ "v",
743
+ "u",
744
+ "k",
745
+ "2"
746
+ ],
747
+ "\u5468\u78a9\u8208",
748
+ null,
749
+ "raoping"
750
+ ],
751
+ [
752
+ [
753
+ "h",
754
+ "i",
755
+ "e",
756
+ "t",
757
+ "24",
758
+ "\u014b",
759
+ "i",
760
+ "e",
761
+ "t",
762
+ "43",
763
+ " ",
764
+ "k",
765
+ "a",
766
+ "i",
767
+ "31",
768
+ " ",
769
+ "\u0282",
770
+ "i",
771
+ "53",
772
+ "b",
773
+ "u",
774
+ "55",
775
+ " ",
776
+ "\uff0c",
777
+ " ",
778
+ "a",
779
+ "11",
780
+ " ",
781
+ "t",
782
+ "s",
783
+ "\u02b0",
784
+ "i",
785
+ "u",
786
+ "\u014b",
787
+ "53",
788
+ " ",
789
+ "k",
790
+ "u",
791
+ "i",
792
+ "11",
793
+ " ",
794
+ "b",
795
+ "u",
796
+ "24",
797
+ "h",
798
+ "a",
799
+ "11",
800
+ " ",
801
+ "\u0288",
802
+ "\u0282",
803
+ "\u025b",
804
+ "n",
805
+ "31",
806
+ "k",
807
+ "\u02b0",
808
+ "u",
809
+ "i",
810
+ "31",
811
+ " ",
812
+ "t",
813
+ "s",
814
+ "\u02b0",
815
+ "a",
816
+ "i",
817
+ "55",
818
+ " ",
819
+ "k",
820
+ "\u02b0",
821
+ "a",
822
+ "24",
823
+ " ",
824
+ "t",
825
+ "s",
826
+ "o",
827
+ "\u014b",
828
+ "11",
829
+ " ",
830
+ "k",
831
+ "a",
832
+ "i",
833
+ "31",
834
+ " ",
835
+ "l",
836
+ "\u0254",
837
+ "31",
838
+ "b",
839
+ "u",
840
+ "24"
841
+ ],
842
+ "\u5ed6\u80b2\u8fb0",
843
+ null,
844
+ "zhaoan"
845
+ ]
846
+ ],
847
+ "eval_split_max_size": null,
848
+ "eval_split_size": 0.01429,
849
+ "use_speaker_weighted_sampler": false,
850
+ "speaker_weighted_sampler_alpha": 1.0,
851
+ "use_language_weighted_sampler": false,
852
+ "language_weighted_sampler_alpha": 1.0,
853
+ "use_length_weighted_sampler": false,
854
+ "length_weighted_sampler_alpha": 1.0,
855
+ "model_args": {
856
+ "num_chars": 63,
857
+ "out_channels": 513,
858
+ "spec_segment_size": 32,
859
+ "hidden_channels": 192,
860
+ "hidden_channels_ffn_text_encoder": 768,
861
+ "num_heads_text_encoder": 2,
862
+ "num_layers_text_encoder": 8,
863
+ "kernel_size_text_encoder": 3,
864
+ "dropout_p_text_encoder": 0.1,
865
+ "dropout_p_duration_predictor": 0.5,
866
+ "kernel_size_posterior_encoder": 5,
867
+ "dilation_rate_posterior_encoder": 1,
868
+ "num_layers_posterior_encoder": 16,
869
+ "kernel_size_flow": 5,
870
+ "dilation_rate_flow": 1,
871
+ "num_layers_flow": 4,
872
+ "resblock_type_decoder": "2",
873
+ "resblock_kernel_sizes_decoder": [
874
+ 3,
875
+ 7,
876
+ 11
877
+ ],
878
+ "resblock_dilation_sizes_decoder": [
879
+ [
880
+ 1,
881
+ 3,
882
+ 5
883
+ ],
884
+ [
885
+ 1,
886
+ 3,
887
+ 5
888
+ ],
889
+ [
890
+ 1,
891
+ 3,
892
+ 5
893
+ ]
894
+ ],
895
+ "upsample_rates_decoder": [
896
+ 8,
897
+ 8,
898
+ 2,
899
+ 2
900
+ ],
901
+ "upsample_initial_channel_decoder": 512,
902
+ "upsample_kernel_sizes_decoder": [
903
+ 16,
904
+ 16,
905
+ 4,
906
+ 4
907
+ ],
908
+ "periods_multi_period_discriminator": [
909
+ 2,
910
+ 3,
911
+ 5,
912
+ 7,
913
+ 11
914
+ ],
915
+ "use_sdp": true,
916
+ "noise_scale": 1.0,
917
+ "inference_noise_scale": 0.667,
918
+ "length_scale": 1,
919
+ "noise_scale_dp": 1.0,
920
+ "inference_noise_scale_dp": 1.0,
921
+ "max_inference_len": null,
922
+ "init_discriminator": true,
923
+ "use_spectral_norm_disriminator": false,
924
+ "use_speaker_embedding": false,
925
+ "num_speakers": 0,
926
+ "speakers_file": "speakers.pth",
927
+ "d_vector_file": [ "speaker_embs.pth" ],
928
+ "speaker_embedding_channels": 256,
929
+ "use_d_vector_file": true,
930
+ "d_vector_dim": 512,
931
+ "detach_dp_input": true,
932
+ "use_language_embedding": true,
933
+ "embedded_language_dim": 4,
934
+ "num_languages": 0,
935
+ "language_ids_file": "language_ids.json",
936
+ "use_speaker_encoder_as_loss": false,
937
+ "speaker_encoder_config_path": "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/config_se.json",
938
+ "speaker_encoder_model_path": "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/model_se.pth.tar",
939
+ "condition_dp_on_speaker": true,
940
+ "freeze_encoder": false,
941
+ "freeze_DP": false,
942
+ "freeze_PE": false,
943
+ "freeze_flow_decoder": false,
944
+ "freeze_waveform_decoder": false,
945
+ "encoder_sample_rate": null,
946
+ "interpolate_z": true,
947
+ "reinit_DP": false,
948
+ "reinit_text_encoder": false
949
+ },
950
+ "lr_gen": 0.0002,
951
+ "lr_disc": 0.0002,
952
+ "lr_scheduler_gen": "ExponentialLR",
953
+ "lr_scheduler_gen_params": {
954
+ "gamma": 0.999875,
955
+ "last_epoch": -1
956
+ },
957
+ "lr_scheduler_disc": "ExponentialLR",
958
+ "lr_scheduler_disc_params": {
959
+ "gamma": 0.999875,
960
+ "last_epoch": -1
961
+ },
962
+ "kl_loss_alpha": 1.0,
963
+ "disc_loss_alpha": 1.0,
964
+ "gen_loss_alpha": 1.0,
965
+ "feat_loss_alpha": 1.0,
966
+ "mel_loss_alpha": 45.0,
967
+ "dur_loss_alpha": 1.0,
968
+ "speaker_encoder_loss_alpha": 9.0,
969
+ "return_wav": true,
970
+ "use_weighted_sampler": false,
971
+ "weighted_sampler_attrs": {},
972
+ "weighted_sampler_multipliers": {},
973
+ "r": 1,
974
+ "num_speakers": 0,
975
+ "use_speaker_embedding": false,
976
+ "speakers_file": "speakers.pth",
977
+ "speaker_embedding_channels": 256,
978
+ "language_ids_file": "language_ids.json",
979
+ "use_language_embedding": true,
980
+ "use_d_vector_file": true,
981
+ "d_vector_file": [ "speaker_embs.pth" ],
982
+ "d_vector_dim": 512
983
+ }
language_ids.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "dapu": 0,
3
+ "hailu": 1,
4
+ "raoping": 2,
5
+ "sixian": 3,
6
+ "zhaoan": 4
7
+ }
model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:506a81c5c4a0dc16f27545636d68325300809eba35ee60d1b52a04578449a6a1
3
+ size 1017227982
speaker_embs.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48a12850eaaa260274257f392dc2d8b9a993c1513cba78eb879ca60f307375ad
3
+ size 89012
speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3336d57e6cb72ef0afec184b6303976a23609c70f58c6f66b99034dbeaa618b6
3
+ size 1248