Text-to-Speech
mms
vits
Vineel Pratap commited on
Commit
84d5af0
1 Parent(s): b55ad7a
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. full_models/abi/D_100000.pth +3 -0
  2. full_models/abi/G_100000.pth +3 -0
  3. full_models/abi/config.json +87 -0
  4. full_models/abi/vocab.txt +53 -0
  5. full_models/abp/D_100000.pth +3 -0
  6. full_models/abp/G_100000.pth +3 -0
  7. full_models/abp/config.json +87 -0
  8. full_models/abp/vocab.txt +33 -0
  9. full_models/aca/D_100000.pth +3 -0
  10. full_models/aca/G_100000.pth +3 -0
  11. full_models/aca/config.json +87 -0
  12. full_models/aca/vocab.txt +35 -0
  13. full_models/acd/D_100000.pth +3 -0
  14. full_models/acd/G_100000.pth +3 -0
  15. full_models/acd/config.json +87 -0
  16. full_models/acd/vocab.txt +28 -0
  17. full_models/ace/D_100000.pth +3 -0
  18. full_models/ace/G_100000.pth +3 -0
  19. full_models/ace/config.json +87 -0
  20. full_models/ace/vocab.txt +42 -0
  21. full_models/acf/D_100000.pth +3 -0
  22. full_models/acf/G_100000.pth +3 -0
  23. full_models/acf/config.json +87 -0
  24. full_models/acf/vocab.txt +33 -0
  25. full_models/ach/D_100000.pth +3 -0
  26. full_models/ach/G_100000.pth +3 -0
  27. full_models/ach/config.json +87 -0
  28. full_models/ach/vocab.txt +28 -0
  29. full_models/acn/D_100000.pth +3 -0
  30. full_models/acn/G_100000.pth +3 -0
  31. full_models/acn/config.json +87 -0
  32. full_models/acn/vocab.txt +37 -0
  33. full_models/acr/D_100000.pth +3 -0
  34. full_models/acr/G_100000.pth +3 -0
  35. full_models/acr/config.json +87 -0
  36. full_models/acr/vocab.txt +37 -0
  37. full_models/acu/D_100000.pth +3 -0
  38. full_models/acu/G_100000.pth +3 -0
  39. full_models/acu/config.json +87 -0
  40. full_models/acu/vocab.txt +35 -0
  41. full_models/ade/D_100000.pth +3 -0
  42. full_models/ade/G_100000.pth +3 -0
  43. full_models/ade/config.json +87 -0
  44. full_models/ade/vocab.txt +40 -0
  45. full_models/adh/D_100000.pth +3 -0
  46. full_models/adh/G_100000.pth +3 -0
  47. full_models/adh/config.json +87 -0
  48. full_models/adh/vocab.txt +29 -0
  49. full_models/adj/D_100000.pth +3 -0
  50. full_models/adj/G_100000.pth +3 -0
full_models/abi/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0be668a16e5b9b9dedd41e08442644f4ed894e4c62f43d06e7e448158428e2fe
3
+ size 561098185
full_models/abi/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bee8bd93805bd44cee1ff5497bc3a87220eeeec8b0fd6a2368d0609001a2868
3
+ size 436570305
full_models/abi/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/abi/vocab.txt ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ɔ
2
+ ê
3
+ ǒ
4
+
5
+ ̂
6
+ h
7
+ ě
8
+ i
9
+ ɩ
10
+ k
11
+ á
12
+ ̌
13
+ ǐ
14
+ b
15
+ p
16
+ í
17
+ ǔ
18
+ u
19
+ ń
20
+ w
21
+ '
22
+ ί
23
+ f
24
+ ó
25
+ y
26
+ s
27
+ î
28
+ m
29
+ ɛ
30
+ έ
31
+ e
32
+ ʋ
33
+ ḿ
34
+ n
35
+ ú
36
+ o
37
+ d
38
+ â
39
+ ô
40
+ c
41
+ ǎ
42
+ é
43
+ ́
44
+ j
45
+ l
46
+ -
47
+ t
48
+ _
49
+ r
50
+ g
51
+ ε
52
+ û
53
+ a
full_models/abp/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3176637b067b5fc78605aff0b553ec09244da37ebbff3f419163cede7824c441
3
+ size 561098185
full_models/abp/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f621739a139733b14ac70f032ab4a677e8912fa3a1132ba3f8cf599dee6dbbac
3
+ size 436524225
full_models/abp/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/abp/vocab.txt ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _
2
+ t
3
+ e
4
+ b
5
+ ō
6
+ j
7
+ c
8
+ r
9
+ f
10
+ w
11
+ i
12
+ q
13
+ h
14
+ g
15
+ l
16
+ m
17
+ k
18
+ y
19
+ d
20
+ ā
21
+ s
22
+ '
23
+ a
24
+ n
25
+ x
26
+ 6
27
+ o
28
+ -
29
+ p
30
+ u
31
+
32
+ v
33
+ z
full_models/aca/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae308c0880cb4e3bd2d02600485b473f068c38ff85bac0a7d5bd8951ba1ce963
3
+ size 561076199
full_models/aca/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78666df5cbdca3fbd91b2bb2f49841f8919b7a73ab6e504ed82f7597e41c190f
3
+ size 436353726
full_models/aca/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/aca/vocab.txt ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ a
2
+ |
3
+ i
4
+ n
5
+ á
6
+ c
7
+ e
8
+ u
9
+ l
10
+ r
11
+ w
12
+ j
13
+ s
14
+ í
15
+ m
16
+ é
17
+ o
18
+ '
19
+ h
20
+ t
21
+ y
22
+ b
23
+ d
24
+ ú
25
+ q
26
+ ó
27
+ p
28
+
29
+ g
30
+ f
31
+ z
32
+ v
33
+ x
34
+ ñ
35
+
full_models/acd/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92c06f2853c68b0bc604a40caa9261cf439f0b02d66a510b08a0660b0f8e3201
3
+ size 561078480
full_models/acd/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2d3e15a2db968008b7b01eb6282b9c24115221f2abccd8935ee11a16d6f6cf9
3
+ size 436355114
full_models/acd/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/acd/vocab.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ |
2
+ a
3
+ ɛ
4
+ n
5
+ ɔ
6
+ i
7
+ o
8
+ m
9
+ y
10
+ e
11
+ u
12
+ g
13
+ s
14
+ k
15
+ b
16
+ r
17
+ l
18
+ d
19
+ w
20
+ f
21
+ -
22
+ t
23
+ p
24
+ '
25
+ ŋ
26
+ h
27
+ c
28
+
full_models/ace/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecacad3f1f738085d053f35c5618c4abceaee39c28dbff2975e17fc918cea5c7
3
+ size 561078594
full_models/ace/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:183895b264e9f91617ababbd088b5309ea79468730819506c8c8ab5e977085e6
3
+ size 436387528
full_models/ace/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/ace/vocab.txt ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ |
2
+ a
3
+ n
4
+ e
5
+ u
6
+ g
7
+ t
8
+ h
9
+ i
10
+ k
11
+ m
12
+ b
13
+ o
14
+ y
15
+ l
16
+ r
17
+ s
18
+ p
19
+ j
20
+ d
21
+ é
22
+ w
23
+ ô
24
+ ë
25
+ -
26
+ c
27
+ ö
28
+ á
29
+ ó
30
+ f
31
+ z
32
+ '
33
+ q
34
+ ú
35
+ `
36
+ 0
37
+ 6
38
+ 4
39
+ 3
40
+ 1
41
+ 2
42
+
full_models/acf/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:262ec2da7c0e7ded6eaa1cd84e37fe55127f23280d272d9453591df621511b21
3
+ size 561078869
full_models/acf/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91f23d7c73bfdf05bec98d6fc71dffbf19409a9afb463b6974abccdaee53c215
3
+ size 436369451
full_models/acf/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/acf/vocab.txt ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ |
2
+ a
3
+ n
4
+ é
5
+ i
6
+ o
7
+ s
8
+ t
9
+ k
10
+ y
11
+ p
12
+ l
13
+ w
14
+ m
15
+ è
16
+ u
17
+ d
18
+ -
19
+ e
20
+ b
21
+ v
22
+ j
23
+ ò
24
+ z
25
+ f
26
+ ʼ
27
+ h
28
+ g
29
+ c
30
+ r
31
+
32
+ '
33
+
full_models/ach/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d772464b61e01ae7415ad3b4cae63851b5fd89c15b6157b311446e3f3ea7460c
3
+ size 561078618
full_models/ach/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46031da7d624512421e115dcfadede182580c6c3139d2ea2b7c20b4fabee7e1d
3
+ size 436355251
full_models/ach/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/ach/vocab.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ |
2
+ o
3
+ a
4
+ i
5
+ e
6
+ k
7
+ n
8
+ m
9
+ w
10
+ t
11
+ u
12
+ y
13
+ l
14
+ c
15
+ d
16
+ b
17
+ g
18
+ r
19
+ p
20
+ ŋ
21
+ j
22
+ -
23
+ s
24
+ '
25
+ v
26
+ f
27
+ h
28
+
full_models/acn/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c7c7622cba3d3aa313a67157d78898fcffb20dc0855d9c9ac93e544a79b11f3
3
+ size 561098185
full_models/acn/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f608e1298f921bba8ebfa9e1eadb3c599aff4a0eb64614fb32a3d372340a9b4b
3
+ size 436533441
full_models/acn/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/acn/vocab.txt ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ u
2
+ k
3
+ g
4
+ a
5
+ _
6
+
7
+ d
8
+ w
9
+ i
10
+ o
11
+ -
12
+ b
13
+ e
14
+ n
15
+ t
16
+ y
17
+ p
18
+ s
19
+ z
20
+ x
21
+ m
22
+ h
23
+ c
24
+
25
+ l
26
+ 0
27
+ 2
28
+ j
29
+ f
30
+ 3
31
+ 5
32
+ q
33
+ v
34
+ r
35
+ 6
36
+ 1
37
+ 4
full_models/acr/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2e790e6b73ad3a311ec0bd311e50a33846f38b8e63f074650687a6f588ea7df
3
+ size 561078709
full_models/acr/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:892faf58e703daf31d96bd7b9913d2b0eda8f750305282ba760a656280a5437f
3
+ size 436375881
full_models/acr/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/acr/vocab.txt ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ |
2
+ a
3
+ i
4
+ '
5
+ k
6
+ r
7
+ u
8
+ e
9
+ j
10
+ n
11
+ o
12
+ c
13
+ l
14
+ h
15
+ t
16
+ q
17
+ w
18
+ x
19
+ m
20
+ b
21
+ s
22
+ y
23
+ p
24
+ z
25
+ d
26
+
27
+ ú
28
+ g
29
+ á
30
+ é
31
+ ó
32
+ f
33
+ í
34
+ v
35
+ -
36
+ ñ
37
+
full_models/acu/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e8f2db7fee9018cff7d22ff7ede80bfcfb408c2cb3a38a3ccf32f1594865969
3
+ size 561078587
full_models/acu/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5952ecca5322b2410a5056b1852650d2fb9289f3cebce5e9b95a7ee76eabc621
3
+ size 436371382
full_models/acu/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/acu/vocab.txt ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ a
2
+ |
3
+ i
4
+ n
5
+ u
6
+ t
7
+ r
8
+ m
9
+ k
10
+ s
11
+ e
12
+ h
13
+ j
14
+ c
15
+ w
16
+ y
17
+ p
18
+ g
19
+ o
20
+ í
21
+
22
+ ú
23
+ d
24
+ l
25
+ é
26
+ á
27
+ b
28
+ f
29
+ v
30
+ ó
31
+ z
32
+ q
33
+ x
34
+ ñ
35
+
full_models/ade/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41191e5d74955fec278ea692b2218c7920de8f16147aba556fdfce56b714f4c0
3
+ size 561078757
full_models/ade/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aed6df678ce5a605da1adef5aa7016ec89dd3b209ffebad3156843473d688668
3
+ size 436384590
full_models/ade/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/ade/vocab.txt ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ |
2
+ a
3
+ â
4
+ n
5
+ e
6
+ b
7
+ g
8
+ æ
9
+ i
10
+ t
11
+ w
12
+ ô
13
+ y
14
+ k
15
+ o
16
+ r
17
+ l
18
+ u
19
+ d
20
+ m
21
+ f
22
+ s
23
+ û
24
+ p
25
+ à
26
+ -
27
+ è
28
+ ã
29
+ õ
30
+ ù
31
+ î
32
+ å
33
+ ì
34
+ ü
35
+ ǹ
36
+ ò
37
+ h
38
+ '
39
+ c
40
+
full_models/adh/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f466d6d8e81596e107e5e3ebb2f5fea640411555a9ae7a6b719bc80f0d11f42
3
+ size 561078757
full_models/adh/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db6a6d6796cd6997ca30c352c12f25d36db4d80ac374e42318aeba8a2ce1905a
3
+ size 436360329
full_models/adh/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 1000,
5
+ "seed": 1234,
6
+ "epochs": 20000,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 64,
14
+ "fp16_run": true,
15
+ "lr_decay": 0.999875,
16
+ "segment_size": 8192,
17
+ "init_lr_ratio": 1,
18
+ "warmup_epochs": 0,
19
+ "c_mel": 45,
20
+ "c_kl": 1.0
21
+ },
22
+ "data": {
23
+ "training_files": "train.ltr",
24
+ "validation_files": "dev.ltr",
25
+ "text_cleaners": [
26
+ "transliteration_cleaners"
27
+ ],
28
+ "max_wav_value": 32768.0,
29
+ "sampling_rate": 16000,
30
+ "filter_length": 1024,
31
+ "hop_length": 256,
32
+ "win_length": 1024,
33
+ "n_mel_channels": 80,
34
+ "mel_fmin": 0.0,
35
+ "mel_fmax": null,
36
+ "add_blank": true,
37
+ "n_speakers": 0,
38
+ "cleaned_text": true
39
+ },
40
+ "model": {
41
+ "inter_channels": 192,
42
+ "hidden_channels": 192,
43
+ "filter_channels": 768,
44
+ "n_heads": 2,
45
+ "n_layers": 6,
46
+ "kernel_size": 3,
47
+ "p_dropout": 0.1,
48
+ "resblock": "1",
49
+ "resblock_kernel_sizes": [
50
+ 3,
51
+ 7,
52
+ 11
53
+ ],
54
+ "resblock_dilation_sizes": [
55
+ [
56
+ 1,
57
+ 3,
58
+ 5
59
+ ],
60
+ [
61
+ 1,
62
+ 3,
63
+ 5
64
+ ],
65
+ [
66
+ 1,
67
+ 3,
68
+ 5
69
+ ]
70
+ ],
71
+ "upsample_rates": [
72
+ 8,
73
+ 8,
74
+ 2,
75
+ 2
76
+ ],
77
+ "upsample_initial_channel": 512,
78
+ "upsample_kernel_sizes": [
79
+ 16,
80
+ 16,
81
+ 4,
82
+ 4
83
+ ],
84
+ "n_layers_q": 3,
85
+ "use_spectral_norm": false
86
+ }
87
+ }
full_models/adh/vocab.txt ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ |
2
+ o
3
+ i
4
+ a
5
+ e
6
+ n
7
+ k
8
+ m
9
+ w
10
+ r
11
+ y
12
+ h
13
+ t
14
+ d
15
+ j
16
+ g
17
+ u
18
+ l
19
+ p
20
+ c
21
+ b
22
+ ŋ
23
+ s
24
+ f
25
+ '
26
+ z
27
+ v
28
+ -
29
+
full_models/adj/D_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77757247ee18a706cb7cf12cf3c01269b4d96d9db1810496d5cecde1e4b37c5e
3
+ size 561078768
full_models/adj/G_100000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2841867f1f11ff7ef9a742e17c9847b747fbad5b2e027b43bf2268f1570e04d5
3
+ size 436399987