Vineel Pratap
commited on
Commit
•
84d5af0
1
Parent(s):
b55ad7a
update3
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- full_models/abi/D_100000.pth +3 -0
- full_models/abi/G_100000.pth +3 -0
- full_models/abi/config.json +87 -0
- full_models/abi/vocab.txt +53 -0
- full_models/abp/D_100000.pth +3 -0
- full_models/abp/G_100000.pth +3 -0
- full_models/abp/config.json +87 -0
- full_models/abp/vocab.txt +33 -0
- full_models/aca/D_100000.pth +3 -0
- full_models/aca/G_100000.pth +3 -0
- full_models/aca/config.json +87 -0
- full_models/aca/vocab.txt +35 -0
- full_models/acd/D_100000.pth +3 -0
- full_models/acd/G_100000.pth +3 -0
- full_models/acd/config.json +87 -0
- full_models/acd/vocab.txt +28 -0
- full_models/ace/D_100000.pth +3 -0
- full_models/ace/G_100000.pth +3 -0
- full_models/ace/config.json +87 -0
- full_models/ace/vocab.txt +42 -0
- full_models/acf/D_100000.pth +3 -0
- full_models/acf/G_100000.pth +3 -0
- full_models/acf/config.json +87 -0
- full_models/acf/vocab.txt +33 -0
- full_models/ach/D_100000.pth +3 -0
- full_models/ach/G_100000.pth +3 -0
- full_models/ach/config.json +87 -0
- full_models/ach/vocab.txt +28 -0
- full_models/acn/D_100000.pth +3 -0
- full_models/acn/G_100000.pth +3 -0
- full_models/acn/config.json +87 -0
- full_models/acn/vocab.txt +37 -0
- full_models/acr/D_100000.pth +3 -0
- full_models/acr/G_100000.pth +3 -0
- full_models/acr/config.json +87 -0
- full_models/acr/vocab.txt +37 -0
- full_models/acu/D_100000.pth +3 -0
- full_models/acu/G_100000.pth +3 -0
- full_models/acu/config.json +87 -0
- full_models/acu/vocab.txt +35 -0
- full_models/ade/D_100000.pth +3 -0
- full_models/ade/G_100000.pth +3 -0
- full_models/ade/config.json +87 -0
- full_models/ade/vocab.txt +40 -0
- full_models/adh/D_100000.pth +3 -0
- full_models/adh/G_100000.pth +3 -0
- full_models/adh/config.json +87 -0
- full_models/adh/vocab.txt +29 -0
- full_models/adj/D_100000.pth +3 -0
- full_models/adj/G_100000.pth +3 -0
full_models/abi/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0be668a16e5b9b9dedd41e08442644f4ed894e4c62f43d06e7e448158428e2fe
|
3 |
+
size 561098185
|
full_models/abi/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1bee8bd93805bd44cee1ff5497bc3a87220eeeec8b0fd6a2368d0609001a2868
|
3 |
+
size 436570305
|
full_models/abi/config.json
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 20000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 64,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "train.ltr",
|
24 |
+
"validation_files": "dev.ltr",
|
25 |
+
"text_cleaners": [
|
26 |
+
"transliteration_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 16000,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 0,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false
|
86 |
+
}
|
87 |
+
}
|
full_models/abi/vocab.txt
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ɔ
|
2 |
+
ê
|
3 |
+
ǒ
|
4 |
+
|
5 |
+
̂
|
6 |
+
h
|
7 |
+
ě
|
8 |
+
i
|
9 |
+
ɩ
|
10 |
+
k
|
11 |
+
á
|
12 |
+
̌
|
13 |
+
ǐ
|
14 |
+
b
|
15 |
+
p
|
16 |
+
í
|
17 |
+
ǔ
|
18 |
+
u
|
19 |
+
ń
|
20 |
+
w
|
21 |
+
'
|
22 |
+
ί
|
23 |
+
f
|
24 |
+
ó
|
25 |
+
y
|
26 |
+
s
|
27 |
+
î
|
28 |
+
m
|
29 |
+
ɛ
|
30 |
+
έ
|
31 |
+
e
|
32 |
+
ʋ
|
33 |
+
ḿ
|
34 |
+
n
|
35 |
+
ú
|
36 |
+
o
|
37 |
+
d
|
38 |
+
â
|
39 |
+
ô
|
40 |
+
c
|
41 |
+
ǎ
|
42 |
+
é
|
43 |
+
́
|
44 |
+
j
|
45 |
+
l
|
46 |
+
-
|
47 |
+
t
|
48 |
+
_
|
49 |
+
r
|
50 |
+
g
|
51 |
+
ε
|
52 |
+
û
|
53 |
+
a
|
full_models/abp/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3176637b067b5fc78605aff0b553ec09244da37ebbff3f419163cede7824c441
|
3 |
+
size 561098185
|
full_models/abp/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f621739a139733b14ac70f032ab4a677e8912fa3a1132ba3f8cf599dee6dbbac
|
3 |
+
size 436524225
|
full_models/abp/config.json
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 20000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 64,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "train.ltr",
|
24 |
+
"validation_files": "dev.ltr",
|
25 |
+
"text_cleaners": [
|
26 |
+
"transliteration_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 16000,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 0,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false
|
86 |
+
}
|
87 |
+
}
|
full_models/abp/vocab.txt
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_
|
2 |
+
t
|
3 |
+
e
|
4 |
+
b
|
5 |
+
ō
|
6 |
+
j
|
7 |
+
c
|
8 |
+
r
|
9 |
+
f
|
10 |
+
w
|
11 |
+
i
|
12 |
+
q
|
13 |
+
h
|
14 |
+
g
|
15 |
+
l
|
16 |
+
m
|
17 |
+
k
|
18 |
+
y
|
19 |
+
d
|
20 |
+
ā
|
21 |
+
s
|
22 |
+
'
|
23 |
+
a
|
24 |
+
n
|
25 |
+
x
|
26 |
+
6
|
27 |
+
o
|
28 |
+
-
|
29 |
+
p
|
30 |
+
u
|
31 |
+
|
32 |
+
v
|
33 |
+
z
|
full_models/aca/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae308c0880cb4e3bd2d02600485b473f068c38ff85bac0a7d5bd8951ba1ce963
|
3 |
+
size 561076199
|
full_models/aca/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78666df5cbdca3fbd91b2bb2f49841f8919b7a73ab6e504ed82f7597e41c190f
|
3 |
+
size 436353726
|
full_models/aca/config.json
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 20000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 64,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "train.ltr",
|
24 |
+
"validation_files": "dev.ltr",
|
25 |
+
"text_cleaners": [
|
26 |
+
"transliteration_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 16000,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 0,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false
|
86 |
+
}
|
87 |
+
}
|
full_models/aca/vocab.txt
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
|
|
3 |
+
i
|
4 |
+
n
|
5 |
+
á
|
6 |
+
c
|
7 |
+
e
|
8 |
+
u
|
9 |
+
l
|
10 |
+
r
|
11 |
+
w
|
12 |
+
j
|
13 |
+
s
|
14 |
+
í
|
15 |
+
m
|
16 |
+
é
|
17 |
+
o
|
18 |
+
'
|
19 |
+
h
|
20 |
+
t
|
21 |
+
y
|
22 |
+
b
|
23 |
+
d
|
24 |
+
ú
|
25 |
+
q
|
26 |
+
ó
|
27 |
+
p
|
28 |
+
—
|
29 |
+
g
|
30 |
+
f
|
31 |
+
z
|
32 |
+
v
|
33 |
+
x
|
34 |
+
ñ
|
35 |
+
|
full_models/acd/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92c06f2853c68b0bc604a40caa9261cf439f0b02d66a510b08a0660b0f8e3201
|
3 |
+
size 561078480
|
full_models/acd/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2d3e15a2db968008b7b01eb6282b9c24115221f2abccd8935ee11a16d6f6cf9
|
3 |
+
size 436355114
|
full_models/acd/config.json
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 20000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 64,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "train.ltr",
|
24 |
+
"validation_files": "dev.ltr",
|
25 |
+
"text_cleaners": [
|
26 |
+
"transliteration_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 16000,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 0,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false
|
86 |
+
}
|
87 |
+
}
|
full_models/acd/vocab.txt
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
|
2 |
+
a
|
3 |
+
ɛ
|
4 |
+
n
|
5 |
+
ɔ
|
6 |
+
i
|
7 |
+
o
|
8 |
+
m
|
9 |
+
y
|
10 |
+
e
|
11 |
+
u
|
12 |
+
g
|
13 |
+
s
|
14 |
+
k
|
15 |
+
b
|
16 |
+
r
|
17 |
+
l
|
18 |
+
d
|
19 |
+
w
|
20 |
+
f
|
21 |
+
-
|
22 |
+
t
|
23 |
+
p
|
24 |
+
'
|
25 |
+
ŋ
|
26 |
+
h
|
27 |
+
c
|
28 |
+
|
full_models/ace/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ecacad3f1f738085d053f35c5618c4abceaee39c28dbff2975e17fc918cea5c7
|
3 |
+
size 561078594
|
full_models/ace/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:183895b264e9f91617ababbd088b5309ea79468730819506c8c8ab5e977085e6
|
3 |
+
size 436387528
|
full_models/ace/config.json
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 20000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 64,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "train.ltr",
|
24 |
+
"validation_files": "dev.ltr",
|
25 |
+
"text_cleaners": [
|
26 |
+
"transliteration_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 16000,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 0,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false
|
86 |
+
}
|
87 |
+
}
|
full_models/ace/vocab.txt
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
|
2 |
+
a
|
3 |
+
n
|
4 |
+
e
|
5 |
+
u
|
6 |
+
g
|
7 |
+
t
|
8 |
+
h
|
9 |
+
i
|
10 |
+
k
|
11 |
+
m
|
12 |
+
b
|
13 |
+
o
|
14 |
+
y
|
15 |
+
l
|
16 |
+
r
|
17 |
+
s
|
18 |
+
p
|
19 |
+
j
|
20 |
+
d
|
21 |
+
é
|
22 |
+
w
|
23 |
+
ô
|
24 |
+
ë
|
25 |
+
-
|
26 |
+
c
|
27 |
+
ö
|
28 |
+
á
|
29 |
+
ó
|
30 |
+
f
|
31 |
+
z
|
32 |
+
'
|
33 |
+
q
|
34 |
+
ú
|
35 |
+
`
|
36 |
+
0
|
37 |
+
6
|
38 |
+
4
|
39 |
+
3
|
40 |
+
1
|
41 |
+
2
|
42 |
+
|
full_models/acf/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:262ec2da7c0e7ded6eaa1cd84e37fe55127f23280d272d9453591df621511b21
|
3 |
+
size 561078869
|
full_models/acf/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91f23d7c73bfdf05bec98d6fc71dffbf19409a9afb463b6974abccdaee53c215
|
3 |
+
size 436369451
|
full_models/acf/config.json
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 20000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 64,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "train.ltr",
|
24 |
+
"validation_files": "dev.ltr",
|
25 |
+
"text_cleaners": [
|
26 |
+
"transliteration_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 16000,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 0,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false
|
86 |
+
}
|
87 |
+
}
|
full_models/acf/vocab.txt
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
|
2 |
+
a
|
3 |
+
n
|
4 |
+
é
|
5 |
+
i
|
6 |
+
o
|
7 |
+
s
|
8 |
+
t
|
9 |
+
k
|
10 |
+
y
|
11 |
+
p
|
12 |
+
l
|
13 |
+
w
|
14 |
+
m
|
15 |
+
è
|
16 |
+
u
|
17 |
+
d
|
18 |
+
-
|
19 |
+
e
|
20 |
+
b
|
21 |
+
v
|
22 |
+
j
|
23 |
+
ò
|
24 |
+
z
|
25 |
+
f
|
26 |
+
ʼ
|
27 |
+
h
|
28 |
+
g
|
29 |
+
c
|
30 |
+
r
|
31 |
+
—
|
32 |
+
'
|
33 |
+
|
full_models/ach/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d772464b61e01ae7415ad3b4cae63851b5fd89c15b6157b311446e3f3ea7460c
|
3 |
+
size 561078618
|
full_models/ach/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46031da7d624512421e115dcfadede182580c6c3139d2ea2b7c20b4fabee7e1d
|
3 |
+
size 436355251
|
full_models/ach/config.json
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 20000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 64,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "train.ltr",
|
24 |
+
"validation_files": "dev.ltr",
|
25 |
+
"text_cleaners": [
|
26 |
+
"transliteration_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 16000,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 0,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false
|
86 |
+
}
|
87 |
+
}
|
full_models/ach/vocab.txt
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
|
2 |
+
o
|
3 |
+
a
|
4 |
+
i
|
5 |
+
e
|
6 |
+
k
|
7 |
+
n
|
8 |
+
m
|
9 |
+
w
|
10 |
+
t
|
11 |
+
u
|
12 |
+
y
|
13 |
+
l
|
14 |
+
c
|
15 |
+
d
|
16 |
+
b
|
17 |
+
g
|
18 |
+
r
|
19 |
+
p
|
20 |
+
ŋ
|
21 |
+
j
|
22 |
+
-
|
23 |
+
s
|
24 |
+
'
|
25 |
+
v
|
26 |
+
f
|
27 |
+
h
|
28 |
+
|
full_models/acn/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c7c7622cba3d3aa313a67157d78898fcffb20dc0855d9c9ac93e544a79b11f3
|
3 |
+
size 561098185
|
full_models/acn/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f608e1298f921bba8ebfa9e1eadb3c599aff4a0eb64614fb32a3d372340a9b4b
|
3 |
+
size 436533441
|
full_models/acn/config.json
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 20000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 64,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "train.ltr",
|
24 |
+
"validation_files": "dev.ltr",
|
25 |
+
"text_cleaners": [
|
26 |
+
"transliteration_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 16000,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 0,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false
|
86 |
+
}
|
87 |
+
}
|
full_models/acn/vocab.txt
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
u
|
2 |
+
k
|
3 |
+
g
|
4 |
+
a
|
5 |
+
_
|
6 |
+
—
|
7 |
+
d
|
8 |
+
w
|
9 |
+
i
|
10 |
+
o
|
11 |
+
-
|
12 |
+
b
|
13 |
+
e
|
14 |
+
n
|
15 |
+
t
|
16 |
+
y
|
17 |
+
p
|
18 |
+
s
|
19 |
+
z
|
20 |
+
x
|
21 |
+
m
|
22 |
+
h
|
23 |
+
c
|
24 |
+
|
25 |
+
l
|
26 |
+
0
|
27 |
+
2
|
28 |
+
j
|
29 |
+
f
|
30 |
+
3
|
31 |
+
5
|
32 |
+
q
|
33 |
+
v
|
34 |
+
r
|
35 |
+
6
|
36 |
+
1
|
37 |
+
4
|
full_models/acr/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2e790e6b73ad3a311ec0bd311e50a33846f38b8e63f074650687a6f588ea7df
|
3 |
+
size 561078709
|
full_models/acr/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:892faf58e703daf31d96bd7b9913d2b0eda8f750305282ba760a656280a5437f
|
3 |
+
size 436375881
|
full_models/acr/config.json
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 20000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 64,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "train.ltr",
|
24 |
+
"validation_files": "dev.ltr",
|
25 |
+
"text_cleaners": [
|
26 |
+
"transliteration_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 16000,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 0,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false
|
86 |
+
}
|
87 |
+
}
|
full_models/acr/vocab.txt
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
|
2 |
+
a
|
3 |
+
i
|
4 |
+
'
|
5 |
+
k
|
6 |
+
r
|
7 |
+
u
|
8 |
+
e
|
9 |
+
j
|
10 |
+
n
|
11 |
+
o
|
12 |
+
c
|
13 |
+
l
|
14 |
+
h
|
15 |
+
t
|
16 |
+
q
|
17 |
+
w
|
18 |
+
x
|
19 |
+
m
|
20 |
+
b
|
21 |
+
s
|
22 |
+
y
|
23 |
+
p
|
24 |
+
z
|
25 |
+
d
|
26 |
+
—
|
27 |
+
ú
|
28 |
+
g
|
29 |
+
á
|
30 |
+
é
|
31 |
+
ó
|
32 |
+
f
|
33 |
+
í
|
34 |
+
v
|
35 |
+
-
|
36 |
+
ñ
|
37 |
+
|
full_models/acu/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e8f2db7fee9018cff7d22ff7ede80bfcfb408c2cb3a38a3ccf32f1594865969
|
3 |
+
size 561078587
|
full_models/acu/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5952ecca5322b2410a5056b1852650d2fb9289f3cebce5e9b95a7ee76eabc621
|
3 |
+
size 436371382
|
full_models/acu/config.json
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 20000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 64,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "train.ltr",
|
24 |
+
"validation_files": "dev.ltr",
|
25 |
+
"text_cleaners": [
|
26 |
+
"transliteration_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 16000,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 0,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false
|
86 |
+
}
|
87 |
+
}
|
full_models/acu/vocab.txt
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
a
|
2 |
+
|
|
3 |
+
i
|
4 |
+
n
|
5 |
+
u
|
6 |
+
t
|
7 |
+
r
|
8 |
+
m
|
9 |
+
k
|
10 |
+
s
|
11 |
+
e
|
12 |
+
h
|
13 |
+
j
|
14 |
+
c
|
15 |
+
w
|
16 |
+
y
|
17 |
+
p
|
18 |
+
g
|
19 |
+
o
|
20 |
+
í
|
21 |
+
—
|
22 |
+
ú
|
23 |
+
d
|
24 |
+
l
|
25 |
+
é
|
26 |
+
á
|
27 |
+
b
|
28 |
+
f
|
29 |
+
v
|
30 |
+
ó
|
31 |
+
z
|
32 |
+
q
|
33 |
+
x
|
34 |
+
ñ
|
35 |
+
|
full_models/ade/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41191e5d74955fec278ea692b2218c7920de8f16147aba556fdfce56b714f4c0
|
3 |
+
size 561078757
|
full_models/ade/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aed6df678ce5a605da1adef5aa7016ec89dd3b209ffebad3156843473d688668
|
3 |
+
size 436384590
|
full_models/ade/config.json
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 20000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 64,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "train.ltr",
|
24 |
+
"validation_files": "dev.ltr",
|
25 |
+
"text_cleaners": [
|
26 |
+
"transliteration_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 16000,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 0,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false
|
86 |
+
}
|
87 |
+
}
|
full_models/ade/vocab.txt
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
|
2 |
+
a
|
3 |
+
â
|
4 |
+
n
|
5 |
+
e
|
6 |
+
b
|
7 |
+
g
|
8 |
+
æ
|
9 |
+
i
|
10 |
+
t
|
11 |
+
w
|
12 |
+
ô
|
13 |
+
y
|
14 |
+
k
|
15 |
+
o
|
16 |
+
r
|
17 |
+
l
|
18 |
+
u
|
19 |
+
d
|
20 |
+
m
|
21 |
+
f
|
22 |
+
s
|
23 |
+
û
|
24 |
+
p
|
25 |
+
à
|
26 |
+
-
|
27 |
+
è
|
28 |
+
ã
|
29 |
+
õ
|
30 |
+
ù
|
31 |
+
î
|
32 |
+
å
|
33 |
+
ì
|
34 |
+
ü
|
35 |
+
ǹ
|
36 |
+
ò
|
37 |
+
h
|
38 |
+
'
|
39 |
+
c
|
40 |
+
|
full_models/adh/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f466d6d8e81596e107e5e3ebb2f5fea640411555a9ae7a6b719bc80f0d11f42
|
3 |
+
size 561078757
|
full_models/adh/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db6a6d6796cd6997ca30c352c12f25d36db4d80ac374e42318aeba8a2ce1905a
|
3 |
+
size 436360329
|
full_models/adh/config.json
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 200,
|
4 |
+
"eval_interval": 1000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 20000,
|
7 |
+
"learning_rate": 0.0002,
|
8 |
+
"betas": [
|
9 |
+
0.8,
|
10 |
+
0.99
|
11 |
+
],
|
12 |
+
"eps": 1e-09,
|
13 |
+
"batch_size": 64,
|
14 |
+
"fp16_run": true,
|
15 |
+
"lr_decay": 0.999875,
|
16 |
+
"segment_size": 8192,
|
17 |
+
"init_lr_ratio": 1,
|
18 |
+
"warmup_epochs": 0,
|
19 |
+
"c_mel": 45,
|
20 |
+
"c_kl": 1.0
|
21 |
+
},
|
22 |
+
"data": {
|
23 |
+
"training_files": "train.ltr",
|
24 |
+
"validation_files": "dev.ltr",
|
25 |
+
"text_cleaners": [
|
26 |
+
"transliteration_cleaners"
|
27 |
+
],
|
28 |
+
"max_wav_value": 32768.0,
|
29 |
+
"sampling_rate": 16000,
|
30 |
+
"filter_length": 1024,
|
31 |
+
"hop_length": 256,
|
32 |
+
"win_length": 1024,
|
33 |
+
"n_mel_channels": 80,
|
34 |
+
"mel_fmin": 0.0,
|
35 |
+
"mel_fmax": null,
|
36 |
+
"add_blank": true,
|
37 |
+
"n_speakers": 0,
|
38 |
+
"cleaned_text": true
|
39 |
+
},
|
40 |
+
"model": {
|
41 |
+
"inter_channels": 192,
|
42 |
+
"hidden_channels": 192,
|
43 |
+
"filter_channels": 768,
|
44 |
+
"n_heads": 2,
|
45 |
+
"n_layers": 6,
|
46 |
+
"kernel_size": 3,
|
47 |
+
"p_dropout": 0.1,
|
48 |
+
"resblock": "1",
|
49 |
+
"resblock_kernel_sizes": [
|
50 |
+
3,
|
51 |
+
7,
|
52 |
+
11
|
53 |
+
],
|
54 |
+
"resblock_dilation_sizes": [
|
55 |
+
[
|
56 |
+
1,
|
57 |
+
3,
|
58 |
+
5
|
59 |
+
],
|
60 |
+
[
|
61 |
+
1,
|
62 |
+
3,
|
63 |
+
5
|
64 |
+
],
|
65 |
+
[
|
66 |
+
1,
|
67 |
+
3,
|
68 |
+
5
|
69 |
+
]
|
70 |
+
],
|
71 |
+
"upsample_rates": [
|
72 |
+
8,
|
73 |
+
8,
|
74 |
+
2,
|
75 |
+
2
|
76 |
+
],
|
77 |
+
"upsample_initial_channel": 512,
|
78 |
+
"upsample_kernel_sizes": [
|
79 |
+
16,
|
80 |
+
16,
|
81 |
+
4,
|
82 |
+
4
|
83 |
+
],
|
84 |
+
"n_layers_q": 3,
|
85 |
+
"use_spectral_norm": false
|
86 |
+
}
|
87 |
+
}
|
full_models/adh/vocab.txt
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
|
2 |
+
o
|
3 |
+
i
|
4 |
+
a
|
5 |
+
e
|
6 |
+
n
|
7 |
+
k
|
8 |
+
m
|
9 |
+
w
|
10 |
+
r
|
11 |
+
y
|
12 |
+
h
|
13 |
+
t
|
14 |
+
d
|
15 |
+
j
|
16 |
+
g
|
17 |
+
u
|
18 |
+
l
|
19 |
+
p
|
20 |
+
c
|
21 |
+
b
|
22 |
+
ŋ
|
23 |
+
s
|
24 |
+
f
|
25 |
+
'
|
26 |
+
z
|
27 |
+
v
|
28 |
+
-
|
29 |
+
|
full_models/adj/D_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77757247ee18a706cb7cf12cf3c01269b4d96d9db1810496d5cecde1e4b37c5e
|
3 |
+
size 561078768
|
full_models/adj/G_100000.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2841867f1f11ff7ef9a742e17c9847b747fbad5b2e027b43bf2268f1570e04d5
|
3 |
+
size 436399987
|