Bowen7 committed on
Commit
0298665
1 Parent(s): 1889ef9

Upload 7 files

Browse files
Files changed (7) hide show
  1. added_tokens.json +3 -0
  2. config.json +82 -0
  3. model.onnx +3 -0
  4. special_tokens_map.json +4 -0
  5. tokenizer.json +223 -0
  6. tokenizer_config.json +30 -0
  7. vocab.json +179 -0
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<unk>": 177
3
+ }
config.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "kakao-enterprise/vits-vctk",
3
+ "activation_dropout": 0.1,
4
+ "architectures": [
5
+ "VitsModel"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "depth_separable_channels": 2,
9
+ "depth_separable_num_layers": 3,
10
+ "duration_predictor_dropout": 0.5,
11
+ "duration_predictor_filter_channels": 256,
12
+ "duration_predictor_flow_bins": 10,
13
+ "duration_predictor_kernel_size": 3,
14
+ "duration_predictor_num_flows": 4,
15
+ "duration_predictor_tail_bound": 5.0,
16
+ "ffn_dim": 768,
17
+ "ffn_kernel_size": 3,
18
+ "flow_size": 192,
19
+ "hidden_act": "relu",
20
+ "hidden_dropout": 0.1,
21
+ "hidden_size": 192,
22
+ "initializer_range": 0.02,
23
+ "layer_norm_eps": 1e-05,
24
+ "layerdrop": 0.1,
25
+ "leaky_relu_slope": 0.1,
26
+ "model_type": "vits",
27
+ "noise_scale": 0.667,
28
+ "noise_scale_duration": 0.8,
29
+ "num_attention_heads": 2,
30
+ "num_hidden_layers": 6,
31
+ "num_speakers": 109,
32
+ "posterior_encoder_num_wavenet_layers": 16,
33
+ "prior_encoder_num_flows": 4,
34
+ "prior_encoder_num_wavenet_layers": 4,
35
+ "resblock_dilation_sizes": [
36
+ [
37
+ 1,
38
+ 3,
39
+ 5
40
+ ],
41
+ [
42
+ 1,
43
+ 3,
44
+ 5
45
+ ],
46
+ [
47
+ 1,
48
+ 3,
49
+ 5
50
+ ]
51
+ ],
52
+ "resblock_kernel_sizes": [
53
+ 3,
54
+ 7,
55
+ 11
56
+ ],
57
+ "sampling_rate": 22050,
58
+ "speaker_embedding_size": 256,
59
+ "speaking_rate": 1.0,
60
+ "spectrogram_bins": 513,
61
+ "transformers_version": "4.38.2",
62
+ "upsample_initial_channel": 512,
63
+ "upsample_kernel_sizes": [
64
+ 16,
65
+ 16,
66
+ 4,
67
+ 4
68
+ ],
69
+ "upsample_rates": [
70
+ 8,
71
+ 8,
72
+ 2,
73
+ 2
74
+ ],
75
+ "use_bias": true,
76
+ "use_stochastic_duration_prediction": true,
77
+ "vocab_size": 178,
78
+ "wavenet_dilation_rate": 1,
79
+ "wavenet_dropout": 0.0,
80
+ "wavenet_kernel_size": 5,
81
+ "window_size": 4
82
+ }
model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67a229ee3e8b5d6c43c192ff967d4b433c221af95c85642cd45d7be599e434cb
3
+ size 114365937
special_tokens_map.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "pad_token": "_",
3
+ "unk_token": "<unk>"
4
+ }
tokenizer.json ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 177,
8
+ "content": "<unk>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ }
15
+ ],
16
+ "normalizer": {
17
+ "type": "Sequence",
18
+ "normalizers": [
19
+ {
20
+ "type": "Lowercase"
21
+ },
22
+ {
23
+ "type": "Replace",
24
+ "pattern": {
25
+ "Regex": "(?=.)|(?<!^)$"
26
+ },
27
+ "content": "_"
28
+ }
29
+ ]
30
+ },
31
+ "pre_tokenizer": {
32
+ "type": "Split",
33
+ "pattern": {
34
+ "Regex": ""
35
+ },
36
+ "behavior": "Isolated",
37
+ "invert": false
38
+ },
39
+ "post_processor": null,
40
+ "decoder": null,
41
+ "model": {
42
+ "vocab": {
43
+ "_": 0,
44
+ ";": 1,
45
+ ":": 2,
46
+ ",": 3,
47
+ ".": 4,
48
+ "!": 5,
49
+ "?": 6,
50
+ "\u00a1": 7,
51
+ "\u00bf": 8,
52
+ "\u2014": 9,
53
+ "\u2026": 10,
54
+ "\"": 11,
55
+ "\u00ab": 12,
56
+ "\u00bb": 13,
57
+ "\u201c": 14,
58
+ "\u201d": 15,
59
+ " ": 16,
60
+ "A": 17,
61
+ "B": 18,
62
+ "C": 19,
63
+ "D": 20,
64
+ "E": 21,
65
+ "F": 22,
66
+ "G": 23,
67
+ "H": 24,
68
+ "I": 25,
69
+ "J": 26,
70
+ "K": 27,
71
+ "L": 28,
72
+ "M": 29,
73
+ "N": 30,
74
+ "O": 31,
75
+ "P": 32,
76
+ "Q": 33,
77
+ "R": 34,
78
+ "S": 35,
79
+ "T": 36,
80
+ "U": 37,
81
+ "V": 38,
82
+ "W": 39,
83
+ "X": 40,
84
+ "Y": 41,
85
+ "Z": 42,
86
+ "a": 43,
87
+ "b": 44,
88
+ "c": 45,
89
+ "d": 46,
90
+ "e": 47,
91
+ "f": 48,
92
+ "g": 49,
93
+ "h": 50,
94
+ "i": 51,
95
+ "j": 52,
96
+ "k": 53,
97
+ "l": 54,
98
+ "m": 55,
99
+ "n": 56,
100
+ "o": 57,
101
+ "p": 58,
102
+ "q": 59,
103
+ "r": 60,
104
+ "s": 61,
105
+ "t": 62,
106
+ "u": 63,
107
+ "v": 64,
108
+ "w": 65,
109
+ "x": 66,
110
+ "y": 67,
111
+ "z": 68,
112
+ "\u0251": 69,
113
+ "\u0250": 70,
114
+ "\u0252": 71,
115
+ "\u00e6": 72,
116
+ "\u0253": 73,
117
+ "\u0299": 74,
118
+ "\u03b2": 75,
119
+ "\u0254": 76,
120
+ "\u0255": 77,
121
+ "\u00e7": 78,
122
+ "\u0257": 79,
123
+ "\u0256": 80,
124
+ "\u00f0": 81,
125
+ "\u02a4": 82,
126
+ "\u0259": 83,
127
+ "\u0258": 84,
128
+ "\u025a": 85,
129
+ "\u025b": 86,
130
+ "\u025c": 87,
131
+ "\u025d": 88,
132
+ "\u025e": 89,
133
+ "\u025f": 90,
134
+ "\u0284": 91,
135
+ "\u0261": 92,
136
+ "\u0260": 93,
137
+ "\u0262": 94,
138
+ "\u029b": 95,
139
+ "\u0266": 96,
140
+ "\u0267": 97,
141
+ "\u0127": 98,
142
+ "\u0265": 99,
143
+ "\u029c": 100,
144
+ "\u0268": 101,
145
+ "\u026a": 102,
146
+ "\u029d": 103,
147
+ "\u026d": 104,
148
+ "\u026c": 105,
149
+ "\u026b": 106,
150
+ "\u026e": 107,
151
+ "\u029f": 108,
152
+ "\u0271": 109,
153
+ "\u026f": 110,
154
+ "\u0270": 111,
155
+ "\u014b": 112,
156
+ "\u0273": 113,
157
+ "\u0272": 114,
158
+ "\u0274": 115,
159
+ "\u00f8": 116,
160
+ "\u0275": 117,
161
+ "\u0278": 118,
162
+ "\u03b8": 119,
163
+ "\u0153": 120,
164
+ "\u0276": 121,
165
+ "\u0298": 122,
166
+ "\u0279": 123,
167
+ "\u027a": 124,
168
+ "\u027e": 125,
169
+ "\u027b": 126,
170
+ "\u0280": 127,
171
+ "\u0281": 128,
172
+ "\u027d": 129,
173
+ "\u0282": 130,
174
+ "\u0283": 131,
175
+ "\u0288": 132,
176
+ "\u02a7": 133,
177
+ "\u0289": 134,
178
+ "\u028a": 135,
179
+ "\u028b": 136,
180
+ "\u2c71": 137,
181
+ "\u028c": 138,
182
+ "\u0263": 139,
183
+ "\u0264": 140,
184
+ "\u028d": 141,
185
+ "\u03c7": 142,
186
+ "\u028e": 143,
187
+ "\u028f": 144,
188
+ "\u0291": 145,
189
+ "\u0290": 146,
190
+ "\u0292": 147,
191
+ "\u0294": 148,
192
+ "\u02a1": 149,
193
+ "\u0295": 150,
194
+ "\u02a2": 151,
195
+ "\u01c0": 152,
196
+ "\u01c1": 153,
197
+ "\u01c2": 154,
198
+ "\u01c3": 155,
199
+ "\u02c8": 156,
200
+ "\u02cc": 157,
201
+ "\u02d0": 158,
202
+ "\u02d1": 159,
203
+ "\u02bc": 160,
204
+ "\u02b4": 161,
205
+ "\u02b0": 162,
206
+ "\u02b1": 163,
207
+ "\u02b2": 164,
208
+ "\u02b7": 165,
209
+ "\u02e0": 166,
210
+ "\u02e4": 167,
211
+ "\u02de": 168,
212
+ "\u2193": 169,
213
+ "\u2191": 170,
214
+ "\u2192": 171,
215
+ "\u2197": 172,
216
+ "\u2198": 173,
217
+ "null": 174,
218
+ "\u0329": 175,
219
+ "'": 176,
220
+ "<unk>": 177
221
+ }
222
+ }
223
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_blank": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "_",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "177": {
13
+ "content": "<unk>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ }
20
+ },
21
+ "clean_up_tokenization_spaces": true,
22
+ "is_uroman": false,
23
+ "language": null,
24
+ "model_max_length": 1000000000000000019884624838656,
25
+ "normalize": true,
26
+ "pad_token": "_",
27
+ "phonemize": true,
28
+ "tokenizer_class": "VitsTokenizer",
29
+ "unk_token": "<unk>"
30
+ }
vocab.json ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ " ": 16,
3
+ "!": 5,
4
+ "\"": 11,
5
+ "'": 176,
6
+ ",": 3,
7
+ ".": 4,
8
+ ":": 2,
9
+ ";": 1,
10
+ "?": 6,
11
+ "A": 17,
12
+ "B": 18,
13
+ "C": 19,
14
+ "D": 20,
15
+ "E": 21,
16
+ "F": 22,
17
+ "G": 23,
18
+ "H": 24,
19
+ "I": 25,
20
+ "J": 26,
21
+ "K": 27,
22
+ "L": 28,
23
+ "M": 29,
24
+ "N": 30,
25
+ "O": 31,
26
+ "P": 32,
27
+ "Q": 33,
28
+ "R": 34,
29
+ "S": 35,
30
+ "T": 36,
31
+ "U": 37,
32
+ "V": 38,
33
+ "W": 39,
34
+ "X": 40,
35
+ "Y": 41,
36
+ "Z": 42,
37
+ "_": 0,
38
+ "a": 43,
39
+ "b": 44,
40
+ "c": 45,
41
+ "d": 46,
42
+ "e": 47,
43
+ "f": 48,
44
+ "g": 49,
45
+ "h": 50,
46
+ "i": 51,
47
+ "j": 52,
48
+ "k": 53,
49
+ "l": 54,
50
+ "m": 55,
51
+ "n": 56,
52
+ "o": 57,
53
+ "p": 58,
54
+ "q": 59,
55
+ "r": 60,
56
+ "s": 61,
57
+ "t": 62,
58
+ "u": 63,
59
+ "v": 64,
60
+ "w": 65,
61
+ "x": 66,
62
+ "y": 67,
63
+ "z": 68,
64
+ "¡": 7,
65
+ "«": 12,
66
+ "»": 13,
67
+ "¿": 8,
68
+ "æ": 72,
69
+ "ç": 78,
70
+ "ð": 81,
71
+ "ø": 116,
72
+ "ħ": 98,
73
+ "ŋ": 112,
74
+ "œ": 120,
75
+ "ǀ": 152,
76
+ "ǁ": 153,
77
+ "ǂ": 154,
78
+ "ǃ": 155,
79
+ "ɐ": 70,
80
+ "ɑ": 69,
81
+ "ɒ": 71,
82
+ "ɓ": 73,
83
+ "ɔ": 76,
84
+ "ɕ": 77,
85
+ "ɖ": 80,
86
+ "ɗ": 79,
87
+ "ɘ": 84,
88
+ "ə": 83,
89
+ "ɚ": 85,
90
+ "ɛ": 86,
91
+ "ɜ": 87,
92
+ "ɝ": 88,
93
+ "ɞ": 89,
94
+ "ɟ": 90,
95
+ "ɠ": 93,
96
+ "ɡ": 92,
97
+ "ɢ": 94,
98
+ "ɣ": 139,
99
+ "ɤ": 140,
100
+ "ɥ": 99,
101
+ "ɦ": 96,
102
+ "ɧ": 97,
103
+ "ɨ": 101,
104
+ "ɪ": 102,
105
+ "ɫ": 106,
106
+ "ɬ": 105,
107
+ "ɭ": 104,
108
+ "ɮ": 107,
109
+ "ɯ": 110,
110
+ "ɰ": 111,
111
+ "ɱ": 109,
112
+ "ɲ": 114,
113
+ "ɳ": 113,
114
+ "ɴ": 115,
115
+ "ɵ": 117,
116
+ "ɶ": 121,
117
+ "ɸ": 118,
118
+ "ɹ": 123,
119
+ "ɺ": 124,
120
+ "ɻ": 126,
121
+ "ɽ": 129,
122
+ "ɾ": 125,
123
+ "ʀ": 127,
124
+ "ʁ": 128,
125
+ "ʂ": 130,
126
+ "ʃ": 131,
127
+ "ʄ": 91,
128
+ "ʈ": 132,
129
+ "ʉ": 134,
130
+ "ʊ": 135,
131
+ "ʋ": 136,
132
+ "ʌ": 138,
133
+ "ʍ": 141,
134
+ "ʎ": 143,
135
+ "ʏ": 144,
136
+ "ʐ": 146,
137
+ "ʑ": 145,
138
+ "ʒ": 147,
139
+ "ʔ": 148,
140
+ "ʕ": 150,
141
+ "ʘ": 122,
142
+ "ʙ": 74,
143
+ "ʛ": 95,
144
+ "ʜ": 100,
145
+ "ʝ": 103,
146
+ "ʟ": 108,
147
+ "ʡ": 149,
148
+ "ʢ": 151,
149
+ "ʤ": 82,
150
+ "ʧ": 133,
151
+ "ʰ": 162,
152
+ "ʱ": 163,
153
+ "ʲ": 164,
154
+ "ʴ": 161,
155
+ "ʷ": 165,
156
+ "ʼ": 160,
157
+ "ˈ": 156,
158
+ "ˌ": 157,
159
+ "ː": 158,
160
+ "ˑ": 159,
161
+ "˞": 168,
162
+ "ˠ": 166,
163
+ "ˤ": 167,
164
+ "̩": 175,
165
+ "β": 75,
166
+ "θ": 119,
167
+ "χ": 142,
168
+ "ᵻ": 177,
169
+ "—": 9,
170
+ "“": 14,
171
+ "”": 15,
172
+ "…": 10,
173
+ "↑": 170,
174
+ "→": 171,
175
+ "↓": 169,
176
+ "↗": 172,
177
+ "↘": 173,
178
+ "ⱱ": 137
179
+ }