chipewyan committed on
Commit
f03b7a7
1 Parent(s): 11452ac
config.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./",
3
+ "activation_dropout": 0.0,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.1,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 768,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.0,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "gradient_checkpointing": false,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.1,
58
+ "hidden_size": 1024,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 4096,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.1,
63
+ "mask_channel_length": 10,
64
+ "mask_channel_min_space": 1,
65
+ "mask_channel_other": 0.0,
66
+ "mask_channel_prob": 0.0,
67
+ "mask_channel_selection": "static",
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_min_space": 1,
74
+ "mask_time_other": 0.0,
75
+ "mask_time_prob": 0.05,
76
+ "mask_time_selection": "static",
77
+ "model_type": "wav2vec2",
78
+ "num_adapter_layers": 3,
79
+ "num_attention_heads": 16,
80
+ "num_codevector_groups": 2,
81
+ "num_codevectors_per_group": 320,
82
+ "num_conv_pos_embedding_groups": 16,
83
+ "num_conv_pos_embeddings": 128,
84
+ "num_feat_extract_layers": 7,
85
+ "num_hidden_layers": 24,
86
+ "num_negatives": 100,
87
+ "output_hidden_size": 1024,
88
+ "pad_token_id": 310,
89
+ "proj_codevector_dim": 768,
90
+ "tdnn_dilation": [
91
+ 1,
92
+ 2,
93
+ 3,
94
+ 1,
95
+ 1
96
+ ],
97
+ "tdnn_dim": [
98
+ 512,
99
+ 512,
100
+ 512,
101
+ 512,
102
+ 1500
103
+ ],
104
+ "tdnn_kernel": [
105
+ 5,
106
+ 3,
107
+ 3,
108
+ 1,
109
+ 1
110
+ ],
111
+ "torch_dtype": "float32",
112
+ "transformers_version": "4.26.0.dev0",
113
+ "use_weighted_layer_sum": false,
114
+ "vocab_size": 311,
115
+ "xvector_output_dim": 512
116
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd07d6c8d002533730a944c44b8d69d21bba1c08aee71dc3925777baab716d88
3
+ size 1263171053
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "do_lower_case": false,
4
+ "eos_token": "</s>",
5
+ "model_max_length": 1000000000000000019884624838656,
6
+ "pad_token": "[PAD]",
7
+ "replace_word_delimiter_char": " ",
8
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
9
+ "unk_token": "[UNK]",
10
+ "word_delimiter_token": "|"
11
+ }
vocab.json ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ " ": 29,
3
+ "#": 115,
4
+ "'": 138,
5
+ "[PAD]": 310,
6
+ "[UNK]": 309,
7
+ "a": 231,
8
+ "ã": 39,
9
+ "b": 44,
10
+ "b̪": 147,
11
+ "b̪͡v": 171,
12
+ "b͡ꞵ": 214,
13
+ "c": 46,
14
+ "cʼ": 279,
15
+ "c͡ç": 168,
16
+ "d": 40,
17
+ "d̼": 289,
18
+ "d͡z": 195,
19
+ "d͡ð": 133,
20
+ "d͡ɮ": 173,
21
+ "d͡ʑ": 41,
22
+ "d͡ʒ": 270,
23
+ "e": 250,
24
+ "ẽ": 280,
25
+ "e̞": 200,
26
+ "ẽ̞": 131,
27
+ "f": 172,
28
+ "fʼ": 220,
29
+ "g": 18,
30
+ "h": 19,
31
+ "i": 238,
32
+ "j": 259,
33
+ "k": 207,
34
+ "kxʼ": 181,
35
+ "kǀ": 123,
36
+ "kǁ": 161,
37
+ "kǂ": 4,
38
+ "kǃ": 304,
39
+ "kʘ": 139,
40
+ "kʼ": 198,
41
+ "k̚": 230,
42
+ "k͡p": 11,
43
+ "k͡x": 95,
44
+ "l": 156,
45
+ "m": 36,
46
+ "m̥": 286,
47
+ "n": 196,
48
+ "n̥": 178,
49
+ "n̼": 260,
50
+ "o": 49,
51
+ "õ": 276,
52
+ "o̞": 201,
53
+ "õ̞": 80,
54
+ "p": 68,
55
+ "pʼ": 91,
56
+ "p̚": 189,
57
+ "p̪": 184,
58
+ "p̪͡f": 21,
59
+ "p͡f": 271,
60
+ "p͡ɸ": 247,
61
+ "q": 99,
62
+ "qǀ": 128,
63
+ "qǁ": 284,
64
+ "qǂ": 30,
65
+ "qǃ": 100,
66
+ "qʘ": 268,
67
+ "qʼ": 98,
68
+ "q͡ʡ": 232,
69
+ "q͡χʼ": 58,
70
+ "q͡ꭓ": 302,
71
+ "r": 166,
72
+ "r̥": 149,
73
+ "s": 104,
74
+ "sʼ": 37,
75
+ "t": 124,
76
+ "tʼ": 224,
77
+ "t̚": 14,
78
+ "t̪͡θʼ": 12,
79
+ "t̼": 54,
80
+ "t͡s": 1,
81
+ "t͡sʼ": 187,
82
+ "t͡ɕ": 125,
83
+ "t͡ɬ": 244,
84
+ "t͡ɬʼ": 129,
85
+ "t͡ʃ": 251,
86
+ "t͡ʃʼ": 253,
87
+ "t͡θ": 300,
88
+ "u": 150,
89
+ "ũ": 111,
90
+ "v": 186,
91
+ "w": 60,
92
+ "x": 252,
93
+ "xʼ": 254,
94
+ "y": 118,
95
+ "ỹ": 204,
96
+ "z": 246,
97
+ "ä": 94,
98
+ "ä̃": 113,
99
+ "æ": 245,
100
+ "æ̃": 154,
101
+ "ç": 132,
102
+ "é": 28,
103
+ "ð": 188,
104
+ "ð̠": 237,
105
+ "ð̼": 109,
106
+ "ø": 294,
107
+ "ø̃": 38,
108
+ "ø̞": 105,
109
+ "ø̞̃": 73,
110
+ "ħ": 130,
111
+ "ŋ": 88,
112
+ "ŋǀ": 223,
113
+ "ŋǁ": 301,
114
+ "ŋǂ": 273,
115
+ "ŋǃ": 143,
116
+ "ŋʘ": 81,
117
+ "ŋ̊": 182,
118
+ "ŋ͡m": 169,
119
+ "œ": 87,
120
+ "œ̃": 190,
121
+ "ǀ": 291,
122
+ "ǃ": 92,
123
+ "ɐ": 208,
124
+ "ɐ̃": 151,
125
+ "ɑ": 163,
126
+ "ɑ̃": 235,
127
+ "ɒ": 285,
128
+ "ɒ̃": 24,
129
+ "ɓ": 258,
130
+ "ɓ̥": 274,
131
+ "ɔ": 167,
132
+ "ɔ̃": 64,
133
+ "ɕ": 192,
134
+ "ɕʼ": 278,
135
+ "ɖ": 180,
136
+ "ɖ͡ʐ": 45,
137
+ "ɗ": 287,
138
+ "ɗ̥": 206,
139
+ "ɘ": 61,
140
+ "ɘ̃": 266,
141
+ "ə": 305,
142
+ "ə̃": 135,
143
+ "ɚ": 144,
144
+ "ɛ": 262,
145
+ "ɛ̃": 183,
146
+ "ɜ": 282,
147
+ "ɜ̃": 47,
148
+ "ɝ": 297,
149
+ "ɞ": 191,
150
+ "ɞ̃": 48,
151
+ "ɟ": 96,
152
+ "ɟ͡ʝ": 165,
153
+ "ɠ": 218,
154
+ "ɠ̊": 116,
155
+ "ɡ": 242,
156
+ "ɡǀ": 117,
157
+ "ɡǁ": 225,
158
+ "ɡǂ": 51,
159
+ "ɡǃ": 193,
160
+ "ɡʘ": 10,
161
+ "ɡ̆": 264,
162
+ "ɡ͡b": 296,
163
+ "ɡ͡ɣ": 185,
164
+ "ɢ": 72,
165
+ "ɢǀ": 5,
166
+ "ɢǁ": 211,
167
+ "ɢǂ": 103,
168
+ "ɢǃ": 140,
169
+ "ɢʘ": 210,
170
+ "ɢ̆": 226,
171
+ "ɢ͡ʁ": 240,
172
+ "ɣ": 288,
173
+ "ɤ": 66,
174
+ "ɤ̃": 110,
175
+ "ɤ̞": 292,
176
+ "ɤ̞̃": 50,
177
+ "ɥ": 199,
178
+ "ɥ̊": 215,
179
+ "ɧ": 33,
180
+ "ɨ": 164,
181
+ "ɨ̃": 307,
182
+ "ɪ": 26,
183
+ "ɪ̃": 31,
184
+ "ɬ": 78,
185
+ "ɬʼ": 69,
186
+ "ɭ": 239,
187
+ "ɭ˔": 249,
188
+ "ɭ̆": 76,
189
+ "ɭ̥̆": 148,
190
+ "ɮ": 216,
191
+ "ɯ": 83,
192
+ "ɯ̃": 20,
193
+ "ɰ": 77,
194
+ "ɱ": 119,
195
+ "ɲ": 142,
196
+ "ɲ̊": 102,
197
+ "ɳ": 22,
198
+ "ɳ̊": 158,
199
+ "ɴ": 153,
200
+ "ɴǀ": 248,
201
+ "ɴǁ": 160,
202
+ "ɴǂ": 293,
203
+ "ɴǃ": 298,
204
+ "ɴʘ": 299,
205
+ "ɵ": 0,
206
+ "ɵ̃": 227,
207
+ "ɶ": 209,
208
+ "ɶ̃": 67,
209
+ "ɸ": 97,
210
+ "ɸʼ": 106,
211
+ "ɹ": 121,
212
+ "ɹ̠˔": 236,
213
+ "ɹ̠̊˔": 179,
214
+ "ɺ": 243,
215
+ "ɺ̥": 277,
216
+ "ɻ": 89,
217
+ "ɻ˔": 65,
218
+ "ɻ̊˔": 134,
219
+ "ɽ": 233,
220
+ "ɽ̊": 15,
221
+ "ɾ": 34,
222
+ "ɾ̥": 55,
223
+ "ɾ̼": 9,
224
+ "ʀ": 85,
225
+ "ʀ̥": 107,
226
+ "ʁ": 202,
227
+ "ʂ": 176,
228
+ "ʂʼ": 141,
229
+ "ʃ": 59,
230
+ "ʃʼ": 3,
231
+ "ʄ": 8,
232
+ "ʄ̊": 137,
233
+ "ʈ": 265,
234
+ "ʈʼ": 194,
235
+ "ʈ͡ʂ": 145,
236
+ "ʈ͡ʂʼ": 255,
237
+ "ʉ": 16,
238
+ "ʉ̃": 213,
239
+ "ʊ": 229,
240
+ "ʊ̃": 42,
241
+ "ʋ": 27,
242
+ "ʌ": 306,
243
+ "ʌ̃": 162,
244
+ "ʍ": 219,
245
+ "ʎ": 126,
246
+ "ʎ̆": 146,
247
+ "ʎ̝": 2,
248
+ "ʎ̝̊": 127,
249
+ "ʏ": 74,
250
+ "ʏ̃": 23,
251
+ "ʐ": 75,
252
+ "ʑ": 228,
253
+ "ʒ": 90,
254
+ "ʔ": 175,
255
+ "ʔ̞": 101,
256
+ "ʔ͡h": 261,
257
+ "ʕ": 241,
258
+ "ʙ": 52,
259
+ "ʙ̥": 70,
260
+ "ʛ": 112,
261
+ "ʛ̥": 93,
262
+ "ʜ": 152,
263
+ "ʝ": 35,
264
+ "ʟ": 221,
265
+ "ʟ̆": 57,
266
+ "ʟ̝": 256,
267
+ "ʟ̝̊": 275,
268
+ "ʟ̠": 122,
269
+ "ʡ": 269,
270
+ "ʡʼ": 170,
271
+ "ʡ̆": 43,
272
+ "ʡ͡ʜ": 159,
273
+ "ʡ͡ʢ": 177,
274
+ "ʢ": 7,
275
+ "ʰ": 32,
276
+ "ʲ": 272,
277
+ "ʷ": 157,
278
+ "ʼ": 17,
279
+ "ː": 290,
280
+ "˞": 84,
281
+ "ˠ": 114,
282
+ "ˡ": 283,
283
+ "ˣ": 62,
284
+ "ˤ": 86,
285
+ "̃": 6,
286
+ "̇": 263,
287
+ "̞": 308,
288
+ "̪": 63,
289
+ "͡": 267,
290
+ "ΐ": 13,
291
+ "β": 53,
292
+ "θ": 303,
293
+ "θʼ": 56,
294
+ "θ̠": 257,
295
+ "θ̼": 295,
296
+ "χ": 203,
297
+ "χʼ": 281,
298
+ "ϊ": 217,
299
+ "ஃ": 79,
300
+ "ᵐ": 82,
301
+ "ᵑ": 120,
302
+ "ᶑ": 212,
303
+ "ᶑ̊": 108,
304
+ "ᶬ": 155,
305
+ "ᶮ": 197,
306
+ "ᶯ": 71,
307
+ "ᶰ": 136,
308
+ "ᶿ": 222,
309
+ "ⁿ": 234,
310
+ "ⱱ": 174,
311
+ "ⱱ̟": 205,
312
+ "ꞎ": 25
313
+ }