claudios committed on
Commit
cbcf394
·
verified ·
1 Parent(s): 0f48c53

Upload folder using huggingface_hub

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
ast/special_tokens_map.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[SOS]",
3
+ "eos_token": "[EOS]",
4
+ "mask_token": "[MSK]",
5
+ "pad_token": "[PAD]",
6
+ "sep_token": "[SEP]",
7
+ "unk_token": "[UNK]"
8
+ }
ast/tokenizer.json ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "[PAD]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "[SOS]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "[EOS]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "[UNK]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "[MSK]",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ },
51
+ {
52
+ "id": 5,
53
+ "content": "[SEP]",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
+ }
60
+ ],
61
+ "normalizer": {
62
+ "type": "Sequence",
63
+ "normalizers": [
64
+ {
65
+ "type": "NFD"
66
+ },
67
+ {
68
+ "type": "StripAccents"
69
+ },
70
+ {
71
+ "type": "Strip",
72
+ "strip_left": true,
73
+ "strip_right": true
74
+ }
75
+ ]
76
+ },
77
+ "pre_tokenizer": {
78
+ "type": "Whitespace"
79
+ },
80
+ "post_processor": null,
81
+ "decoder": null,
82
+ "model": {
83
+ "type": "WordLevel",
84
+ "vocab": {
85
+ "[PAD]": 0,
86
+ "[SOS]": 1,
87
+ "[EOS]": 2,
88
+ "[UNK]": 3,
89
+ "[MSK]": 4,
90
+ "[SEP]": 5,
91
+ "expression_statement__": 6,
92
+ "__expression_statement": 7,
93
+ "assignment": 8,
94
+ "__if_statement": 9,
95
+ "if_statement__": 10,
96
+ "expression_statement": 11,
97
+ "return_statement": 12,
98
+ "__parenthesized_expression": 13,
99
+ "parenthesized_expression__": 14,
100
+ "binary_expression": 15,
101
+ "binary_expression__": 16,
102
+ "__binary_expression": 17,
103
+ "__call_expression": 18,
104
+ "call_expression__": 19,
105
+ "assignment_expression__": 20,
106
+ "__assignment_expression": 21,
107
+ "member_expression": 22,
108
+ "selector_expression": 23,
109
+ "compound_statement__": 24,
110
+ "__compound_statement": 25,
111
+ "local_variable_declaration": 26,
112
+ "return_statement__": 27,
113
+ "__return_statement": 28,
114
+ "for_statement__": 29,
115
+ "__for_statement": 30,
116
+ "assignment_expression": 31,
117
+ "member_access_expression": 32,
118
+ "object_creation_expression": 33,
119
+ "call_statement": 34,
120
+ "member_call_expression": 35,
121
+ "subscript_expression": 36,
122
+ "local_variable_declaration__": 37,
123
+ "__local_variable_declaration": 38,
124
+ "member_call_expression__": 39,
125
+ "__member_call_expression": 40,
126
+ "parenthesized_expression": 41,
127
+ "__assignment_statement": 42,
128
+ "assignment_statement__": 43,
129
+ "try_statement__": 44,
130
+ "__try_statement": 45,
131
+ "call_expression": 46,
132
+ "unary_expression": 47,
133
+ "function_call_expression": 48,
134
+ "raise_statement": 49,
135
+ "__member_expression": 50,
136
+ "member_expression__": 51,
137
+ "function_call_expression__": 52,
138
+ "__function_call_expression": 53,
139
+ "__short_var_declaration": 54,
140
+ "short_var_declaration__": 55,
141
+ "subscript_expression__": 56,
142
+ "__subscript_expression": 57,
143
+ "call_statement__": 58,
144
+ "__call_statement": 59,
145
+ "array_creation_expression": 60,
146
+ "selector_expression__": 61,
147
+ "__selector_expression": 62,
148
+ "__throw_statement": 63,
149
+ "throw_statement__": 64,
150
+ "cast_expression": 65,
151
+ "update_expression": 66,
152
+ "object_creation_expression__": 67,
153
+ "__object_creation_expression": 68,
154
+ "augmented_assignment": 69,
155
+ "unary_expression__": 70,
156
+ "__unary_expression": 71,
157
+ "break_statement": 72,
158
+ "__unary_op_expression": 73,
159
+ "unary_op_expression__": 74,
160
+ "enhanced_for_statement__": 75,
161
+ "__enhanced_for_statement": 76,
162
+ "scoped_call_expression": 77,
163
+ "class_constant_access_expression": 78,
164
+ "assignment_statement": 79,
165
+ "while_statement__": 80,
166
+ "__while_statement": 81,
167
+ "if_statement": 82,
168
+ "continue_statement": 83,
169
+ "ternary_expression__": 84,
170
+ "__ternary_expression": 85,
171
+ "__foreach_statement": 86,
172
+ "foreach_statement__": 87,
173
+ "with_statement__": 88,
174
+ "__with_statement": 89,
175
+ "array_creation_expression__": 90,
176
+ "__array_creation_expression": 91,
177
+ "instanceof_expression": 92,
178
+ "throw_expression__": 93,
179
+ "__throw_expression": 94,
180
+ "assert_statement": 95,
181
+ "conditional_expression__": 96,
182
+ "__conditional_expression": 97,
183
+ "__scoped_call_expression": 98,
184
+ "scoped_call_expression__": 99,
185
+ "index_expression": 100,
186
+ "__member_access_expression": 101,
187
+ "member_access_expression__": 102,
188
+ "cast_expression__": 103,
189
+ "__cast_expression": 104,
190
+ "index_expression__": 105,
191
+ "__index_expression": 106,
192
+ "__augmented_assignment_expression": 107,
193
+ "augmented_assignment_expression__": 108,
194
+ "pass_statement": 109,
195
+ "scoped_property_access_expression": 110,
196
+ "new_expression": 111,
197
+ "import_from_statement": 112,
198
+ "unless_statement": 113,
199
+ "var_declaration": 114,
200
+ "else_statement": 115,
201
+ "__else_statement": 116,
202
+ "else_statement__": 117,
203
+ "method_declaration__": 118,
204
+ "__method_declaration": 119,
205
+ "new_expression__": 120,
206
+ "__new_expression": 121,
207
+ "unary_op_expression": 122,
208
+ "short_var_declaration": 123,
209
+ "__case_statement": 124,
210
+ "case_statement__": 125,
211
+ "__anonymous_function_creation_expression": 126,
212
+ "anonymous_function_creation_expression__": 127,
213
+ "augmented_assignment_expression": 128,
214
+ "__defer_statement": 129,
215
+ "defer_statement__": 130,
216
+ "delete_statement": 131,
217
+ "throw_statement": 132,
218
+ "parameter_declaration": 133,
219
+ "import_statement": 134,
220
+ "when_statement": 135,
221
+ "when_statement__": 136,
222
+ "__when_statement": 137,
223
+ "__lambda_expression": 138,
224
+ "lambda_expression__": 139,
225
+ "ternary_expression": 140,
226
+ "switch_expression__": 141,
227
+ "__switch_expression": 142,
228
+ "__synchronized_statement": 143,
229
+ "synchronized_statement__": 144,
230
+ "print_statement": 145,
231
+ "lambda_expression": 146,
232
+ "switch_statement__": 147,
233
+ "__switch_statement": 148,
234
+ "operator_assignment_statement": 149,
235
+ "type_assertion_expression__": 150,
236
+ "__type_assertion_expression": 151,
237
+ "rescue_statement": 152,
238
+ "__assert_statement": 153,
239
+ "assert_statement__": 154,
240
+ "for_in_statement__": 155,
241
+ "__for_in_statement": 156,
242
+ "__rescue_statement": 157,
243
+ "rescue_statement__": 158,
244
+ "__unset_statement": 159,
245
+ "unset_statement__": 160,
246
+ "type_assertion_expression": 161,
247
+ "__assignment": 162,
248
+ "assignment__": 163,
249
+ "__unless_statement": 164,
250
+ "unless_statement__": 165,
251
+ "expression_switch_statement__": 166,
252
+ "__expression_switch_statement": 167,
253
+ "inc_statement": 168,
254
+ "slice_expression": 169,
255
+ "__slice_expression": 170,
256
+ "slice_expression__": 171,
257
+ "__operator_assignment_statement": 172,
258
+ "operator_assignment_statement__": 173,
259
+ "__try_with_resources_statement": 174,
260
+ "try_with_resources_statement__": 175,
261
+ "global_statement": 176,
262
+ "receive_statement__": 177,
263
+ "__receive_statement": 178,
264
+ "field_declaration": 179,
265
+ "await_expression__": 180,
266
+ "__await_expression": 181,
267
+ "type_switch_statement__": 182,
268
+ "__type_switch_statement": 183,
269
+ "do_statement__": 184,
270
+ "__do_statement": 185,
271
+ "__go_statement": 186,
272
+ "go_statement__": 187,
273
+ "case_statement": 188,
274
+ "type_conversion_expression": 189,
275
+ "sequence_expression__": 190,
276
+ "__sequence_expression": 191,
277
+ "__update_expression": 192,
278
+ "update_expression__": 193,
279
+ "default_statement__": 194,
280
+ "__default_statement": 195,
281
+ "select_statement__": 196,
282
+ "__select_statement": 197,
283
+ "conditional_expression": 198,
284
+ "__send_statement": 199,
285
+ "send_statement__": 200,
286
+ "__echo_statement": 201,
287
+ "echo_statement__": 202,
288
+ "empty_statement": 203,
289
+ "throw_expression": 204,
290
+ "var_declaration__": 205,
291
+ "__var_declaration": 206,
292
+ "compound_statement": 207,
293
+ "__inc_statement": 208,
294
+ "inc_statement__": 209,
295
+ "__labeled_statement": 210,
296
+ "labeled_statement__": 211,
297
+ "jsx_expression": 212,
298
+ "echo_statement": 213,
299
+ "send_statement": 214,
300
+ "__type_conversion_expression": 215,
301
+ "type_conversion_expression__": 216,
302
+ "clone_expression": 217,
303
+ "unset_statement": 218,
304
+ "method_declaration": 219,
305
+ "yield_expression__": 220,
306
+ "__yield_expression": 221,
307
+ "dec_statement": 222,
308
+ "__jsx_expression": 223,
309
+ "jsx_expression__": 224,
310
+ "clone_expression__": 225,
311
+ "__clone_expression": 226,
312
+ "const_declaration__": 227,
313
+ "__const_declaration": 228,
314
+ "until_statement": 229,
315
+ "__require_once_expression": 230,
316
+ "require_once_expression__": 231,
317
+ "require_once_expression": 232,
318
+ "const_declaration": 233,
319
+ "field_declaration__": 234,
320
+ "__field_declaration": 235,
321
+ "include_expression__": 236,
322
+ "__include_expression": 237,
323
+ "__dec_statement": 238,
324
+ "dec_statement__": 239,
325
+ "__until_statement": 240,
326
+ "until_statement__": 241,
327
+ "goto_statement": 242,
328
+ "for_statement": 243,
329
+ "type_declaration": 244,
330
+ "__require_expression": 245,
331
+ "require_expression__": 246,
332
+ "nonlocal_statement": 247,
333
+ "yield_expression": 248,
334
+ "fallthrough_statement": 249,
335
+ "include_expression": 250,
336
+ "require_expression": 251,
337
+ "class_declaration__": 252,
338
+ "__class_declaration": 253,
339
+ "instanceof_expression__": 254,
340
+ "__instanceof_expression": 255,
341
+ "type_declaration__": 256,
342
+ "__type_declaration": 257,
343
+ "rest_assignment_statement": 258,
344
+ "variadic_parameter_declaration": 259,
345
+ "include_once_expression__": 260,
346
+ "__include_once_expression": 261,
347
+ "sequence_expression": 262,
348
+ "include_once_expression": 263,
349
+ "default_statement": 264,
350
+ "named_label_statement": 265,
351
+ "__exponentiation_expression": 266,
352
+ "exponentiation_expression__": 267,
353
+ "exponentiation_expression": 268,
354
+ "__constructor_declaration": 269,
355
+ "await_expression": 270,
356
+ "constructor_declaration__": 271,
357
+ "destructured_left_assignment_statement": 272,
358
+ "shell_command_expression": 273,
359
+ "enhanced_for_statement": 274,
360
+ "__class_constant_access_expression": 275,
361
+ "debugger_statement": 276,
362
+ "__parameter_declaration": 277,
363
+ "parameter_declaration__": 278,
364
+ "class_constant_access_expression__": 279,
365
+ "declare_statement": 280,
366
+ "constructor_declaration": 281,
367
+ "for_in_statement": 282,
368
+ "__continue_statement": 283,
369
+ "try_with_resources_statement": 284,
370
+ "continue_statement__": 285,
371
+ "select_statement": 286,
372
+ "try_statement": 287,
373
+ "labeled_statement": 288,
374
+ "expression_switch_statement": 289,
375
+ "augmented_assignment__": 290,
376
+ "scoped_property_access_expression__": 291,
377
+ "yield_statement__": 292,
378
+ "__yield_statement": 293,
379
+ "__scoped_property_access_expression": 294,
380
+ "__augmented_assignment": 295,
381
+ "with_statement": 296
382
+ },
383
+ "unk_token": "[UNK]"
384
+ }
385
+ }
ast/tokenizer_config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[SOS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[EOS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MSK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "[SEP]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "bos_token": "[SOS]",
53
+ "clean_up_tokenization_spaces": true,
54
+ "eos_token": "[EOS]",
55
+ "mask_token": "[MSK]",
56
+ "model_max_length": 512,
57
+ "pad_token": "[PAD]",
58
+ "sep_token": "[SEP]",
59
+ "tokenizer_class": "PreTrainedTokenizerFast",
60
+ "unk_token": "[UNK]"
61
+ }
code_tokenizer_fast/special_tokens_map.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[SOS]",
3
+ "eos_token": "[EOS]",
4
+ "mask_token": "[MSK]",
5
+ "pad_token": "[PAD]",
6
+ "sep_token": "[SEP]",
7
+ "unk_token": "[UNK]"
8
+ }
code_tokenizer_fast/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
code_tokenizer_fast/tokenizer_config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[SOS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[EOS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MSK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "[SEP]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "bos_token": "[SOS]",
53
+ "clean_up_tokenization_spaces": true,
54
+ "eos_token": "[EOS]",
55
+ "mask_token": "[MSK]",
56
+ "model_max_length": 512,
57
+ "pad_token": "[PAD]",
58
+ "sep_token": "[SEP]",
59
+ "tokenizer_class": "PreTrainedTokenizerFast",
60
+ "unk_token": "[UNK]"
61
+ }
config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "models/sptcode/pre-trained/models/all",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "architectures": [
6
+ "BartModel"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "bos_token_id": 1,
10
+ "classifier_dropout": 0.0,
11
+ "d_model": 768,
12
+ "decoder_attention_heads": 12,
13
+ "decoder_ffn_dim": 3072,
14
+ "decoder_layerdrop": 0.0,
15
+ "decoder_layers": 12,
16
+ "decoder_start_token_id": 1,
17
+ "dropout": 0.1,
18
+ "encoder_attention_heads": 12,
19
+ "encoder_ffn_dim": 3072,
20
+ "encoder_layerdrop": 0.0,
21
+ "encoder_layers": 12,
22
+ "eos_token_id": 2,
23
+ "forced_eos_token_id": 2,
24
+ "gradient_checkpointing": false,
25
+ "id2label": {
26
+ "0": "LABEL_0",
27
+ "1": "LABEL_1"
28
+ },
29
+ "init_std": 0.02,
30
+ "is_encoder_decoder": true,
31
+ "label2id": {
32
+ "LABEL_0": 0,
33
+ "LABEL_1": 1
34
+ },
35
+ "max_length": 100,
36
+ "max_position_embeddings": 512,
37
+ "min_length": 1,
38
+ "model_type": "bart",
39
+ "num_beams": 5,
40
+ "num_hidden_layers": 12,
41
+ "pad_token_id": 0,
42
+ "scale_embedding": false,
43
+ "torch_dtype": "float32",
44
+ "transformers_version": "4.40.1",
45
+ "use_cache": true,
46
+ "vocab_size": 80297
47
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2331f1e67bf243f64b17d9bd1616d9506ae9c1e892c0e5709446ec59d741b86d
3
+ size 1043801784
nl_tokenizer_fast/special_tokens_map.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[SOS]",
3
+ "eos_token": "[EOS]",
4
+ "mask_token": "[MSK]",
5
+ "pad_token": "[PAD]",
6
+ "sep_token": "[SEP]",
7
+ "unk_token": "[UNK]"
8
+ }
nl_tokenizer_fast/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
nl_tokenizer_fast/tokenizer_config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[SOS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[EOS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MSK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "[SEP]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "bos_token": "[SOS]",
53
+ "clean_up_tokenization_spaces": true,
54
+ "eos_token": "[EOS]",
55
+ "mask_token": "[MSK]",
56
+ "model_max_length": 512,
57
+ "pad_token": "[PAD]",
58
+ "sep_token": "[SEP]",
59
+ "tokenizer_class": "PreTrainedTokenizerFast",
60
+ "unk_token": "[UNK]"
61
+ }