Upload tokenizer
- tokenizer.json +4 -25
- tokenizer_config.json +0 -2
tokenizer.json
CHANGED
@@ -275,30 +275,10 @@
     "use_regex": true
   },
   "post_processor": {
-    "type": "TemplateProcessing",
-    "single": [
-      {
-        "Sequence": {
-          "id": "A",
-          "type_id": 0
-        }
-      }
-    ],
-    "pair": [
-      {
-        "Sequence": {
-          "id": "A",
-          "type_id": 0
-        }
-      },
-      {
-        "Sequence": {
-          "id": "B",
-          "type_id": 1
-        }
-      }
-    ],
-    "special_tokens": {}
+    "type": "ByteLevel",
+    "add_prefix_space": false,
+    "trim_offsets": true,
+    "use_regex": true
   },
   "decoder": {
     "type": "ByteLevel",
@@ -314,7 +294,6 @@
     "end_of_word_suffix": null,
     "fuse_unk": false,
     "byte_fallback": false,
-    "ignore_merges": false,
     "vocab": {
       "<|endoftext|>": 0,
       "<|padding|>": 1,
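The net effect of the first hunk: the TemplateProcessing post-processor, which assigned type_id 1 to the second sequence of a pair, is replaced by a plain ByteLevel post-processor that assigns type_id 0 everywhere. A minimal sketch for verifying this with the Hugging Face tokenizers library, assuming a local copy of the updated file (the path below is a placeholder):

from tokenizers import Tokenizer

# Load the updated tokenizer.json from this commit (local path assumed).
tok = Tokenizer.from_file("tokenizer.json")

# The removed TemplateProcessing gave the second sequence of a pair
# type_id 1; under the new ByteLevel post-processor every type_id is 0.
enc = tok.encode("first sequence", "second sequence")
print(enc.tokens)
print(enc.type_ids)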
tokenizer_config.json
CHANGED
@@ -1,6 +1,4 @@
 {
-  "add_bos_token": false,
-  "add_eos_token": false,
   "add_prefix_space": false,
   "added_tokens_decoder": {
     "0": {
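With add_bos_token and add_eos_token no longer pinned in tokenizer_config.json, a loaded tokenizer falls back to its class defaults for those flags. A quick check through transformers, assuming this repository loads with AutoTokenizer (the repo id below is a placeholder):

from transformers import AutoTokenizer

# Repo id is a placeholder; substitute the repository this commit lives in.
tok = AutoTokenizer.from_pretrained("org/model")

# With the explicit add_bos_token / add_eos_token entries removed, inspect
# which special tokens, if any, are now added automatically.
ids = tok("Hello world")["input_ids"]
print(tok.convert_ids_to_tokens(ids))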