Upload tokenizer

- tokenizer.json +10 -18
- tokenizer_config.json +0 -1
- vocab.json +0 -0
tokenizer.json
CHANGED
@@ -879,25 +879,17 @@
   ],
   "normalizer": null,
   "pre_tokenizer": {
-    "type": "
-    "
-    … (deleted lines 884-891 not rendered)
-      {
-        "type": "ByteLevel",
-        "add_prefix_space": false,
-        "trim_offsets": true,
-        "use_regex": false
-      }
-    ]
+    "type": "ByteLevel",
+    "add_prefix_space": false,
+    "trim_offsets": true,
+    "use_regex": true
+  },
+  "post_processor": {
+    "type": "ByteLevel",
+    "add_prefix_space": true,
+    "trim_offsets": false,
+    "use_regex": true
   },
-  "post_processor": null,
   "decoder": {
     "type": "ByteLevel",
     "add_prefix_space": true,
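In short, this hunk replaces the old pre_tokenizer, which wrapped a ByteLevel step inside a list of pre-tokenizers, with a single ByteLevel pre_tokenizer (now with "use_regex": true), and swaps the null post_processor for a ByteLevel post-processor with "trim_offsets": false. Below is a minimal sketch of inspecting the updated file with the tokenizers library; the local file name "tokenizer.json" and the sample string are assumptions, not part of this commit:

from tokenizers import Tokenizer

# Load the updated tokenizer.json from the working directory (assumed local path).
tok = Tokenizer.from_file("tokenizer.json")

# With use_regex=True, ByteLevel pre-tokenization applies the GPT-2-style split
# regex before mapping bytes to unicode characters; print the resulting pieces.
print(tok.pre_tokenizer.pre_tokenize_str("Hello world"))

# With trim_offsets=False in the ByteLevel post-processor, leading-space bytes
# are kept inside each token's reported character span rather than trimmed.
enc = tok.encode("Hello world")
print(list(zip(enc.tokens, enc.offsets)))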
tokenizer_config.json
CHANGED
@@ -785,7 +785,6 @@
   "eos_token": "<|im_end|>",
   "errors": "replace",
   "extra_special_tokens": {},
-  "from_slow": true,
   "legacy": false,
   "model_max_length": 16384,
   "pad_token": "<|dummy_87|>",
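Dropping "from_slow": true means loaders no longer force the fast tokenizer to be rebuilt from the slow-tokenizer files on every load; the shipped tokenizer.json is used directly. A hedged sketch of loading through transformers, where the repo id is a placeholder rather than anything named in this commit:

from transformers import AutoTokenizer

# Placeholder repo id; substitute the actual model repository.
tok = AutoTokenizer.from_pretrained("org/model")

# The previous behaviour can still be requested for a single load,
# since from_pretrained accepts from_slow as a keyword argument.
tok_rebuilt = AutoTokenizer.from_pretrained("org/model", from_slow=True)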
vocab.json
CHANGED
The diff for this file is too large to render. See raw diff.
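Because the vocab.json diff is not rendered, the change there is easiest to inspect locally. A sketch that assumes the old and new revisions of the file have been downloaded as vocab_old.json and vocab_new.json (hypothetical names):

import json

# vocab.json maps token strings to integer ids.
with open("vocab_old.json", encoding="utf-8") as f:
    old = json.load(f)
with open("vocab_new.json", encoding="utf-8") as f:
    new = json.load(f)

# Report tokens added, removed, or assigned a different id between revisions.
added = new.keys() - old.keys()
removed = old.keys() - new.keys()
remapped = {t for t in old.keys() & new.keys() if old[t] != new[t]}
print(f"{len(added)} added, {len(removed)} removed, {len(remapped)} remapped")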