Upload tokenizer

Changed files:
- tokenizer.json (+4 -4)
- tokenizer_config.json (+7 -0)
tokenizer.json
CHANGED
@@ -989,8 +989,8 @@
   "pre_tokenizer": {
     "type": "Metaspace",
     "replacement": "▁",
-    "add_prefix_space": true,
-    "prepend_scheme": "always"
+    "prepend_scheme": "always",
+    "split": true
   },
   "post_processor": {
     "type": "TemplateProcessing",
@@ -1049,8 +1049,8 @@
   "decoder": {
     "type": "Metaspace",
     "replacement": "▁",
-    "add_prefix_space": true,
-    "prepend_scheme": "always"
+    "prepend_scheme": "always",
+    "split": true
   },
   "model": {
     "type": "Unigram",
tokenizer_config.json
CHANGED
@@ -954,9 +954,16 @@
   "clean_up_tokenization_spaces": true,
   "eos_token": "</s>",
   "extra_ids": 100,
+  "max_length": 512,
   "model_max_length": 512,
+  "pad_to_multiple_of": null,
   "pad_token": "<pad>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sp_model_kwargs": {},
+  "stride": 0,
   "tokenizer_class": "T5Tokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "<unk>"
 }
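
The seven added keys appear to be padding/truncation defaults of the kind `transformers` serializes into tokenizer_config.json. A minimal sketch (not part of this commit) of how they surface after loading; the repo id is a placeholder, not the actual repository:

# Minimal sketch: the new config keys are read by AutoTokenizer at
# load time. "user/repo" is a hypothetical placeholder id.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("user/repo")  # placeholder repo id

print(tok.model_max_length)  # 512
print(tok.padding_side)      # "right"
print(tok.truncation_side)   # "right"

# Defaults such as stride=0 and truncation_strategy="longest_first"
# then apply when encoding with padding/truncation enabled:
enc = tok("a short example", padding="max_length", truncation=True)
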