ManukyanD committed on
Commit
0aae421
1 Parent(s): f6cc33c

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +4 -4
  2. tokenizer_config.json +7 -0
tokenizer.json CHANGED
@@ -989,8 +989,8 @@
989
  "pre_tokenizer": {
990
  "type": "Metaspace",
991
  "replacement": "▁",
992
- "add_prefix_space": true,
993
- "prepend_scheme": "always"
994
  },
995
  "post_processor": {
996
  "type": "TemplateProcessing",
@@ -1049,8 +1049,8 @@
1049
  "decoder": {
1050
  "type": "Metaspace",
1051
  "replacement": "▁",
1052
- "add_prefix_space": true,
1053
- "prepend_scheme": "always"
1054
  },
1055
  "model": {
1056
  "type": "Unigram",
 
989
  "pre_tokenizer": {
990
  "type": "Metaspace",
991
  "replacement": "▁",
992
+ "prepend_scheme": "always",
993
+ "split": true
994
  },
995
  "post_processor": {
996
  "type": "TemplateProcessing",
 
1049
  "decoder": {
1050
  "type": "Metaspace",
1051
  "replacement": "▁",
1052
+ "prepend_scheme": "always",
1053
+ "split": true
1054
  },
1055
  "model": {
1056
  "type": "Unigram",
tokenizer_config.json CHANGED
@@ -954,9 +954,16 @@
954
  "clean_up_tokenization_spaces": true,
955
  "eos_token": "</s>",
956
  "extra_ids": 100,
 
957
  "model_max_length": 512,
 
958
  "pad_token": "<pad>",
 
 
959
  "sp_model_kwargs": {},
 
960
  "tokenizer_class": "T5Tokenizer",
 
 
961
  "unk_token": "<unk>"
962
  }
 
954
  "clean_up_tokenization_spaces": true,
955
  "eos_token": "</s>",
956
  "extra_ids": 100,
957
+ "max_length": 512,
958
  "model_max_length": 512,
959
+ "pad_to_multiple_of": null,
960
  "pad_token": "<pad>",
961
+ "pad_token_type_id": 0,
962
+ "padding_side": "right",
963
  "sp_model_kwargs": {},
964
+ "stride": 0,
965
  "tokenizer_class": "T5Tokenizer",
966
+ "truncation_side": "right",
967
+ "truncation_strategy": "longest_first",
968
  "unk_token": "<unk>"
969
  }