new5558 committed on
Commit
3b43620
1 Parent(s): 32a058e

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -1 +1,7 @@
1
- {}
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": "<mask>",
6
+ "pad_token": "<pad>"
7
+ }
tokenizer.json CHANGED
@@ -128,6 +128,7 @@
128
  "continuing_subword_prefix": null,
129
  "end_of_word_suffix": "</w>",
130
  "fuse_unk": false,
 
131
  "vocab": {
132
  "<s>": 0,
133
  "<pad>": 1,
 
128
  "continuing_subword_prefix": null,
129
  "end_of_word_suffix": "</w>",
130
  "fuse_unk": false,
131
+ "byte_fallback": false,
132
  "vocab": {
133
  "<s>": 0,
134
  "<pad>": 1,
tokenizer_config.json CHANGED
@@ -1,4 +1,10 @@
1
  {
2
- "model_max_length": 1000000000000000019884624838656,
 
 
 
 
 
 
3
  "tokenizer_class": "PreTrainedTokenizerFast"
4
  }
 
1
  {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": "<mask>",
6
+ "model_max_length": 512,
7
+ "pad_token": "<pad>",
8
+ "special_tokens_map_file": "data/converted_model_mlm_huggingface/special_tokens_map.json",
9
  "tokenizer_class": "PreTrainedTokenizerFast"
10
  }