Aye10032 committed on
Commit
2e570c0
1 Parent(s): f9d5a1d

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -1,3 +1,8 @@
1
  {
2
- "eos_token": "|<EOS>|"
 
 
 
 
 
3
  }
 
1
  {
2
+ "bos_token": "|<BOS>|",
3
+ "cls_token": "|<CLS>|",
4
+ "eos_token": "|<EOS>|",
5
+ "mask_token": "|<MASK>|",
6
+ "pad_token": "|<PAD>|",
7
+ "sep_token": "|<SEP>|"
8
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -57,8 +57,17 @@
57
  "special": true
58
  }
59
  },
 
60
  "clean_up_tokenization_spaces": true,
 
61
  "eos_token": "|<EOS>|",
62
- "model_max_length": 8000,
 
 
 
 
 
 
 
63
  "tokenizer_class": "PreTrainedTokenizerFast"
64
  }
 
57
  "special": true
58
  }
59
  },
60
+ "bos_token": "|<BOS>|",
61
  "clean_up_tokenization_spaces": true,
62
+ "cls_token": "|<CLS>|",
63
  "eos_token": "|<EOS>|",
64
+ "mask_token": "|<MASK>|",
65
+ "max_length": null,
66
+ "model_max_length": 256,
67
+ "pad_to_multiple_of": null,
68
+ "pad_token": "|<PAD>|",
69
+ "pad_token_type_id": 0,
70
+ "padding_side": "right",
71
+ "sep_token": "|<SEP>|",
72
  "tokenizer_class": "PreTrainedTokenizerFast"
73
  }