sgugger ybelkada committed on
Commit
b35374d
1 Parent(s): fbe52b4

Upload tokenizer (#1)

Browse files

- Upload tokenizer (f84d2fe400a7bc474dd84d6967ab694ed1278fba)


Co-authored-by: Younes Belkada <ybelkada@users.noreply.huggingface.co>

Files changed (2) hide show
  1. special_tokens_map.json +5 -1
  2. tokenizer_config.json +5 -1
special_tokens_map.json CHANGED
@@ -1 +1,5 @@
1
- {}
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "unk_token": "<|endoftext|>"
5
+ }
tokenizer_config.json CHANGED
@@ -1,5 +1,9 @@
1
  {
 
 
2
  "clean_up_tokenization_spaces": true,
 
3
  "model_max_length": 1000000000000000019884624838656,
4
- "tokenizer_class": "PreTrainedTokenizerFast"
 
5
  }
 
1
  {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<|endoftext|>",
4
  "clean_up_tokenization_spaces": true,
5
+ "eos_token": "<|endoftext|>",
6
  "model_max_length": 1000000000000000019884624838656,
7
+ "tokenizer_class": "GPTNeoXTokenizer",
8
+ "unk_token": "<|endoftext|>"
9
  }