ClaudiaIoana550 committed on
Commit
972ab63
1 Parent(s): 3b48d04

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -12,5 +12,6 @@
12
  ">>SUFFIX<<",
13
  ">>MIDDLE<<"
14
  ],
15
- "eos_token": "<|endoftext|>"
 
16
  }
 
12
  ">>SUFFIX<<",
13
  ">>MIDDLE<<"
14
  ],
15
+ "eos_token": "<|endoftext|>",
16
+ "pad_token": "<|endoftext|>"
17
  }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,12 +1,11 @@
1
  {
2
  "add_prefix_space": false,
 
3
  "eos_token": "<|endoftext|>",
4
  "model_input_names": [
5
  "input_ids",
6
  "attention_mask"
7
  ],
8
  "model_max_length": 2048,
9
- "name_or_path": "tiiuae/falcon_tokenizer",
10
- "special_tokens_map_file": null,
11
  "tokenizer_class": "PreTrainedTokenizerFast"
12
- }
 
1
  {
2
  "add_prefix_space": false,
3
+ "clean_up_tokenization_spaces": true,
4
  "eos_token": "<|endoftext|>",
5
  "model_input_names": [
6
  "input_ids",
7
  "attention_mask"
8
  ],
9
  "model_max_length": 2048,
 
 
10
  "tokenizer_class": "PreTrainedTokenizerFast"
11
+ }