beothorn committed
Commit a88e793
1 Parent(s): 640c9ea

Upload tokenizer

merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -1,5 +1,6 @@
 {
   "bos_token": "<|endoftext|>",
   "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
   "unk_token": "<|endoftext|>"
 }
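The only change to special_tokens_map.json is the new `pad_token`, which reuses `<|endoftext|>` (GPT-2 checkpoints ship without a dedicated padding token). Below is a minimal sketch of how such files are typically regenerated with the `transformers` library, assuming the base checkpoint named in the tokenizer_config.json diff further down and a hypothetical output directory:

```python
from transformers import AutoTokenizer

# Load the base GPT-2 style tokenizer; GPT-2 defines no pad token by default.
tokenizer = AutoTokenizer.from_pretrained("pierreguillou/gpt2-small-portuguese")

# Reuse the end-of-text token as padding, a common choice for GPT-2 models.
tokenizer.pad_token = tokenizer.eos_token  # "<|endoftext|>"

# save_pretrained() rewrites special_tokens_map.json, tokenizer_config.json,
# tokenizer.json, vocab.json and merges.txt in the target directory.
tokenizer.save_pretrained("tokenizer-out")  # hypothetical path
```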
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,9 +1,13 @@
 {
+  "add_bos_token": false,
   "add_prefix_space": false,
   "bos_token": "<|endoftext|>",
   "eos_token": "<|endoftext|>",
-  "model_max_length": 1024,
-  "name_or_path": "gpt2",
+  "errors": "replace",
+  "full_tokenizer_file": null,
+  "model_max_length": 1000000000000000019884624838656,
+  "name_or_path": "pierreguillou/gpt2-small-portuguese",
+  "pad_token": "<|endoftext|>",
   "special_tokens_map_file": null,
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>"
vocab.json CHANGED
The diff for this file is too large to render. See raw diff
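vocab.json and merges.txt hold the byte-level BPE vocabulary and merge rules, and tokenizer.json is the single-file serialization used by the fast (Rust-backed) tokenizer; all three are rewritten whenever the tokenizer is re-saved, which is why their diffs are too large to render. A minimal sketch of building the slow tokenizer directly from the two BPE files (paths are hypothetical):

```python
from transformers import GPT2Tokenizer

# The slow, pure-Python tokenizer can be constructed from the raw BPE files.
slow_tokenizer = GPT2Tokenizer(
    vocab_file="tokenizer-out/vocab.json",
    merges_file="tokenizer-out/merges.txt",
)

print(slow_tokenizer.tokenize("Olá, mundo!"))
```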