cristianmiranda committed
Commit b3afbd9
1 Parent(s): ce5b147

Upload tokenizer

special_tokens_map.json CHANGED
@@ -1,23 +1,4 @@
 {
-  "bos_token": {
-    "content": "<|begin_of_text|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "<|end_of_text|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "<|reserved_special_token_250|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
+  "bos_token": "<|begin_of_text|>",
+  "eos_token": "<|end_of_text|>"
 }
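
Note that the hunk above does not just flatten the token entries to plain strings: it drops "pad_token" ("<|reserved_special_token_250|>") entirely, so a freshly loaded tokenizer has no pad token and padded batching will fail until one is assigned. A minimal sketch of the usual workaround, assuming transformers is installed; the repo id below is a placeholder, not this repository's actual name:

    from transformers import AutoTokenizer

    # Placeholder repo id -- substitute the actual model repository.
    tok = AutoTokenizer.from_pretrained("some-org/some-model")

    # After this commit the special tokens map defines no pad token, so
    # padding=True raises until one is set. Reusing EOS is the common
    # workaround for decoder-only models.
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token

    batch = tok(["short", "a somewhat longer input"],
                padding=True, return_tensors="pt")
    print(batch["input_ids"].shape)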
tokenizer.json CHANGED
@@ -2407,7 +2407,6 @@
     "end_of_word_suffix": null,
     "fuse_unk": false,
     "byte_fallback": false,
-    "ignore_merges": true,
     "vocab": {
       "!": 0,
       "\"": 1,
tokenizer_config.json CHANGED
@@ -2056,8 +2056,6 @@
2056
  "input_ids",
2057
  "attention_mask"
2058
  ],
2059
- "model_max_length": 8192,
2060
- "pad_token": "<|reserved_special_token_250|>",
2061
- "padding_side": "left",
2062
  "tokenizer_class": "PreTrainedTokenizerFast"
2063
  }
 
2056
  "input_ids",
2057
  "attention_mask"
2058
  ],
2059
+ "model_max_length": 1000000000000000019884624838656,
 
 
2060
  "tokenizer_class": "PreTrainedTokenizerFast"
2061
  }
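
The new value is not arbitrary: 1000000000000000019884624838656 is int(1e30), the VERY_LARGE_INTEGER sentinel transformers writes when no model_max_length is recorded. Practically, truncation=True no longer truncates on its own, and with "pad_token" and "padding_side" also gone, those settings must be restored by hand. A sketch under the same assumptions as above (placeholder repo id; 8192 taken from the pre-commit config):

    from transformers import AutoTokenizer

    # The serialized value is transformers' "unset" sentinel.
    assert 1000000000000000019884624838656 == int(1e30)

    # Placeholder repo id -- substitute the actual model repository.
    tok = AutoTokenizer.from_pretrained("some-org/some-model")

    # Restore the limits this commit removed (values from the old config).
    tok.model_max_length = 8192
    tok.padding_side = "left"

    # Truncation now actually caps sequences at 8192 tokens.
    enc = tok("some very long input " * 4000, truncation=True)
    print(len(enc["input_ids"]) <= 8192)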