filipemiguelmartins committed on
Commit
1f24bf1
1 Parent(s): e2ea3a9

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -1
  2. tokenizer.json +2 -23
  3. tokenizer_config.json +1 -10
special_tokens_map.json CHANGED
@@ -13,5 +13,5 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "[PAD]"
17
  }
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "<|eot_id|>"
17
  }
tokenizer.json CHANGED
@@ -1,19 +1,7 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 128,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
- "padding": {
10
- "strategy": "BatchLongest",
11
- "direction": "Right",
12
- "pad_to_multiple_of": 8,
13
- "pad_id": 128256,
14
- "pad_type_id": 0,
15
- "pad_token": "[PAD]"
16
- },
17
  "added_tokens": [
18
  {
19
  "id": 128000,
@@ -2318,15 +2306,6 @@
2318
  "rstrip": false,
2319
  "normalized": false,
2320
  "special": true
2321
- },
2322
- {
2323
- "id": 128256,
2324
- "content": "[PAD]",
2325
- "single_word": false,
2326
- "lstrip": false,
2327
- "rstrip": false,
2328
- "normalized": false,
2329
- "special": true
2330
  }
2331
  ],
2332
  "normalizer": null,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 128000,
 
2306
  "rstrip": false,
2307
  "normalized": false,
2308
  "special": true
 
 
 
 
 
 
 
 
 
2309
  }
2310
  ],
2311
  "normalizer": null,
tokenizer_config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "add_eos_token": true,
3
  "added_tokens_decoder": {
4
  "128000": {
5
  "content": "<|begin_of_text|>",
@@ -2048,14 +2047,6 @@
2048
  "rstrip": false,
2049
  "single_word": false,
2050
  "special": true
2051
- },
2052
- "128256": {
2053
- "content": "[PAD]",
2054
- "lstrip": false,
2055
- "normalized": false,
2056
- "rstrip": false,
2057
- "single_word": false,
2058
- "special": true
2059
  }
2060
  },
2061
  "bos_token": "<|begin_of_text|>",
@@ -2067,6 +2058,6 @@
2067
  "attention_mask"
2068
  ],
2069
  "model_max_length": 1000000000000000019884624838656,
2070
- "pad_token": "[PAD]",
2071
  "tokenizer_class": "PreTrainedTokenizerFast"
2072
  }
 
1
  {
 
2
  "added_tokens_decoder": {
3
  "128000": {
4
  "content": "<|begin_of_text|>",
 
2047
  "rstrip": false,
2048
  "single_word": false,
2049
  "special": true
 
 
 
 
 
 
 
 
2050
  }
2051
  },
2052
  "bos_token": "<|begin_of_text|>",
 
2058
  "attention_mask"
2059
  ],
2060
  "model_max_length": 1000000000000000019884624838656,
2061
+ "pad_token": "<|eot_id|>",
2062
  "tokenizer_class": "PreTrainedTokenizerFast"
2063
  }