Iheb-Chaabane committed on
Commit
f9f20c3
1 Parent(s): 240aa44

feat add pad_token

Browse files
special_tokens_map.json CHANGED
@@ -32,7 +32,7 @@
32
  "single_word": false
33
  },
34
  "pad_token": {
35
- "content": "<|endoftext|>",
36
  "lstrip": false,
37
  "normalized": false,
38
  "rstrip": false,
 
32
  "single_word": false
33
  },
34
  "pad_token": {
35
+ "content": "<|pad|>",
36
  "lstrip": false,
37
  "normalized": false,
38
  "rstrip": false,
tokenizer.json CHANGED
@@ -18212,7 +18212,7 @@
18212
  },
18213
  {
18214
  "id": 2023,
18215
- "content": ">>UNUSED_1897<<",
18216
  "single_word": false,
18217
  "lstrip": false,
18218
  "rstrip": false,
@@ -20280,7 +20280,7 @@
20280
  ">>UNUSED_1894<<": 2020,
20281
  ">>UNUSED_1895<<": 2021,
20282
  ">>UNUSED_1896<<": 2022,
20283
- ">>UNUSED_1897<<": 2023,
20284
  "!": 2024,
20285
  "\"": 2025,
20286
  "#": 2026,
 
18212
  },
18213
  {
18214
  "id": 2023,
18215
+ "content": "<|pad|>",
18216
  "single_word": false,
18217
  "lstrip": false,
18218
  "rstrip": false,
 
20280
  ">>UNUSED_1894<<": 2020,
20281
  ">>UNUSED_1895<<": 2021,
20282
  ">>UNUSED_1896<<": 2022,
20283
+ "<|pad|>": 2023,
20284
  "!": 2024,
20285
  "\"": 2025,
20286
  "#": 2026,
tokenizer_config.json CHANGED
@@ -16186,7 +16186,7 @@
16186
  "special": true
16187
  },
16188
  "2023": {
16189
- "content": ">>UNUSED_1897<<",
16190
  "lstrip": false,
16191
  "normalized": false,
16192
  "rstrip": false,
@@ -16226,7 +16226,7 @@
16226
  "input_ids",
16227
  "attention_mask"
16228
  ],
16229
- "model_max_length": 8192,
16230
- "pad_token": "<|endoftext|>",
16231
  "tokenizer_class": "PreTrainedTokenizerFast"
16232
  }
 
16186
  "special": true
16187
  },
16188
  "2023": {
16189
+ "content": "<|pad|>",
16190
  "lstrip": false,
16191
  "normalized": false,
16192
  "rstrip": false,
 
16226
  "input_ids",
16227
  "attention_mask"
16228
  ],
16229
+ "model_max_length": 32768,
16230
+ "pad_token": "<|pad|>",
16231
  "tokenizer_class": "PreTrainedTokenizerFast"
16232
  }