Iheb-Chaabane committed on
Commit
893fefd
1 Parent(s): ca6ceed

feat add pad_token

Browse files
special_tokens_map.json CHANGED
@@ -32,7 +32,7 @@
32
  "single_word": false
33
  },
34
  "pad_token": {
35
- "content": "<|endoftext|>",
36
  "lstrip": false,
37
  "normalized": false,
38
  "rstrip": false,
 
32
  "single_word": false
33
  },
34
  "pad_token": {
35
+ "content": "<|pad|>",
36
  "lstrip": false,
37
  "normalized": false,
38
  "rstrip": false,
tokenizer.json CHANGED
@@ -18212,7 +18212,7 @@
18212
  },
18213
  {
18214
  "id": 2023,
18215
- "content": ">>UNUSED_1897<<",
18216
  "single_word": false,
18217
  "lstrip": false,
18218
  "rstrip": false,
@@ -20280,7 +20280,7 @@
20280
  ">>UNUSED_1894<<": 2020,
20281
  ">>UNUSED_1895<<": 2021,
20282
  ">>UNUSED_1896<<": 2022,
20283
- ">>UNUSED_1897<<": 2023,
20284
  "!": 2024,
20285
  "\"": 2025,
20286
  "#": 2026,
 
18212
  },
18213
  {
18214
  "id": 2023,
18215
+ "content": "<|pad|>",
18216
  "single_word": false,
18217
  "lstrip": false,
18218
  "rstrip": false,
 
20280
  ">>UNUSED_1894<<": 2020,
20281
  ">>UNUSED_1895<<": 2021,
20282
  ">>UNUSED_1896<<": 2022,
20283
+ "<|pad|>": 2023,
20284
  "!": 2024,
20285
  "\"": 2025,
20286
  "#": 2026,
tokenizer_config.json CHANGED
@@ -16186,7 +16186,7 @@
16186
  "special": true
16187
  },
16188
  "2023": {
16189
- "content": ">>UNUSED_1897<<",
16190
  "lstrip": false,
16191
  "normalized": false,
16192
  "rstrip": false,
@@ -16219,7 +16219,6 @@
16219
  ">>PASSWORD<<",
16220
  ">>KEY<<"
16221
  ],
16222
- "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{% if not loop.last %}{{ '<|assistant|>\n' + message['content'] + eos_token + '\n' }}{% else %}{{ '<|assistant|>\n' + message['content'] + eos_token }}{% endif %}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>\n' }}{% endif %}{% endfor %}",
16223
  "clean_up_tokenization_spaces": true,
16224
  "eos_token": "<|endoftext|>",
16225
  "extra_special_tokens": {},
@@ -16227,7 +16226,7 @@
16227
  "input_ids",
16228
  "attention_mask"
16229
  ],
16230
- "model_max_length": 8192,
16231
- "pad_token": "<|endoftext|>",
16232
  "tokenizer_class": "PreTrainedTokenizerFast"
16233
  }
 
16186
  "special": true
16187
  },
16188
  "2023": {
16189
+ "content": "<|pad|>",
16190
  "lstrip": false,
16191
  "normalized": false,
16192
  "rstrip": false,
 
16219
  ">>PASSWORD<<",
16220
  ">>KEY<<"
16221
  ],
 
16222
  "clean_up_tokenization_spaces": true,
16223
  "eos_token": "<|endoftext|>",
16224
  "extra_special_tokens": {},
 
16226
  "input_ids",
16227
  "attention_mask"
16228
  ],
16229
+ "model_max_length": 32768,
16230
+ "pad_token": "<|pad|>",
16231
  "tokenizer_class": "PreTrainedTokenizerFast"
16232
  }