mrm8488 committed on
Commit
0984ffc
1 Parent(s): 3c0c9df

Update tokenizer_config.json

Browse files

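It seems the EOS token should be `<|end|>` (id 32007) rather than `<|endoftext|>` (id 32000): the chat template terminates every turn with `<|end|>`, so this commit sets `eos_token` to `<|end|>` so that `eos_token_id` resolves to 32007.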
Context: https://twitter.com/altryne/status/1783567596467491109?t=k5HHVmTCGDt4-TkXF8KyNw&s=19

Files changed (1) hide show
  1. tokenizer_config.json +1 -1
tokenizer_config.json CHANGED
@@ -118,7 +118,7 @@
118
  "bos_token": "<s>",
119
  "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|system|>' + '\n' + message['content'] + '<|end|>' + '\n'}}{% elif (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif message['role'] == 'assistant' %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
120
  "clean_up_tokenization_spaces": false,
121
- "eos_token": "<|endoftext|>",
122
  "legacy": false,
123
  "model_max_length": 4096,
124
  "pad_token": "<|endoftext|>",
 
118
  "bos_token": "<s>",
119
  "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|system|>' + '\n' + message['content'] + '<|end|>' + '\n'}}{% elif (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif message['role'] == 'assistant' %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
120
  "clean_up_tokenization_spaces": false,
121
+ "eos_token": "<|end|>",
122
  "legacy": false,
123
  "model_max_length": 4096,
124
  "pad_token": "<|endoftext|>",