HighCloud / tokenizer_config.json
DINGDINGBELLS's picture
Update tokenizer_config.json
7c2a450 verified
{
  "add_bos_token": true,
  "add_eos_token": false,
  "additional_special_tokens": [
    "<|vision_16|>",
    "<|logic_32|>",
    "<|lang_ru|>",
    "<|lang_en|>",
    "<|lang_ua|>",
    "<|lang_by|>"
  ],
  "bos_token": "<|endoftext|>",
  "chat_template": "{% for message in messages %}{{ '<|lang_ru|>' + message['role'] + '\\n' + message['content'] + '<|endoftext|>' + '\\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|lang_ru|>assistant\\n' }}{% endif %}",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|endoftext|>",
  "model_max_length": 2048,
  "pad_token": "[PAD]",
  "tokenizer_class": "GPT2Tokenizer"
}