Fix Incorrect Prompt Template defined in tokenizer_config.json
The `chat_template` property in `tokenizer_config.json` currently contains the Llama-3 chat template rather than ChatML, which is the format this model was actually trained with. The template is picked up by various tools and inference applications, so it should reflect the template the model really uses.
- tokenizer_config.json (+1, -1)
@@ -2066,7 +2066,7 @@
     }
   },
   "bos_token": "<|begin_of_text|>",
-  "chat_template": "{% set …
+  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|im_end|>",
   "model_input_names": [
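With the corrected template, anything that calls `apply_chat_template` builds ChatML prompts directly from `tokenizer_config.json`. A minimal sanity check with `transformers` (the model ID below is a placeholder, not this repository's actual name):

```python
from transformers import AutoTokenizer

# Placeholder ID; substitute the actual repository name when running this.
tokenizer = AutoTokenizer.from_pretrained("your-org/your-model")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

# apply_chat_template renders the messages with the chat_template
# stored in tokenizer_config.json.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
# Expected ChatML-style output:
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# Hello!<|im_end|>
# <|im_start|>assistant
```

Before this change the same call would have produced a Llama-3-formatted prompt instead.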