Add "chat_template" to tokenizer_config.json
Since version 4.34.0, Transformers has added a feature called chat_template to its tokenizer. Like other LLMs, PLaMo also performs better in dialogues when inputs are provided according to the expected template. This Pull Request (PR) adds the necessary fields to tokenizer_config.json to use Transformers' apply_chat_template() function to transform inputs into the format expected by PLaMo.
- tokenizer_config.json +1 -0
tokenizer_config.json
CHANGED
@@ -75,6 +75,7 @@
|
|
75 |
]
|
76 |
},
|
77 |
"bos_token": "<s>",
|
|
|
78 |
"clean_up_tokenization_spaces": false,
|
79 |
"cls_token": "<cls>",
|
80 |
"eos_token": "</s>",
|
|
|
75 |
]
|
76 |
},
|
77 |
"bos_token": "<s>",
|
78 |
+
"chat_template": "{{ '以下はタスクを説明する指示で、文脈を説明した入力とペアになっています。要求を適切に補完するよう応答を書いてください。' }}\n\n{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '### 指示:\n' + message['content'].strip() }}\n\n{% endif %}\n{% endfor %}\n{{ '### 応答:' }}",
|
79 |
"clean_up_tokenization_spaces": false,
|
80 |
"cls_token": "<cls>",
|
81 |
"eos_token": "</s>",
|