Xenova HF staff committed on
Commit
8a9c7f9
1 Parent(s): 2b9c48d

Add default chat template to tokenizer_config.json

Browse files

[Automated] This PR adds the default chat template to the tokenizer config, allowing the model to be used with the new conversational widget (see [PR](https://github.com/huggingface/huggingface.js/pull/457)).

If the default is not appropriate for your model, please set `tokenizer.chat_template` to an appropriate template. See https://huggingface.co/docs/transformers/main/chat_templating for more information.

Files changed (1) hide show
  1. tokenizer_config.json +37 -1
tokenizer_config.json CHANGED
@@ -1 +1,37 @@
1
- {"errors": "replace", "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "pad_token": "<|endoftext|>", "cls_token": "<|cls|>", "sep_token": "<|sep|>", "special_tokens_map_file": null, "full_tokenizer_file": null, "tokenizer_file": "/home/af1tang/convogym/checkpoint/model/tokenizer.json", "name_or_path": "/home/af1tang/convogym/checkpoint/model/", "tokenizer_class": "GPT2Tokenizer"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "errors": "replace",
3
+ "unk_token": {
4
+ "content": "<|endoftext|>",
5
+ "single_word": false,
6
+ "lstrip": false,
7
+ "rstrip": false,
8
+ "normalized": true,
9
+ "__type": "AddedToken"
10
+ },
11
+ "bos_token": {
12
+ "content": "<|endoftext|>",
13
+ "single_word": false,
14
+ "lstrip": false,
15
+ "rstrip": false,
16
+ "normalized": true,
17
+ "__type": "AddedToken"
18
+ },
19
+ "eos_token": {
20
+ "content": "<|endoftext|>",
21
+ "single_word": false,
22
+ "lstrip": false,
23
+ "rstrip": false,
24
+ "normalized": true,
25
+ "__type": "AddedToken"
26
+ },
27
+ "add_prefix_space": false,
28
+ "pad_token": "<|endoftext|>",
29
+ "cls_token": "<|cls|>",
30
+ "sep_token": "<|sep|>",
31
+ "special_tokens_map_file": null,
32
+ "full_tokenizer_file": null,
33
+ "tokenizer_file": "/home/af1tang/convogym/checkpoint/model/tokenizer.json",
34
+ "name_or_path": "/home/af1tang/convogym/checkpoint/model/",
35
+ "tokenizer_class": "GPT2Tokenizer",
36
+ "chat_template": "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}"
37
+ }