bartowski committed on
Commit
c146b67
1 Parent(s): 2ef8b45

Add ChatML format to tokenizer_config.json chat_template

Browse files

The chat_template field is read by many inference tools, so it's worth defining here to make sure people are using the correct prompt format :)

Files changed (1) hide show
  1. tokenizer_config.json +1 -0
tokenizer_config.json CHANGED
@@ -29,6 +29,7 @@
29
  }
30
  },
31
  "bos_token": "<s>",
 
32
  "clean_up_tokenization_spaces": false,
33
  "eos_token": "</s>",
34
  "legacy": true,
 
29
  }
30
  },
31
  "bos_token": "<s>",
32
+ "chat_template": "{%- set ns = namespace(found=false) -%}{%- for message in messages -%}{%- if message['role'] == 'system' -%}{%- set ns.found = true -%}{%- endif -%}{%- endfor -%}{%- for message in messages %}{%- if message['role'] == 'system' -%}{{- '<|im_start|>system\n' + message['content'].rstrip() + '<|im_end|>\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'<|im_start|>user\n' + message['content'].rstrip() + '<|im_end|>\n'-}}{%- else -%}{{-'<|im_start|>assistant\n' + message['content'] + '<|im_end|>\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'<|im_start|>assistant\n'-}}{%- endif -%}",
33
  "clean_up_tokenization_spaces": false,
34
  "eos_token": "</s>",
35
  "legacy": true,