pgarbacki committed on
Commit
10cda3d
1 Parent(s): 1569f53

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +1 -1
tokenizer_config.json CHANGED
@@ -45,7 +45,7 @@
45
  },
46
  "additional_special_tokens": [],
47
  "bos_token": "<s>",
48
- "chat_template": "{%- set _mode = mode | default('generate', true) -%}\n{%- set message_roles = ['SYSTEM', 'USER', 'ASSISTANT', 'TOOL'] -%}\n{%- set ns = namespace(seen_non_system=false, messages=messages, content='') -%}\n{%- if _mode == 'generate' -%}\n{{ bos_token }}\n{%- endif -%}\n{#- Basic consistency checks -#}\n{%- if not messages -%}\n {{ raise_exception('Expected non-empty messages') }}\n{%- endif -%}\n{%- if messages[0]['role'] | upper != 'SYSTEM' -%}\n {%- set ns.messages = [{'role': 'SYSTEM', 'content': 'You are a helpful assistant with access to functions. Use them if required.'}] + messages -%}\n{%- endif -%}\n{%- for message in ns.messages -%}\n {%- set role = message['role'] | upper -%}\n {%- set ns.content = message['content'] if message.get('content') else '' -%}\n {%- if _mode == 'generate' -%}\n {#- Move tool calls inside the content -#}\n {%- if 'tool_calls' in message -%}\n {%- for call in message['tool_calls'] -%}\n {%- if not loop.first -%}\n {%- set ns.content = ns.content + ' ' -%}\n {%- endif -%}\n {%- set ns.content = ns.content + '<functioncall>{\"name\": \"' + call['function']['name'] + '\", \"arguments\": ' + call['function']['arguments'] + '}' -%}\n {# \\n\\nDEBUG: content: {{ ns.content }}\\n\\n #}\n {%- endfor -%}\n {%- endif -%}\n {%- endif -%}\n {#- Validation -#}\n {%- if role not in message_roles -%}\n {{ raise_exception('Invalid role ' + message['role'] + '. Only ' + message_roles + ' are supported.') }}\n {%- endif -%}\n {%- if role == 'SYSTEM' and ns.seen_non_system -%}\n {{ raise_exception('SYSTEM messages have to be at the front') }}\n {%- endif -%}\n {#- First message is guaranteed to be a SYSTEM message per earlier checks -#}\n {%- if loop.first -%}\n SYSTEM: {{ ns.content }}\n {%- continue -%}\n {%- endif -%}\n {%- if role == ns.messages[loop.index0 - 1]['role'] | upper -%}\n {{ ns.content }}\n {%- if role == 'ASSISTANT' and (loop.last or ns.messages[loop.index0 + 1]['role'] | upper != 'ASSISTANT') -%}\n {{ eos_token }}\n {%- endif -%}\n {%- continue -%}\n {%- endif -%}\n {%- if role == 'ASSISTANT' and '<functioncall>' not in ns.content -%}\n {#- TODO: make it work for duplicate messages where function call is in a subsequent message -#}\n {%- set ns.content = '<plain>' + ns.content -%}\n {%- endif -%}\n {#- First message after the SYSTEM section -#}\n {%- if not ns.seen_non_system and role != 'SYSTEM' -%}\n {%- set ns.seen_non_system = true -%}\n {{ '\\n\\n' }}FUNCTIONS: {{ functions }}{{ '\\n\\n' }}\n {#- Prompt masking separator -#}\n {%- if _mode == 'train' -%}\n {{ unk_token }}\n {%- endif -%}\n {{ role }}: {{ ns.content }}\n {%- if role == 'ASSISTANT' and (loop.last or ns.messages[loop.index0 + 1]['role'] | upper != 'ASSISTANT') -%}\n {{ eos_token }}\n {%- endif -%}\n {%- continue -%}\n {%- endif -%}\n {{ '\\n\\n' }}{{ role }}: {{ ns.content }}\n {%- if role == 'ASSISTANT' and (loop.last or ns.messages[loop.index0 + 1]['role'] | upper != 'ASSISTANT') -%}\n {{ eos_token }}\n {%- endif -%}\n{%- endfor -%}\n{%- if _mode == 'generate' -%}\n {{ '\\n\\n' }}ASSISTANT:{{ ' ' }}\n{%- endif -%}\n",
49
  "clean_up_tokenization_spaces": false,
50
  "eos_token": "</s>",
51
  "legacy": false,
 
45
  },
46
  "additional_special_tokens": [],
47
  "bos_token": "<s>",
48
+ "chat_template": "{%- set _mode = mode | default('generate', true) -%}\n{%- set message_roles = ['SYSTEM', 'USER', 'ASSISTANT', 'TOOL'] -%}\n{%- set ns = namespace(seen_non_system=false, messages=messages, content='', functions=functions | default('[]')) -%}\n{%- if _mode == 'generate' -%}\n{{ bos_token }}\n{%- endif -%}\n{#- Basic consistency checks -#}\n{%- if not ns.messages -%}\n {{ raise_exception('Expected non-empty messages') }}\n{%- endif -%}\n{%- if ns.messages[0]['role'] | upper == 'FUNCTIONS' -%}\n {%- set ns.functions = ns.messages[0]['content'] -%}\n {%- set ns.messages = ns.messages[1:] -%}\n{%- endif -%}\n{%- if not ns.messages -%}\n {{ raise_exception('Expected non-empty messages') }}\n{%- endif -%}\n{%- if ns.messages[0]['role'] | upper != 'SYSTEM' -%}\n {%- set ns.messages = [{'role': 'SYSTEM', 'content': 'You are a helpful assistant with access to functions. Use them if required.'}] + ns.messages -%}\n{%- endif -%}\n{%- for message in ns.messages -%}\n {%- set role = message['role'] | upper -%}\n {%- set ns.content = message['content'] if message.get('content') else '' -%}\n {%- if _mode == 'generate' -%}\n {#- Move tool calls inside the content -#}\n {%- if 'tool_calls' in message -%}\n {%- for call in message['tool_calls'] -%}\n {%- if not loop.first -%}\n {%- set ns.content = ns.content + ' ' -%}\n {%- endif -%}\n {%- set ns.content = ns.content + '<functioncall>{\"name\": \"' + call['function']['name'] + '\", \"arguments\": ' + call['function']['arguments'] + '}' -%}\n {%- endfor -%}\n {%- endif -%}\n {%- endif -%}\n {#- Validation -#}\n {%- if role not in message_roles -%}\n {{ raise_exception('Invalid role ' + message['role'] + '. Only ' + message_roles + ' are supported.') }}\n {%- endif -%}\n {%- if role == 'SYSTEM' and ns.seen_non_system -%}\n {{ raise_exception('SYSTEM messages have to be at the front') }}\n {%- endif -%}\n {#- First message is guaranteed to be a SYSTEM message per earlier checks -#}\n {%- if loop.first -%}\n SYSTEM: {{ ns.content }}\n {%- continue -%}\n {%- endif -%}\n {%- if role == ns.messages[loop.index0 - 1]['role'] | upper -%}\n {{ ns.content }}\n {%- if role == 'ASSISTANT' and (loop.last or ns.messages[loop.index0 + 1]['role'] | upper != 'ASSISTANT') -%}\n {{ eos_token }}\n {%- endif -%}\n {%- continue -%}\n {%- endif -%}\n {%- if role == 'ASSISTANT' and '<functioncall>' not in ns.content -%}\n {#- TODO: make it work for duplicate messages where function call is in a subsequent message -#}\n {%- set ns.content = '<plain>' + ns.content -%}\n {%- endif -%}\n {#- First message after the SYSTEM section -#}\n {%- if not ns.seen_non_system and role != 'SYSTEM' -%}\n {%- set ns.seen_non_system = true -%}\n {{ '\\n\\n' }}FUNCTIONS: {{ ns.functions }}{{ '\\n\\n' }}\n {#- Prompt masking separator -#}\n {%- if _mode == 'train' -%}\n {{ unk_token }}\n {%- endif -%}\n {{ role }}: {{ ns.content }}\n {%- if role == 'ASSISTANT' and (loop.last or ns.messages[loop.index0 + 1]['role'] | upper != 'ASSISTANT') -%}\n {{ eos_token }}\n {%- endif -%}\n {%- continue -%}\n {%- endif -%}\n {{ '\\n\\n' }}{{ role }}: {{ ns.content }}\n {%- if role == 'ASSISTANT' and (loop.last or ns.messages[loop.index0 + 1]['role'] | upper != 'ASSISTANT') -%}\n {{ eos_token }}\n {%- endif -%}\n{%- endfor -%}\n{%- if _mode == 'generate' -%}\n {{ '\\n\\n' }}ASSISTANT:{{ ' ' }}\n{%- endif -%}\n",
49
  "clean_up_tokenization_spaces": false,
50
  "eos_token": "</s>",
51
  "legacy": false,