firefunction-v1 / tokenizer_config.json
pgarbacki's picture
Upload tokenizer
10cda3d verified
raw history blame
No virus
4.84 kB
{
"add_bos_token": true,
"add_eos_token": false,
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32000": {
"content": "<functioncall>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32001": {
"content": "<plain>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
}
},
"additional_special_tokens": [],
"bos_token": "<s>",
"chat_template": "{%- set _mode = mode | default('generate', true) -%}\n{%- set message_roles = ['SYSTEM', 'USER', 'ASSISTANT', 'TOOL'] -%}\n{%- set ns = namespace(seen_non_system=false, messages=messages, content='', functions=functions | default('[]')) -%}\n{%- if _mode == 'generate' -%}\n{{ bos_token }}\n{%- endif -%}\n{#- Basic consistency checks -#}\n{%- if not ns.messages -%}\n {{ raise_exception('Expected non-empty messages') }}\n{%- endif -%}\n{%- if ns.messages[0]['role'] | upper == 'FUNCTIONS' -%}\n {%- set ns.functions = ns.messages[0]['content'] -%}\n {%- set ns.messages = ns.messages[1:] -%}\n{%- endif -%}\n{%- if not ns.messages -%}\n {{ raise_exception('Expected non-empty messages') }}\n{%- endif -%}\n{%- if ns.messages[0]['role'] | upper != 'SYSTEM' -%}\n {%- set ns.messages = [{'role': 'SYSTEM', 'content': 'You are a helpful assistant with access to functions. Use them if required.'}] + ns.messages -%}\n{%- endif -%}\n{%- for message in ns.messages -%}\n {%- set role = message['role'] | upper -%}\n {%- set ns.content = message['content'] if message.get('content') else '' -%}\n {%- if _mode == 'generate' -%}\n {#- Move tool calls inside the content -#}\n {%- if 'tool_calls' in message -%}\n {%- for call in message['tool_calls'] -%}\n {%- if not loop.first -%}\n {%- set ns.content = ns.content + ' ' -%}\n {%- endif -%}\n {%- set ns.content = ns.content + '<functioncall>{\"name\": \"' + call['function']['name'] + '\", \"arguments\": ' + call['function']['arguments'] + '}' -%}\n {%- endfor -%}\n {%- endif -%}\n {%- endif -%}\n {#- Validation -#}\n {%- if role not in message_roles -%}\n {{ raise_exception('Invalid role ' + message['role'] + '. Only ' + message_roles + ' are supported.') }}\n {%- endif -%}\n {%- if role == 'SYSTEM' and ns.seen_non_system -%}\n {{ raise_exception('SYSTEM messages have to be at the front') }}\n {%- endif -%}\n {#- First message is guaranteed to be a SYSTEM message per earlier checks -#}\n {%- if loop.first -%}\n SYSTEM: {{ ns.content }}\n {%- continue -%}\n {%- endif -%}\n {%- if role == ns.messages[loop.index0 - 1]['role'] | upper -%}\n {{ ns.content }}\n {%- if role == 'ASSISTANT' and (loop.last or ns.messages[loop.index0 + 1]['role'] | upper != 'ASSISTANT') -%}\n {{ eos_token }}\n {%- endif -%}\n {%- continue -%}\n {%- endif -%}\n {%- if role == 'ASSISTANT' and '<functioncall>' not in ns.content -%}\n {#- TODO: make it work for duplicate messages where function call is in a subsequent message -#}\n {%- set ns.content = '<plain>' + ns.content -%}\n {%- endif -%}\n {#- First message after the SYSTEM section -#}\n {%- if not ns.seen_non_system and role != 'SYSTEM' -%}\n {%- set ns.seen_non_system = true -%}\n {{ '\\n\\n' }}FUNCTIONS: {{ ns.functions }}{{ '\\n\\n' }}\n {#- Prompt masking separator -#}\n {%- if _mode == 'train' -%}\n {{ unk_token }}\n {%- endif -%}\n {{ role }}: {{ ns.content }}\n {%- if role == 'ASSISTANT' and (loop.last or ns.messages[loop.index0 + 1]['role'] | upper != 'ASSISTANT') -%}\n {{ eos_token }}\n {%- endif -%}\n {%- continue -%}\n {%- endif -%}\n {{ '\\n\\n' }}{{ role }}: {{ ns.content }}\n {%- if role == 'ASSISTANT' and (loop.last or ns.messages[loop.index0 + 1]['role'] | upper != 'ASSISTANT') -%}\n {{ eos_token }}\n {%- endif -%}\n{%- endfor -%}\n{%- if _mode == 'generate' -%}\n {{ '\\n\\n' }}ASSISTANT:{{ ' ' }}\n{%- endif -%}\n",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"legacy": false,
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<unk>",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}