File size: 4,841 Bytes
f064372
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10cda3d
f064372
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
{
  "add_bos_token": true,
  "add_eos_token": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "32000": {
      "content": "<functioncall>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "32001": {
      "content": "<plain>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": false
    }
  },
  "additional_special_tokens": [],
  "bos_token": "<s>",
  "chat_template": "{%- set _mode = mode | default('generate', true) -%}\n{%- set message_roles = ['SYSTEM', 'USER', 'ASSISTANT', 'TOOL'] -%}\n{%- set ns = namespace(seen_non_system=false, messages=messages, content='', functions=functions | default('[]')) -%}\n{%- if _mode == 'generate' -%}\n{{ bos_token }}\n{%- endif -%}\n{#- Basic consistency checks -#}\n{%- if not ns.messages -%}\n  {{ raise_exception('Expected non-empty messages') }}\n{%- endif -%}\n{%- if ns.messages[0]['role'] | upper == 'FUNCTIONS' -%}\n  {%- set ns.functions = ns.messages[0]['content'] -%}\n  {%- set ns.messages = ns.messages[1:] -%}\n{%- endif -%}\n{%- if not ns.messages -%}\n  {{ raise_exception('Expected non-empty messages') }}\n{%- endif -%}\n{%- if ns.messages[0]['role'] | upper != 'SYSTEM' -%}\n  {%- set ns.messages = [{'role': 'SYSTEM', 'content': 'You are a helpful assistant with access to functions. Use them if required.'}] + ns.messages -%}\n{%- endif -%}\n{%- for message in ns.messages -%}\n  {%- set role = message['role'] | upper -%}\n  {%- set ns.content = message['content'] if message.get('content') else '' -%}\n  {%- if _mode == 'generate' -%}\n    {#- Move tool calls inside the content -#}\n    {%- if 'tool_calls' in message -%}\n      {%- for call in message['tool_calls'] -%}\n        {%- if not loop.first -%}\n          {%- set ns.content = ns.content + ' ' -%}\n        {%- endif -%}\n        {%- set ns.content = ns.content + '<functioncall>{\"name\": \"' + call['function']['name'] + '\", \"arguments\": ' + call['function']['arguments'] + '}' -%}\n      {%- endfor -%}\n    {%- endif -%}\n  {%- endif -%}\n  {#- Validation -#}\n  {%- if role not in message_roles -%}\n    {{ raise_exception('Invalid role ' + message['role'] + '. Only ' + message_roles + ' are supported.') }}\n  {%- endif -%}\n  {%- if role == 'SYSTEM' and ns.seen_non_system -%}\n    {{ raise_exception('SYSTEM messages have to be at the front') }}\n  {%- endif -%}\n  {#- First message is guaranteed to be a SYSTEM message per earlier checks -#}\n  {%- if loop.first -%}\n    SYSTEM: {{ ns.content }}\n    {%- continue -%}\n  {%- endif -%}\n  {%- if role == ns.messages[loop.index0 - 1]['role'] | upper -%}\n    {{ ns.content }}\n    {%- if role == 'ASSISTANT' and (loop.last or ns.messages[loop.index0 + 1]['role'] | upper != 'ASSISTANT') -%}\n      {{ eos_token }}\n    {%- endif -%}\n    {%- continue -%}\n  {%- endif -%}\n  {%- if role == 'ASSISTANT' and '<functioncall>' not in ns.content -%}\n    {#- TODO: make it work for duplicate messages where function call is in a subsequent message -#}\n    {%- set ns.content = '<plain>' + ns.content -%}\n  {%- endif -%}\n  {#- First message after the SYSTEM section -#}\n  {%- if not ns.seen_non_system and role != 'SYSTEM' -%}\n    {%- set ns.seen_non_system = true -%}\n    {{ '\\n\\n' }}FUNCTIONS: {{ ns.functions }}{{ '\\n\\n' }}\n    {#- Prompt masking separator -#}\n    {%- if _mode == 'train' -%}\n      {{ unk_token }}\n    {%- endif -%}\n    {{ role }}: {{ ns.content }}\n    {%- if role == 'ASSISTANT' and (loop.last or ns.messages[loop.index0 + 1]['role'] | upper != 'ASSISTANT') -%}\n      {{ eos_token }}\n    {%- endif -%}\n    {%- continue -%}\n  {%- endif -%}\n  {{ '\\n\\n' }}{{ role }}: {{ ns.content }}\n  {%- if role == 'ASSISTANT' and (loop.last or ns.messages[loop.index0 + 1]['role'] | upper != 'ASSISTANT') -%}\n    {{ eos_token }}\n  {%- endif -%}\n{%- endfor -%}\n{%- if _mode == 'generate' -%}\n  {{ '\\n\\n' }}ASSISTANT:{{ ' ' }}\n{%- endif -%}\n",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "legacy": false,
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "<unk>",
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": false
}