alicecomfy committed on
Commit
ed457a1
1 Parent(s): cf06159

Revert further tweaks to tokenizer config

Browse files

My previous reversion didn't fix the regression; the issue was not the chat_template but the eos token and the addition of tokens that are not in the model's embeddings. This will be fixed in V2.0 shortly.

For now, using the model as is with the ChatML template provides superior results.

Files changed (1) hide show
  1. tokenizer_config.json +1 -17
tokenizer_config.json CHANGED
@@ -25,28 +25,12 @@
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": true
28
- },
29
- "32000": {
30
- "content": "<|im_end|>",
31
- "lstrip": false,
32
- "normalized": false,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": true
36
- },
37
- "32001": {
38
- "content": "<|im_start|>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": false
44
  }
45
  },
46
  "bos_token": "<s>",
47
  "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
48
  "clean_up_tokenization_spaces": false,
49
- "eos_token": "<s>",
50
  "legacy": false,
51
  "model_max_length": 1000000000000000019884624838656,
52
  "pad_token": "<unk>",
 
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  }
29
  },
30
  "bos_token": "<s>",
31
  "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
32
  "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
  "legacy": false,
35
  "model_max_length": 1000000000000000019884624838656,
36
  "pad_token": "<unk>",