rahuljoy committed on
Commit
5111eab
·
verified ·
1 Parent(s): 6ac3f8b

Upload tokenizer

Browse files
Files changed (3) hide show
  1. chat_template.jinja +13 -20
  2. tokenizer.json +2 -2
  3. tokenizer_config.json +8 -12
chat_template.jinja CHANGED
@@ -1,22 +1,15 @@
1
-
2
- {{ bos_token }}
3
- {% if messages[0]['role'] == 'system' %}
4
- {{ raise_exception('System role not supported') }}
5
- {% endif %}
6
  {% for message in messages %}
7
- {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
8
- {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
9
- {% endif %}
10
- {% if message['role'] == 'assistant' %}
11
- <start_of_turn>model
12
- {% generation %}{{ message['content'] | trim }}{% endgeneration %}
13
- <end_of_turn>
14
- {% else %}
15
- <start_of_turn>{{ message['role'] }}
16
- {{ message['content'] | trim }}
17
- <end_of_turn>
18
- {% endif %}
19
- {% endfor %}
20
- {% if add_generation_prompt %}
21
- <start_of_turn>model
22
  {% endif %}
 
 
 
 
 
 
 
1
  {% for message in messages %}
2
+ {% if message['role'] == 'user' %}
3
+ {{ '<|user|>
4
+ ' + message['content'] + eos_token }}
5
+ {% elif message['role'] == 'system' %}
6
+ {{ '<|system|>
7
+ ' + message['content'] + eos_token }}
8
+ {% elif message['role'] == 'assistant' %}
9
+ {{ '<|assistant|>
10
+ ' + message['content'] + eos_token }}
11
+ {% endif %}
12
+ {% if loop.last and add_generation_prompt %}
13
+ {{ '<|assistant|>' }}
 
 
 
14
  {% endif %}
15
+ {% endfor %}
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:487cee8724215dcd2dde8888539e8b1bf844ceb5dbbe27f7845abda69eeb060f
3
- size 34362872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81bb383c6138159665cd350a59fbcb5aa1fe65712cf5ed9b6481bc802cdbd8e5
3
+ size 3618870
tokenizer_config.json CHANGED
@@ -1,19 +1,15 @@
1
  {
 
2
  "backend": "tokenizers",
3
- "bos_token": "<bos>",
4
  "clean_up_tokenization_spaces": false,
5
- "eos_token": "<eos>",
6
- "extra_special_tokens": [
7
- "<start_of_turn>",
8
- "<end_of_turn>"
9
- ],
10
- "is_local": false,
11
- "mask_token": "<mask>",
12
- "model_max_length": 1000000000000000019884624838656,
13
- "pad_token": "<pad>",
14
  "sp_model_kwargs": {},
15
- "spaces_between_special_tokens": false,
16
- "tokenizer_class": "GemmaTokenizer",
17
  "unk_token": "<unk>",
18
  "use_default_system_prompt": false
19
  }
 
1
  {
2
+ "add_prefix_space": null,
3
  "backend": "tokenizers",
4
+ "bos_token": "<s>",
5
  "clean_up_tokenization_spaces": false,
6
+ "eos_token": "</s>",
7
+ "is_local": true,
8
+ "model_max_length": 2048,
9
+ "pad_token": "</s>",
10
+ "padding_side": "right",
 
 
 
 
11
  "sp_model_kwargs": {},
12
+ "tokenizer_class": "LlamaTokenizer",
 
13
  "unk_token": "<unk>",
14
  "use_default_system_prompt": false
15
  }