danielhanchen committed on
Commit
36d1940
1 Parent(s): 79515e1

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +1 -29
  2. tokenizer_config.json +3 -2
tokenizer.json CHANGED
@@ -150,12 +150,6 @@
150
  "post_processor": {
151
  "type": "TemplateProcessing",
152
  "single": [
153
- {
154
- "SpecialToken": {
155
- "id": "<s>",
156
- "type_id": 0
157
- }
158
- },
159
  {
160
  "Sequence": {
161
  "id": "A",
@@ -164,24 +158,12 @@
164
  }
165
  ],
166
  "pair": [
167
- {
168
- "SpecialToken": {
169
- "id": "<s>",
170
- "type_id": 0
171
- }
172
- },
173
  {
174
  "Sequence": {
175
  "id": "A",
176
  "type_id": 0
177
  }
178
  },
179
- {
180
- "SpecialToken": {
181
- "id": "<s>",
182
- "type_id": 1
183
- }
184
- },
185
  {
186
  "Sequence": {
187
  "id": "B",
@@ -189,17 +171,7 @@
189
  }
190
  }
191
  ],
192
- "special_tokens": {
193
- "<s>": {
194
- "id": "<s>",
195
- "ids": [
196
- 1
197
- ],
198
- "tokens": [
199
- "<s>"
200
- ]
201
- }
202
- }
203
  },
204
  "decoder": {
205
  "type": "Sequence",
 
150
  "post_processor": {
151
  "type": "TemplateProcessing",
152
  "single": [
 
 
 
 
 
 
153
  {
154
  "Sequence": {
155
  "id": "A",
 
158
  }
159
  ],
160
  "pair": [
 
 
 
 
 
 
161
  {
162
  "Sequence": {
163
  "id": "A",
164
  "type_id": 0
165
  }
166
  },
 
 
 
 
 
 
167
  {
168
  "Sequence": {
169
  "id": "B",
 
171
  }
172
  }
173
  ],
174
+ "special_tokens": {}
 
 
 
 
 
 
 
 
 
 
175
  },
176
  "decoder": {
177
  "type": "Sequence",
tokenizer_config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
- "add_bos_token": true,
3
  "add_eos_token": false,
 
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
@@ -116,7 +117,7 @@
116
  }
117
  },
118
  "bos_token": "<s>",
119
- "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
120
  "clean_up_tokenization_spaces": false,
121
  "eos_token": "<|endoftext|>",
122
  "legacy": false,
 
1
  {
2
+ "add_bos_token": false,
3
  "add_eos_token": false,
4
+ "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
 
117
  }
118
  },
119
  "bos_token": "<s>",
120
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
121
  "clean_up_tokenization_spaces": false,
122
  "eos_token": "<|endoftext|>",
123
  "legacy": false,