VictorDCh committed on
Commit
cc11486
·
verified ·
1 Parent(s): 348d2a3

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "additional_special_tokens": [
3
  {
4
- "content": "<|im_end|>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
  {
11
- "content": "<|im_start|>",
12
  "lstrip": false,
13
  "normalized": false,
14
  "rstrip": false,
 
1
  {
2
  "additional_special_tokens": [
3
  {
4
+ "content": "<|im_start|>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
  {
11
+ "content": "<|im_end|>",
12
  "lstrip": false,
13
  "normalized": false,
14
  "rstrip": false,
tokenizer.json CHANGED
@@ -2309,7 +2309,7 @@
2309
  },
2310
  {
2311
  "id": 128256,
2312
- "content": "<|im_end|>",
2313
  "single_word": false,
2314
  "lstrip": false,
2315
  "rstrip": false,
@@ -2318,7 +2318,7 @@
2318
  },
2319
  {
2320
  "id": 128257,
2321
- "content": "<|im_start|>",
2322
  "single_word": false,
2323
  "lstrip": false,
2324
  "rstrip": false,
@@ -2425,6 +2425,7 @@
2425
  "end_of_word_suffix": null,
2426
  "fuse_unk": false,
2427
  "byte_fallback": false,
 
2428
  "vocab": {
2429
  "!": 0,
2430
  "\"": 1,
 
2309
  },
2310
  {
2311
  "id": 128256,
2312
+ "content": "<|im_start|>",
2313
  "single_word": false,
2314
  "lstrip": false,
2315
  "rstrip": false,
 
2318
  },
2319
  {
2320
  "id": 128257,
2321
+ "content": "<|im_end|>",
2322
  "single_word": false,
2323
  "lstrip": false,
2324
  "rstrip": false,
 
2425
  "end_of_word_suffix": null,
2426
  "fuse_unk": false,
2427
  "byte_fallback": false,
2428
+ "ignore_merges": true,
2429
  "vocab": {
2430
  "!": 0,
2431
  "\"": 1,
tokenizer_config.json CHANGED
@@ -2049,7 +2049,7 @@
2049
  "special": true
2050
  },
2051
  "128256": {
2052
- "content": "<|im_end|>",
2053
  "lstrip": false,
2054
  "normalized": false,
2055
  "rstrip": false,
@@ -2057,7 +2057,7 @@
2057
  "special": true
2058
  },
2059
  "128257": {
2060
- "content": "<|im_start|>",
2061
  "lstrip": false,
2062
  "normalized": false,
2063
  "rstrip": false,
@@ -2066,8 +2066,8 @@
2066
  }
2067
  },
2068
  "additional_special_tokens": [
2069
- "<|im_end|>",
2070
- "<|im_start|>"
2071
  ],
2072
  "bos_token": "<|im_start|>",
2073
  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
 
2049
  "special": true
2050
  },
2051
  "128256": {
2052
+ "content": "<|im_start|>",
2053
  "lstrip": false,
2054
  "normalized": false,
2055
  "rstrip": false,
 
2057
  "special": true
2058
  },
2059
  "128257": {
2060
+ "content": "<|im_end|>",
2061
  "lstrip": false,
2062
  "normalized": false,
2063
  "rstrip": false,
 
2066
  }
2067
  },
2068
  "additional_special_tokens": [
2069
+ "<|im_start|>",
2070
+ "<|im_end|>"
2071
  ],
2072
  "bos_token": "<|im_start|>",
2073
  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",