PathFinderKR committed on
Commit
fcfe2d3
1 Parent(s): e9bbf4f

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -11
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +2 -16
special_tokens_map.json CHANGED
@@ -1,7 +1,4 @@
1
  {
2
- "additional_special_tokens": [
3
- "<|none1|>"
4
- ],
5
  "bos_token": {
6
  "content": "<|begin_of_text|>",
7
  "lstrip": false,
@@ -10,14 +7,7 @@
10
  "single_word": false
11
  },
12
  "eos_token": {
13
- "content": "<|end_of_text|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false
18
- },
19
- "pad_token": {
20
- "content": "<|end_of_text|>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
 
1
  {
 
 
 
2
  "bos_token": {
3
  "content": "<|begin_of_text|>",
4
  "lstrip": false,
 
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<|eot_id|>",
 
 
 
 
 
 
 
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -2047,30 +2047,16 @@
2047
  "rstrip": false,
2048
  "single_word": false,
2049
  "special": true
2050
- },
2051
- "145791": {
2052
- "content": "<|none1|>",
2053
- "lstrip": false,
2054
- "normalized": false,
2055
- "rstrip": false,
2056
- "single_word": false,
2057
- "special": true
2058
  }
2059
  },
2060
- "additional_special_tokens": [
2061
- "<|none1|>"
2062
- ],
2063
  "bos_token": "<|begin_of_text|>",
2064
- "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}",
2065
  "clean_up_tokenization_spaces": true,
2066
- "eos_token": "<|end_of_text|>",
2067
  "model_input_names": [
2068
  "input_ids",
2069
  "attention_mask"
2070
  ],
2071
  "model_max_length": 1000000000000000019884624838656,
2072
- "pad_token": "<|end_of_text|>",
2073
- "padding_side": "right",
2074
- "split_special_tokens": false,
2075
  "tokenizer_class": "PreTrainedTokenizerFast"
2076
  }
 
2047
  "rstrip": false,
2048
  "single_word": false,
2049
  "special": true
 
 
 
 
 
 
 
 
2050
  }
2051
  },
 
 
 
2052
  "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
2054
  "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|eot_id|>",
2056
  "model_input_names": [
2057
  "input_ids",
2058
  "attention_mask"
2059
  ],
2060
  "model_max_length": 1000000000000000019884624838656,
 
 
 
2061
  "tokenizer_class": "PreTrainedTokenizerFast"
2062
  }