casperhansen Praful932 committed on
Commit
ba1c958
1 Parent(s): f7fbeb2

update eos token (#6)

Browse files

- update eos token (a99e713e7f1ca41418d981bb0f8d0f38f1704911)
- update eos token id in config.json (df3417bf36d3edcc1fadca30f8724da22acf7ef9)
- correct eos token in special token map (f239207f101efffb4cd3e5f86a425717e3ee227a)


Co-authored-by: Praful Mohanan <Praful932@users.noreply.huggingface.co>

Files changed (3)
  1. config.json +1 -1
  2. special_tokens_map.json +1 -1
  3. tokenizer_config.json +1 -1
config.json CHANGED
@@ -6,7 +6,7 @@
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
8
  "bos_token_id": 128000,
9
- "eos_token_id": 128001,
10
  "hidden_act": "silu",
11
  "hidden_size": 4096,
12
  "initializer_range": 0.02,
 
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
8
  "bos_token_id": 128000,
9
+ "eos_token_id": 128009,
10
  "hidden_act": "silu",
11
  "hidden_size": 4096,
12
  "initializer_range": 0.02,
special_tokens_map.json CHANGED
@@ -7,7 +7,7 @@
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<|end_of_text|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
 
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<|eot_id|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -2052,7 +2052,7 @@
2052
  "bos_token": "<|begin_of_text|>",
2053
  "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
2054
  "clean_up_tokenization_spaces": true,
2055
- "eos_token": "<|end_of_text|>",
2056
  "model_input_names": [
2057
  "input_ids",
2058
  "attention_mask"
 
2052
  "bos_token": "<|begin_of_text|>",
2053
  "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
2054
  "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|eot_id|>",
2056
  "model_input_names": [
2057
  "input_ids",
2058
  "attention_mask"