Set [SEP] as EOS token: update eos_token, eos_token_id, and generation config
Browse files
- config.json +1 -1
- generation_config.json +1 -1
- special_tokens_map.json +1 -1
- tokenizer_config.json +1 -1
config.json
CHANGED
|
@@ -6,7 +6,7 @@
|
|
| 6 |
"attn_pdrop": 0.1,
|
| 7 |
"bos_token_id": 50256,
|
| 8 |
"embd_pdrop": 0.1,
|
| 9 |
-
"eos_token_id":
|
| 10 |
"initializer_range": 0.02,
|
| 11 |
"layer_norm_epsilon": 1e-05,
|
| 12 |
"model_type": "gpt2",
|
|
|
|
| 6 |
"attn_pdrop": 0.1,
|
| 7 |
"bos_token_id": 50256,
|
| 8 |
"embd_pdrop": 0.1,
|
| 9 |
+
"eos_token_id": 2,
|
| 10 |
"initializer_range": 0.02,
|
| 11 |
"layer_norm_epsilon": 1e-05,
|
| 12 |
"model_type": "gpt2",
|
generation_config.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"_from_model_config": true,
|
| 3 |
"bos_token_id": 50256,
|
| 4 |
-
"eos_token_id":
|
| 5 |
"pad_token_id": 3,
|
| 6 |
"transformers_version": "4.55.4"
|
| 7 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"_from_model_config": true,
|
| 3 |
"bos_token_id": 50256,
|
| 4 |
+
"eos_token_id": 2,
|
| 5 |
"pad_token_id": 3,
|
| 6 |
"transformers_version": "4.55.4"
|
| 7 |
}
|
special_tokens_map.json
CHANGED
|
@@ -7,7 +7,7 @@
|
|
| 7 |
"single_word": false
|
| 8 |
},
|
| 9 |
"eos_token": {
|
| 10 |
-
"content": "
|
| 11 |
"lstrip": false,
|
| 12 |
"normalized": false,
|
| 13 |
"rstrip": false,
|
|
|
|
| 7 |
"single_word": false
|
| 8 |
},
|
| 9 |
"eos_token": {
|
| 10 |
+
"content": "[SEP]",
|
| 11 |
"lstrip": false,
|
| 12 |
"normalized": false,
|
| 13 |
"rstrip": false,
|
tokenizer_config.json
CHANGED
|
@@ -59,7 +59,7 @@
|
|
| 59 |
},
|
| 60 |
"bos_token": "<s>",
|
| 61 |
"clean_up_tokenization_spaces": false,
|
| 62 |
-
"eos_token": "
|
| 63 |
"extra_special_tokens": {},
|
| 64 |
"model_max_length": 1000000000000000019884624838656,
|
| 65 |
"pad_token": "[PAD]",
|
|
|
|
| 59 |
},
|
| 60 |
"bos_token": "<s>",
|
| 61 |
"clean_up_tokenization_spaces": false,
|
| 62 |
+
"eos_token": "[SEP]",
|
| 63 |
"extra_special_tokens": {},
|
| 64 |
"model_max_length": 1000000000000000019884624838656,
|
| 65 |
"pad_token": "[PAD]",
|