Training with native PyTorch: 10 epochs, batch size 8, block size 512, lr 1e-4 with cosine schedule
6f190f4
verified
{
"_from_model_config": true,
"bos_token_id": 50256,
"eos_token_id": 50256,
"transformers_version": "4.40.1"
}
{ | |
"_from_model_config": true, | |
"bos_token_id": 50256, | |
"eos_token_id": 50256, | |
"transformers_version": "4.40.1" | |
} | |