Training using PyTorch (native): 3 epochs, batch size 14, block size 512, lr 1e-4, cosine schedule
8d45360
verified
{
  "bos_token": "<|endoftext|>",
  "eos_token": "<|endoftext|>",
  "pad_token": "<|endoftext|>",
  "unk_token": "<|endoftext|>"
}
{
  "bos_token": "<|endoftext|>",
  "eos_token": "<|endoftext|>",
  "pad_token": "<|endoftext|>",
  "unk_token": "<|endoftext|>"
}