{
"model": "gpt2",
"dataset": "wikitext",
"subset": "wikitext-103-v1",
"output_dir": "output",
"num_epochs": 20,
"num_tokens": 100000000,
"window_size": 16,
"batch_size": 1024,
"learning_rate": 1e-05,
"warmup_steps": 3000,
"scheduler": "cosine",
"weight_decay": 0.1,
"random_seed": 42,
"eval_steps": 1000,
"patience": 5,
"id": "1c15056cf51bff47"
}
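
A minimal sketch of how these arguments might be consumed, assuming the file is saved as `config.json` (a hypothetical path) and that `num_tokens` is a per-epoch token budget with each batch covering `window_size * batch_size` tokens; the actual training script may interpret these fields differently.

```python
import json
import random
from argparse import Namespace

# Load the run configuration shown above (path is hypothetical).
with open("config.json") as f:
    args = Namespace(**json.load(f))

# Reproduce the run's seeding.
random.seed(args.random_seed)

# Under the assumption above, each optimizer step consumes
# window_size * batch_size = 16 * 1024 = 16384 tokens.
tokens_per_step = args.window_size * args.batch_size
steps_per_epoch = args.num_tokens // tokens_per_step  # ~6103
total_steps = steps_per_epoch * args.num_epochs       # ~122060

print(f"steps per epoch: {steps_per_epoch}")
print(f"total steps:     {total_steps}")
print(f"warmup fraction: {args.warmup_steps / total_steps:.1%}")
```

Under this reading, the 3000 warmup steps cover roughly 2.5% of the run before the cosine schedule decays the learning rate from 1e-05.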