{
  "always_save_checkpoint": true,
  "architectures": [
    "CustomGPTModel"
  ],
  "backend": "nccl",
  "batch_size": 12,
  "beta1": 0.9,
  "beta2": 0.95,
  "bias": false,
  "block_size": 1024,
  "compile": true,
  "dataset": "openwebtext",
  "decay_lr": true,
  "device": "cuda",
  "dropout": 0.0,
  "dtype": "bfloat16",
  "eval_interval": 1,
  "eval_iters": 1,
  "eval_only": false,
  "grad_clip": 1.0,
  "gradient_accumulation_steps": 40,
  "init_from": "scratch",
  "learning_rate": 0.0006,
  "log_interval": 1,
  "lr_decay_iters": 5,
  "max_iters": 5,
  "min_lr": 6e-05,
  "model_type": "custom_gpt",
  "n_embd": 768,
  "n_head": 12,
  "n_layer": 12,
  "out_dir": "out",
  "torch_dtype": "float32",
  "transformers_version": "4.42.3",
  "vocab_size": 50304,
  "wandb_log": false,
  "wandb_project": "owt",
  "wandb_run_name": "gpt2",
  "warmup_iters": 1,
  "weight_decay": 0.1
}