LLM-124M-v2 / config.json
{
"always_save_checkpoint": true,
"architectures": [
"CustomGPTModel"
],
"backend": "nccl",
"batch_size": 12,
"beta1": 0.9,
"beta2": 0.95,
"bias": false,
"block_size": 1024,
"compile": true,
"dataset": "openwebtext",
"decay_lr": true,
"device": "cuda",
"dropout": 0.0,
"dtype": "bfloat16",
"eval_interval": 1,
"eval_iters": 1,
"eval_only": false,
"grad_clip": 1.0,
"gradient_accumulation_steps": 40,
"init_from": "scratch",
"learning_rate": 0.0006,
"log_interval": 1,
"lr_decay_iters": 5,
"max_iters": 5,
"min_lr": 6e-05,
"model_type": "custom_gpt",
"n_embd": 768,
"n_head": 12,
"n_layer": 12,
"out_dir": "out",
"torch_dtype": "float32",
"transformers_version": "4.42.3",
"vocab_size": 50304,
"wandb_log": false,
"wandb_project": "owt",
"wandb_run_name": "gpt2",
"warmup_iters": 1,
"weight_decay": 0.1
}
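This file mixes nanoGPT-style training hyperparameters (batch_size, block_size, gradient_accumulation_steps, learning rate schedule) with Hugging Face transformers metadata. Because "model_type": "custom_gpt" is not a built-in transformers architecture, a safe way to inspect it is to read it as plain JSON. Below is a minimal sketch, assuming the file is saved locally as config.json and training runs on a single device, that derives the effective tokens processed per optimizer step; the file path and single-device assumption are illustrative, not taken from the repo.

import json

# Read the config as plain JSON rather than via AutoConfig, since
# "custom_gpt" is not a registered transformers model type.
with open("config.json") as f:
    cfg = json.load(f)

# nanoGPT-style accounting: tokens per optimizer step =
# micro-batch size * sequence length * gradient accumulation steps.
tokens_per_step = (
    cfg["batch_size"] * cfg["block_size"] * cfg["gradient_accumulation_steps"]
)
print(f"tokens per optimizer step: {tokens_per_step:,}")  # 12 * 1024 * 40 = 491,520

With the values above, each optimizer step consumes 491,520 tokens, spread over 40 accumulated micro-batches of 12 sequences of 1,024 tokens each.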