{ "always_save_checkpoint": true, "architectures": [ "CustomGPTModel" ], "backend": "nccl", "batch_size": 12, "beta1": 0.9, "beta2": 0.95, "bias": false, "block_size": 1024, "compile": true, "dataset": "openwebtext", "decay_lr": true, "device": "cuda", "dropout": 0.0, "dtype": "bfloat16", "eval_interval": 1, "eval_iters": 1, "eval_only": false, "grad_clip": 1.0, "gradient_accumulation_steps": 40, "init_from": "scratch", "learning_rate": 0.0006, "log_interval": 1, "lr_decay_iters": 5, "max_iters": 5, "min_lr": 6e-05, "model_type": "custom_gpt", "n_embd": 768, "n_head": 12, "n_layer": 12, "out_dir": "out", "torch_dtype": "float32", "transformers_version": "4.42.3", "vocab_size": 50304, "wandb_log": false, "wandb_project": "owt", "wandb_run_name": "gpt2", "warmup_iters": 1, "weight_decay": 0.1 }