{
  "always_save_checkpoint": true,
  "architectures": [
    "CustomGPTModel"
  ],
  "backend": "nccl",
  "batch_size": 12,
  "beta1": 0.9,
  "beta2": 0.95,
  "bias": false,
  "block_size": 1024,
  "compile": true,
  "dataset": "openwebtext",
  "decay_lr": true,
  "device": "cuda",
  "dropout": 0.0,
  "dtype": "bfloat16",
  "eval_interval": 1,
  "eval_iters": 1,
  "eval_only": false,
  "grad_clip": 1.0,
  "gradient_accumulation_steps": 40,
  "init_from": "scratch",
  "learning_rate": 0.0006,
  "log_interval": 1,
  "lr_decay_iters": 5,
  "max_iters": 5,
  "min_lr": 6e-05,
  "model_type": "custom_gpt",
  "n_embd": 768,
  "n_head": 12,
  "n_layer": 12,
  "out_dir": "out",
  "torch_dtype": "float32",
  "transformers_version": "4.42.3",
  "vocab_size": 50304,
  "wandb_log": false,
  "wandb_project": "owt",
  "wandb_run_name": "gpt2",
  "warmup_iters": 1,
  "weight_decay": 0.1
}