{ "model_config": { "model_type": "llama2", "mamba": null, "llama2": { "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": -1, "eos_token_id": -2, "hidden_act": "silu", "hidden_size": 48, "initializer_range": 0.02, "intermediate_size": 48, "max_position_embeddings": 513, "num_attention_heads": 2, "num_hidden_layers": 2, "num_key_value_heads": 2, "pretraining_tp": 1, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "use_cache": true, "vocab_size": 4096 } }, "run_name": "debug__2024_03_19_00_25_35", "output_dir": "/Users/jaidhyani/Library/Application Support/delphi/debug__2024_03_19_00_25_35", "huggingface": { "repo_id": null, "save_checkpoints": false }, "device": "auto", "eval_interval": 1, "log_interval": 1, "eval_iters": 1, "eval_only": false, "always_save_checkpoint": true, "init_from": "scratch", "wandb_config": { "log": true, "project": "delphi", "entity": "jaiwithani" }, "batch_size": 64, "max_seq_len": 512, "max_epochs": 1, "grad_clip": 1.0, "optimizer": { "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "min_lr": 0.0 }, "train_sample_limit": 1024, "val_sample_limit": -1 }