{
  "seed": 0,
  "exp_name": "pythia_SFT_ga4",
  "batch_size": 64,
  "eval_batch_size": 32,
  "debug": false,
  "fsdp_port": 12355,
  "datasets": [
    "hh"
  ],
  "wandb": {
    "enabled": true,
    "entity": "pythia_dpo",
    "project": "Pythia_LOM"
  },
  "local_dirs": [
    "/scr-ssd",
    "/scr",
    ".cache"
  ],
  "sample_during_eval": false,
  "n_eval_model_samples": 16,
  "do_first_eval": true,
  "local_run_dir": ".cache/laura/pythia_SFT_ga4_2023-07-16_16-50-13_244945",
  "lr": 0.000001,
  "gradient_accumulation_steps": 4,
  "max_grad_norm": 10,
  "max_length": 512,
  "max_prompt_length": 256,
  "n_epochs": 1,
  "n_examples": null,
  "n_eval_examples": 256,
  "trainer": "FSDPTrainer",
  "optimizer": "RMSprop",
  "warmup_steps": 150,
  "activation_checkpointing": false,
  "eval_every": 19968,
  "minimum_log_interval_secs": 1,
  "model": {
    "name_or_path": "EleutherAI/pythia-70m",
    "tokenizer_name_or_path": null,
    "archive": null,
    "block_name": "GPTNeoXLayer",
    "policy_dtype": "float32",
    "fsdp_policy_mp": "bfloat16",
    "reference_dtype": "float16"
  },
  "loss": {
    "name": "sft"
  }
}