{
"seed": 0,
"exp_name": "pythia_SFT_ga4",
"batch_size": 64,
"eval_batch_size": 32,
"debug": false,
"fsdp_port": 12355,
"datasets": [
"hh"
],
"wandb": {
"enabled": true,
"entity": "pythia_dpo",
"project": "Pythia_LOM"
},
"local_dirs": [
"/scr-ssd",
"/scr",
".cache"
],
"sample_during_eval": false,
"n_eval_model_samples": 16,
"do_first_eval": true,
"local_run_dir": ".cache/laura/pythia_SFT_ga4_2023-07-16_16-50-13_244945",
"lr": 0.000001,
"gradient_accumulation_steps": 4,
"max_grad_norm": 10,
"max_length": 512,
"max_prompt_length": 256,
"n_epochs": 1,
"n_examples": null,
"n_eval_examples": 256,
"trainer": "FSDPTrainer",
"optimizer": "RMSprop",
"warmup_steps": 150,
"activation_checkpointing": false,
"eval_every": 19968,
"minimum_log_interval_secs": 1,
"model": {
"name_or_path": "EleutherAI/pythia-70m",
"tokenizer_name_or_path": null,
"archive": null,
"block_name": "GPTNeoXLayer",
"policy_dtype": "float32",
"fsdp_policy_mp": "bfloat16",
"reference_dtype": "float16"
},
"loss": {
"name": "sft"
}
}