{
  "seed": 0,
  "exp_name": "pythia_SFT_ga4",
  "batch_size": 64,
  "eval_batch_size": 32,
  "debug": false,
  "fsdp_port": 12355,
  "datasets": [
    "hh"
  ],
  "wandb": {
    "enabled": true,
    "entity": "pythia_dpo",
    "project": "Pythia_LOM"
  },
  "local_dirs": [
    "/scr-ssd",
    "/scr",
    ".cache"
  ],
  "sample_during_eval": false,
  "n_eval_model_samples": 16,
  "do_first_eval": true,
  "local_run_dir": ".cache/laura/pythia_SFT_ga4_2023-07-16_16-50-13_244945",
  "lr": 0.000001,
  "gradient_accumulation_steps": 4,
  "max_grad_norm": 10,
  "max_length": 512,
  "max_prompt_length": 256,
  "n_epochs": 1,
  "n_examples": null,
  "n_eval_examples": 256,
  "trainer": "FSDPTrainer",
  "optimizer": "RMSprop",
  "warmup_steps": 150,
  "activation_checkpointing": false,
  "eval_every": 19968,
  "minimum_log_interval_secs": 1,
  "model": {
    "name_or_path": "EleutherAI/pythia-70m",
    "tokenizer_name_or_path": null,
    "archive": null,
    "block_name": "GPTNeoXLayer",
    "policy_dtype": "float32",
    "fsdp_policy_mp": "bfloat16",
    "reference_dtype": "float16"
  },
  "loss": {
    "name": "sft"
  }
}