---
# Training configuration for a DPO run on the Anthropic HH dataset
# (Pythia-410M policy, FSDP trainer). NOTE(review): layout reconstructed
# from a whitespace-collapsed dump — values preserved verbatim.

# Run identity / reproducibility
seed: 0
exp_name: pythia410m_hh_ga4_dpo

# Batch sizes (global; see gradient_accumulation_steps below)
batch_size: 32
eval_batch_size: 32

debug: false
fsdp_port: 12355

# Datasets to train on
datasets:
  - hh

# Weights & Biases logging
wandb:
  enabled: true
  entity: pythia_dpo
  project: Pythia_LOM

# Candidate scratch/cache directories, tried in order
local_dirs:
  - /scr-ssd
  - /scr
  - .cache

# Evaluation-time sampling
sample_during_eval: false
n_eval_model_samples: 16
do_first_eval: true

# Output directory for this run
local_run_dir: .cache/laura/pythia410m_hh_ga4_dpo_2023-07-20_13-33-11_249010

# Optimization hyperparameters
# (keep the signed exponent: PyYAML's YAML 1.1 resolver requires it
# for the value to parse as a float rather than a string)
lr: 1.0e-06
gradient_accumulation_steps: 4
max_grad_norm: 10.0
max_length: 512
max_prompt_length: 256
n_epochs: 1
n_examples: null  # null → no example-count cap; n_epochs governs duration
n_eval_examples: 256
trainer: FSDPTrainer
optimizer: RMSprop
warmup_steps: 150
activation_checkpointing: false
eval_every: 20000
minimum_log_interval_secs: 1.0

# Model definition
model:
  name_or_path: EleutherAI/pythia-410m
  tokenizer_name_or_path: null  # null → fall back to name_or_path tokenizer
  # SFT checkpoint used to initialize the policy (and reference) weights
  archive: .cache/laura/pythia410m_hh_ga4_sft_2023-07-19_13-02-35_532776/step-159744/policy.pt
  block_name: GPTNeoXLayer  # transformer block class wrapped by FSDP
  policy_dtype: float32
  fsdp_policy_mp: null  # null → no FSDP mixed-precision override
  reference_dtype: float16

# Loss definition
loss:
  name: dpo
  beta: 0.1  # DPO KL-penalty temperature
  reference_free: false