---
# Training-run configuration (SFT on Pythia-70m).
# NOTE(review): this file was flattened onto one line; block structure below is
# reconstructed from key grouping (wandb.*, model.*, loss.*) — verify against
# the consuming trainer's config schema.

# Run identity / reproducibility
seed: 0
exp_name: pythia_SFT_ga4
batch_size: 64
eval_batch_size: 32
debug: false
fsdp_port: 12355

# Datasets to train on
datasets:
  - hh

# Weights & Biases logging
wandb:
  enabled: true
  entity: pythia_dpo
  project: Pythia_LOM

# Candidate cache/scratch directories, checked in order
local_dirs:
  - /scr-ssd
  - /scr
  - .cache

# Evaluation behavior
sample_during_eval: false
n_eval_model_samples: 16
do_first_eval: true
local_run_dir: .cache/laura/pythia_SFT_ga4_2023-07-16_16-50-13_244945

# Optimization hyperparameters
lr: 1.0e-06
gradient_accumulation_steps: 4
max_grad_norm: 10.0
max_length: 512
max_prompt_length: 256
n_epochs: 1
n_examples: null  # null → run until n_epochs completes
n_eval_examples: 256
trainer: FSDPTrainer
optimizer: RMSprop
warmup_steps: 150
activation_checkpointing: false
eval_every: 19968
minimum_log_interval_secs: 1.0

# Model / FSDP settings
model:
  name_or_path: EleutherAI/pythia-70m
  tokenizer_name_or_path: null  # null presumably falls back to name_or_path — confirm in loader
  archive: null
  block_name: GPTNeoXLayer
  policy_dtype: float32
  fsdp_policy_mp: bfloat16
  reference_dtype: float16

# Loss selection
loss:
  name: sft