First model version

Files changed (4) hide show

config.yaml ADDED Viewed

+seed: 0
+exp_name: pythia160m_hh_ga4_sft
+batch_size: 64
+eval_batch_size: 32
+debug: false
+fsdp_port: 12355
+datasets:
+- hh
+wandb:
+  enabled: true
+  entity: pythia_dpo
+  project: Pythia_LOM
+local_dirs:
+- /scr-ssd
+- /scr
+- .cache
+sample_during_eval: false
+n_eval_model_samples: 16
+do_first_eval: true
+local_run_dir: .cache/laura/pythia160m_hh_ga4_sft_2023-07-18_13-09-00_785022
+lr: 1.0e-06
+gradient_accumulation_steps: 4
+max_grad_norm: 10.0
+max_length: 512
+max_prompt_length: 256
+n_epochs: 1
+n_examples: null
+n_eval_examples: 256
+trainer: FSDPTrainer
+optimizer: RMSprop
+warmup_steps: 150
+activation_checkpointing: false
+eval_every: 19968
+minimum_log_interval_secs: 1.0
+model:
+  name_or_path: EleutherAI/pythia-160m
+  tokenizer_name_or_path: null
+  archive: null
+  block_name: GPTNeoXLayer
+  policy_dtype: float32
+  fsdp_policy_mp: bfloat16
+  reference_dtype: float16
+loss:
+  name: sft

optimizer.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0f2f7d3629d310fc48b37a984b9359c37be44232490d16cd1af97836f0f4e50
+size 649342149

policy.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:28c6302243a370f1cc34032e8301a166e920e79b90e52071a4fc1801ea1e4f7b
+size 699693719

scheduler.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:ef3bb892ec5f85ebcd911ec0358781fc843702c92bef54eec6bc38b127a23878
+size 627