Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +62 -2

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f0b9fe960d8b700432d64261f2e9fb8b5e2feb648ccba2b26954af39f14f187
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9d0b10032621567c53e0e12f4051e3fa7f9880f7b87ae153f1600c2eed98d364
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6980510b381af43d0d08ceb7a59264815242f1f6223f0a5de785069b7c5b74d7
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:c4c7399a7124ebfc8f2602ee4160ffecfe9c2c5345d69e9697c401f6fd3bd73d
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63e7217b76d9787359cf45d83ba0b63cb6335c60810ebfe7324880fdde71d442
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0d71d011e87b512f28e794476e44bdcb409ab9a4721e9b4147120eeb12f1053d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e9d42f94a00d3d38a441f86dbb87d1da2ac6b6e6fceeb3fff0437ffb348f193a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:47394e7e6639b2da14254e3e88e50a05cb1b1d15a05d1aa46398ae3b93c7909f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0011946384625799412,
   "eval_steps": 500,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -187,6 +187,66 @@
       "reward_std": 0.430637900531292,
       "rewards/custom_reward_logic_v2": -0.04024999849498272,
       "step": 150
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0015928512834399217,
   "eval_steps": 500,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.430637900531292,
       "rewards/custom_reward_logic_v2": -0.04024999849498272,
       "step": 150
+    },
+    {
+      "completion_length": 18.93125,
+      "epoch": 0.0012742810267519374,
+      "grad_norm": 0.032512303441762924,
+      "kl": 0.33459745422005654,
+      "learning_rate": 4.849231551964771e-06,
+      "loss": 0.0134,
+      "reward": 0.1650000035762787,
+      "reward_std": 0.07605109438300132,
+      "rewards/custom_reward_logic_v2": 0.1650000035762787,
+      "step": 160
+    },
+    {
+      "completion_length": 17.4375,
+      "epoch": 0.0013539235909239334,
+      "grad_norm": 0.02004638873040676,
+      "kl": 0.35064528286457064,
+      "learning_rate": 4.809698831278217e-06,
+      "loss": 0.014,
+      "reward": 0.08999999985098839,
+      "reward_std": 0.125558003783226,
+      "rewards/custom_reward_logic_v2": 0.08999999985098839,
+      "step": 170
+    },
+    {
+      "completion_length": 23.975,
+      "epoch": 0.0014335661550959294,
+      "grad_norm": 0.2281995564699173,
+      "kl": 0.3118164837360382,
+      "learning_rate": 4.765769467591626e-06,
+      "loss": 0.0125,
+      "reward": 0.08099999986588954,
+      "reward_std": 0.18301311507821083,
+      "rewards/custom_reward_logic_v2": 0.08099999986588954,
+      "step": 180
+    },
+    {
+      "completion_length": 17.7,
+      "epoch": 0.0015132087192679256,
+      "grad_norm": 0.20832708477973938,
+      "kl": 0.34881954491138456,
+      "learning_rate": 4.717527082945555e-06,
+      "loss": 0.014,
+      "reward": 0.14687500111758708,
+      "reward_std": 0.13193419948220253,
+      "rewards/custom_reward_logic_v2": 0.14687500111758708,
+      "step": 190
+    },
+    {
+      "completion_length": 28.76875,
+      "epoch": 0.0015928512834399217,
+      "grad_norm": 0.2148224264383316,
+      "kl": 0.4086977861821651,
+      "learning_rate": 4.665063509461098e-06,
+      "loss": 0.0163,
+      "reward": 0.06411250084638595,
+      "reward_std": 0.09681975245475768,
+      "rewards/custom_reward_logic_v2": 0.06411250084638595,
+      "step": 200
     }
   ],
   "logging_steps": 10,