Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +62 -2

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5b8879fe095f0f5a0bc15de492b77df156859ad8ecdf5a7085278229661e4e78
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1701d1397a8f6bba03a038aad0ad88dddbe56212ed6ac753bf48dccf50090e24
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4b4a1552fc2fdd814013dbc89ab5192839e5c2df5e80659c707de2b136e30c5
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d5cbf4360926fd5a69ab224acee68af41a01edad72a7837d83695317fff4262
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e08deeaeeb196d6a8f41f78a6bc82f6af2fe7090aa1bcee32f60529b5b318530
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e7882cf9d1800e045d97afc34ed2d790cd5f0da147adeb6824c51ec77a35e0c5
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f92d50505b95f6ce6845ff480e774269016727df3369556b5afce1e371e58a3
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:47e7c3293120b0e1021fffede4430570f0c03435609ec93915f9f3961852aa6d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0003982128208599804,
   "eval_steps": 500,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -67,6 +67,66 @@
       "reward_std": 1.2611359059810638,
       "rewards/custom_reward_logic_v2": -3.831325000524521,
       "step": 50
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0007964256417199608,
   "eval_steps": 500,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 1.2611359059810638,
       "rewards/custom_reward_logic_v2": -3.831325000524521,
       "step": 50
+    },
+    {
+      "completion_length": 860.95,
+      "epoch": 0.0004778553850319765,
+      "grad_norm": 0.16106949746608734,
+      "kl": 0.0007545762317022308,
+      "learning_rate": 3.7500000000000005e-06,
+      "loss": 0.0,
+      "reward": -3.99547501206398,
+      "reward_std": 1.233138319849968,
+      "rewards/custom_reward_logic_v2": -3.99547501206398,
+      "step": 60
+    },
+    {
+      "completion_length": 831.175,
+      "epoch": 0.0005574979492039726,
+      "grad_norm": 0.1723652333021164,
+      "kl": 0.0007971685263328254,
+      "learning_rate": 4.3750000000000005e-06,
+      "loss": 0.0,
+      "reward": -4.036549943685531,
+      "reward_std": 1.5394920334219933,
+      "rewards/custom_reward_logic_v2": -4.036549943685531,
+      "step": 70
+    },
+    {
+      "completion_length": 874.325,
+      "epoch": 0.0006371405133759687,
+      "grad_norm": 0.2079666703939438,
+      "kl": 0.0008876581850927323,
+      "learning_rate": 5e-06,
+      "loss": 0.0,
+      "reward": -3.92242501154542,
+      "reward_std": 1.2604085817933082,
+      "rewards/custom_reward_logic_v2": -3.92242501154542,
+      "step": 80
+    },
+    {
+      "completion_length": 791.91875,
+      "epoch": 0.0007167830775479647,
+      "grad_norm": 0.16253575682640076,
+      "kl": 0.0010255174711346626,
+      "learning_rate": 4.997620553954645e-06,
+      "loss": 0.0,
+      "reward": -3.364587500691414,
+      "reward_std": 1.2228698313236237,
+      "rewards/custom_reward_logic_v2": -3.364587500691414,
+      "step": 90
+    },
+    {
+      "completion_length": 846.2875,
+      "epoch": 0.0007964256417199608,
+      "grad_norm": 0.18019770085811615,
+      "kl": 0.0013353260728763416,
+      "learning_rate": 4.990486745229364e-06,
+      "loss": 0.0001,
+      "reward": -3.805912530422211,
+      "reward_std": 1.2458222389221192,
+      "rewards/custom_reward_logic_v2": -3.805912530422211,
+      "step": 100
     }
   ],
   "logging_steps": 10,