Training in progress, step 1550, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +38 -2

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:684ffe225c9700091e981d7024210dc263a03c53399f30fe5949b6f253fdbcc7
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4620c952d9d47f51b52dbaa4d676d99f0fa4424c13c706daf27aef599e4f2780
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a687432a9d28051c1c39f8f0f4a0e50ee514d1a0fd3655eab260eb68945b822d
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:1dcd3d1e5ce9aeb2f3a0a439162a6354e3a87bdd42c2d824aa351222059be0bd
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:99aee5f4fee1e5dcba147a263eab258ec9ed62b758fd41d69b67bd6b96aba2c6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3862494dcb8330500216500c3c10e90221d897be72203e9a311c19db30c18205
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8bce7973cc00369755cc2848c8e4a859027c75254c0a4740d15b2cd113fceee
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:cbfbb7156838856bc44aea3d88f369f30c1abce4e6c21899507c2f357e97b51c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.012145491036229403,
   "eval_steps": 500,
-  "global_step": 1525,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1831,6 +1831,42 @@
       "reward_std": 0.3324665643274784,
       "rewards/custom_reward_simplified_v7_dblog": 0.790625,
       "step": 1520
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.012344597446659392,
   "eval_steps": 500,
+  "global_step": 1550,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "reward_std": 0.3324665643274784,
       "rewards/custom_reward_simplified_v7_dblog": 0.790625,
       "step": 1520
+    },
+    {
+      "completion_length": 674.3625,
+      "epoch": 0.0121853123183154,
+      "grad_norm": 0.2344941943883896,
+      "kl": 0.012514100456610323,
+      "learning_rate": 1.7482355012393177e-06,
+      "loss": 0.0005,
+      "reward": 0.859375,
+      "reward_std": 0.3387090668082237,
+      "rewards/custom_reward_simplified_v7_dblog": 0.859375,
+      "step": 1530
+    },
+    {
+      "completion_length": 718.6,
+      "epoch": 0.012264954882487397,
+      "grad_norm": 0.2631664276123047,
+      "kl": 0.014576551388017833,
+      "learning_rate": 1.7136381096209665e-06,
+      "loss": 0.0006,
+      "reward": 0.653125,
+      "reward_std": 0.24619419425725936,
+      "rewards/custom_reward_simplified_v7_dblog": 0.653125,
+      "step": 1540
+    },
+    {
+      "completion_length": 706.28125,
+      "epoch": 0.012344597446659392,
+      "grad_norm": 0.20134921371936798,
+      "kl": 0.012202254333533346,
+      "learning_rate": 1.6792070619660977e-06,
+      "loss": 0.0005,
+      "reward": 0.84375,
+      "reward_std": 0.3321776181459427,
+      "rewards/custom_reward_simplified_v7_dblog": 0.84375,
+      "step": 1550
     }
   ],
   "logging_steps": 10,