Training in progress, step 21000, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c3635274eee21ba1976640f8142d5c1fbf66c82e539af94ae87a1f3ee9eb023
 size 136000488

 version https://git-lfs.github.com/spec/v1
+oid sha256:c303733e68b4d6a3464f154a0cef27f187ffb143a75e316fdc2ae74d008e2ebe
 size 136000488

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:36b31c0839e033c8fdb5f6e0bfc78e2d9735a0fe9264510a0e2f8d12a0d58d55
 size 268176506

 version https://git-lfs.github.com/spec/v1
+oid sha256:e33ab760cf381a1b3364cd0d214e029ee42b9595779e052acbaab68a7bbff730
 size 268176506

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ce1f6957d6020f16416f4fd345e9fae1e2983c0b203bfade0ebefc01a476151
 size 14942

 version https://git-lfs.github.com/spec/v1
+oid sha256:fac704f203b889a3f255581a24033dc58a68d7b38d0c3e2670b79fd002470370
 size 14942

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d84e3c52aa4a82118b4d8d3c4b8e8c4226ad3a7764da780159cac8984f3a66d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8323278f0ff55c37775f3c33790ba47e5be2f0255a208e4fc7ee51b8d608539
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.817535830422739,
   "eval_steps": 500,
-  "global_step": 20000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -147,6 +147,13 @@
       "learning_rate": 0.0008393961294467197,
       "loss": 1.068,
       "step": 20000
     }
   ],
   "logging_steps": 1000,
@@ -166,7 +173,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.0007346657191526e+17,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 5.058412621943876,
   "eval_steps": 500,
+  "global_step": 21000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0008393961294467197,
       "loss": 1.068,
       "step": 20000
+    },
+    {
+      "epoch": 5.058412621943876,
+      "grad_norm": 0.4861578345298767,
+      "learning_rate": 0.0008313659359190556,
+      "loss": 1.0596,
+      "step": 21000
     }
   ],
   "logging_steps": 1000,
       "attributes": {}
     }
   },
+  "total_flos": 2.1007632729269862e+17,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null