Training in progress, step 705, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1aabb06c1fe37ff25379e382b1f17082b07d34182a4fc2da6ffae8579ac4675e
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:276cc98327297733df76553f5e3b6d4860107114f0c0d09e76850d88f2e3e792
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d75409d4f4ef99a18e288d75b5c709cef3c171c10c32cf11b3494ebb2d324c6
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:67e1bd4b745e410efa1702b658de1ecd4ec95ec3e117bd275ec883dc33e4fe8f
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db4a5aa1d65732ea6e6ad6ffbd33a4afe19476644f2bd043f99022469dab6bc0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c2d3d37bb63f7049bbb3b5a0e021e4d5be2c3c48a17668f2b1c66f33aefe4ecd
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e88a0b12b39fadaa49a1f55d69192330694c5d8626f92166735ca7ee1b34dd9e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6f8030d97101266fb0ab5c1ea76d734a66ed4913265b7bd97b3ca554de2e26fd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9058557101261727,
   "eval_steps": 386,
-  "global_step": 700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4923,6 +4923,41 @@
       "learning_rate": 9.298875414358399e-05,
       "loss": 0.9095,
       "step": 700
     }
   ],
   "logging_steps": 1,
@@ -4942,7 +4977,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.825560201854976e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9123261080556454,
   "eval_steps": 386,
+  "global_step": 705,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.298875414358399e-05,
       "loss": 0.9095,
       "step": 700
+    },
+    {
+      "epoch": 0.9071497897120673,
+      "grad_norm": 0.6941442489624023,
+      "learning_rate": 9.296768538192853e-05,
+      "loss": 0.8285,
+      "step": 701
+    },
+    {
+      "epoch": 0.9084438692979618,
+      "grad_norm": 0.7753114700317383,
+      "learning_rate": 9.294658740622573e-05,
+      "loss": 0.9726,
+      "step": 702
+    },
+    {
+      "epoch": 0.9097379488838564,
+      "grad_norm": 0.8224105834960938,
+      "learning_rate": 9.292546023082025e-05,
+      "loss": 0.8728,
+      "step": 703
+    },
+    {
+      "epoch": 0.9110320284697508,
+      "grad_norm": 0.9305656552314758,
+      "learning_rate": 9.29043038700766e-05,
+      "loss": 0.891,
+      "step": 704
+    },
+    {
+      "epoch": 0.9123261080556454,
+      "grad_norm": 0.7868937849998474,
+      "learning_rate": 9.288311833837917e-05,
+      "loss": 0.8883,
+      "step": 705
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 7.881457060439654e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null