Training in progress, step 860, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2446e323705b3c535d054969f9761f5b59a5e3bbc07424bc218a9034170b3134
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:99b129df98bdc306d1bc4565ee000cc0b871ca7381053214c2914dac7ae77608
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3fc67b9db032061bdb48e9268ce036f9baffb6470d5aa76f5eb0276949db998b
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb976eeabc1d40bc1ef543f0d6b42e69810a3b568ef7fec526ce855dcd53f250
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:085f468c9ca1512d3e0074132a3b99e1cdcd42473eeebeb3fe393de996efa9e0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:798b54db949c4ae9de08b62eac89d6111767c04dba8dc38518460f18e2c13d16
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc74520f1262c2228ad975fd3e788ebc1722dd5506dd79c24c13063ed9e2e823
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a2d74cdd7773895240aff3837ca564e1fc035a5a8a0853fee30d28f2c6ee4c25
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.1064380459398253,
   "eval_steps": 386,
-  "global_step": 855,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6016,6 +6016,41 @@
       "learning_rate": 8.938360996464048e-05,
       "loss": 0.821,
       "step": 855
     }
   ],
   "logging_steps": 1,
@@ -6035,7 +6070,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.557664107247698e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.112908443869298,
   "eval_steps": 386,
+  "global_step": 860,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.938360996464048e-05,
       "loss": 0.821,
       "step": 855
+    },
+    {
+      "epoch": 1.1077321255257198,
+      "grad_norm": 0.910192608833313,
+      "learning_rate": 8.93581960783559e-05,
+      "loss": 0.7756,
+      "step": 856
+    },
+    {
+      "epoch": 1.1090262051116144,
+      "grad_norm": 0.8553609848022461,
+      "learning_rate": 8.933275543214245e-05,
+      "loss": 0.8378,
+      "step": 857
+    },
+    {
+      "epoch": 1.1103202846975089,
+      "grad_norm": 0.9897130131721497,
+      "learning_rate": 8.930728804329744e-05,
+      "loss": 0.8233,
+      "step": 858
+    },
+    {
+      "epoch": 1.1116143642834033,
+      "grad_norm": 0.8921964168548584,
+      "learning_rate": 8.928179392913633e-05,
+      "loss": 0.9355,
+      "step": 859
+    },
+    {
+      "epoch": 1.112908443869298,
+      "grad_norm": 1.06694757938385,
+      "learning_rate": 8.925627310699275e-05,
+      "loss": 0.9271,
+      "step": 860
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 9.613560965832376e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null