Training in progress, step 192, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:497ee0fe7e43f8ea32b3d8c1c5933e2f0d6aa06daedab01b599f930b1bb7891e
 size 35237104

 version https://git-lfs.github.com/spec/v1
+oid sha256:c82a800c179ba423aec97639d9ccc384481b0ce2554f370d1b33c0d7e72ade4e
 size 35237104

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0fc0b933514016e773d11fe2e7a3c622fca43b764bfdd03230cc4795569fb093
 size 18810036

 version https://git-lfs.github.com/spec/v1
+oid sha256:85e5bdecfd8d470dcdb63d2257591509be80d9a8efbb2ae893e9f5faabe91b36
 size 18810036

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:410fe0096ab6317dd300ab4f9bac4f0a1c35ff510e31cab3d44f3ea101c6dfc6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2be38675289fa52218b04199b6f2593e112267ecbf7ee0bb9a6c1998df0f09e7
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:357be5f693f9421d5e744dfb42bb8fe1529623e030b048f1ac356fae9ff56ff5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:430952bd40ab71b9a96e33c9bed5a81b004c335aa948503b4d61063a45bfb5c2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 15.75,
   "eval_steps": 3,
-  "global_step": 189,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1842,6 +1842,35 @@
       "eval_samples_per_second": 4.99,
       "eval_steps_per_second": 2.994,
       "step": 189
     }
   ],
   "logging_steps": 1,
@@ -1861,7 +1890,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.349085735419904e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 16.0,
   "eval_steps": 3,
+  "global_step": 192,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 4.99,
       "eval_steps_per_second": 2.994,
       "step": 189
+    },
+    {
+      "epoch": 15.833333333333334,
+      "grad_norm": 0.0013418430462479591,
+      "learning_rate": 0.000195051270234986,
+      "loss": 0.0001,
+      "step": 190
+    },
+    {
+      "epoch": 15.916666666666666,
+      "grad_norm": 0.0016558875795453787,
+      "learning_rate": 0.00019499659605365404,
+      "loss": 0.0001,
+      "step": 191
+    },
+    {
+      "epoch": 16.0,
+      "grad_norm": 0.0026047020219266415,
+      "learning_rate": 0.0001949416292539326,
+      "loss": 0.0001,
+      "step": 192
+    },
+    {
+      "epoch": 16.0,
+      "eval_loss": 1.3221460580825806,
+      "eval_runtime": 1.0043,
+      "eval_samples_per_second": 4.979,
+      "eval_steps_per_second": 2.987,
+      "step": 192
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.36981317156864e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null