Training in progress, step 405, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b5849099de6d64da9163882e434d47ddd3f52875f754b124b878b5d8d76b11b
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:f3ba083b80a5ba86c78573397f43c5755304ed3a937a891778f373046687e85a
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1fedb2f50db670c47275e3ef8f58cd862a8dd1891a81406d1d6e8dbb0ca662b
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:6f7bdba3f023857413f670e1a267e69714b283f392e2ead25387966136304530
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e510d47805713e04f667951f9cdc1d488606c58294839c2a021b9c3bb3b8555
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:87c82db480424801d4bb33b59a50589e1c04688890a2475f7053ae263b3756f4
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc533cf1bb7177235d15f56f9dac5f23ac9d59fa4a64c89a32769793623a6b44
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:33cf1fc2a7a88cf2a60a50e566aaa7e8972cd330e2ff6eb55fa333ccfaf32fd5
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.517631834357813,
   "eval_steps": 386,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2823,6 +2823,41 @@
       "learning_rate": 9.793219548476753e-05,
       "loss": 0.8706,
       "step": 400
     }
   ],
   "logging_steps": 1,
@@ -2842,7 +2877,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.471748686774272e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5241022322872857,
   "eval_steps": 386,
+  "global_step": 405,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.793219548476753e-05,
       "loss": 0.8706,
       "step": 400
+    },
+    {
+      "epoch": 0.5189259139437076,
+      "grad_norm": 0.893902063369751,
+      "learning_rate": 9.792044529138674e-05,
+      "loss": 0.8217,
+      "step": 401
+    },
+    {
+      "epoch": 0.520219993529602,
+      "grad_norm": 0.7412934899330139,
+      "learning_rate": 9.79086625165405e-05,
+      "loss": 0.868,
+      "step": 402
+    },
+    {
+      "epoch": 0.5215140731154966,
+      "grad_norm": 0.796435534954071,
+      "learning_rate": 9.789684716823995e-05,
+      "loss": 0.8691,
+      "step": 403
+    },
+    {
+      "epoch": 0.5228081527013911,
+      "grad_norm": 1.063193440437317,
+      "learning_rate": 9.788499925451849e-05,
+      "loss": 1.0085,
+      "step": 404
+    },
+    {
+      "epoch": 0.5241022322872857,
+      "grad_norm": 0.952882707118988,
+      "learning_rate": 9.787311878343157e-05,
+      "loss": 0.8378,
+      "step": 405
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.5276455453589504e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null