Training in progress, step 410, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3ba083b80a5ba86c78573397f43c5755304ed3a937a891778f373046687e85a
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:486f843fd963ad047e108ad6e2177b02194372d7badc986147d584a09dce4090
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f7bdba3f023857413f670e1a267e69714b283f392e2ead25387966136304530
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:09813614e11f944a54535b49907271670faa0fd78f62de6107511458c58eb3bc
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87c82db480424801d4bb33b59a50589e1c04688890a2475f7053ae263b3756f4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f7913630d73a37908980f47087984697274289698afc9a9db6afe7e4f8cb1c4
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33cf1fc2a7a88cf2a60a50e566aaa7e8972cd330e2ff6eb55fa333ccfaf32fd5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:960df08ce745ddc135f0c492f6181a23fd839f7222d0b2b71af63986ebe995a6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5241022322872857,
   "eval_steps": 386,
-  "global_step": 405,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2858,6 +2858,41 @@
       "learning_rate": 9.787311878343157e-05,
       "loss": 0.8378,
       "step": 405
     }
   ],
   "logging_steps": 1,
@@ -2877,7 +2912,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.5276455453589504e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5305726302167584,
   "eval_steps": 386,
+  "global_step": 410,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.787311878343157e-05,
       "loss": 0.8378,
       "step": 405
+    },
+    {
+      "epoch": 0.5253963118731803,
+      "grad_norm": 0.7899916768074036,
+      "learning_rate": 9.786120576305682e-05,
+      "loss": 0.8917,
+      "step": 406
+    },
+    {
+      "epoch": 0.5266903914590747,
+      "grad_norm": 0.8242781758308411,
+      "learning_rate": 9.784926020149398e-05,
+      "loss": 0.9778,
+      "step": 407
+    },
+    {
+      "epoch": 0.5279844710449693,
+      "grad_norm": 0.9736928343772888,
+      "learning_rate": 9.783728210686496e-05,
+      "loss": 1.0145,
+      "step": 408
+    },
+    {
+      "epoch": 0.5292785506308638,
+      "grad_norm": 0.8070263862609863,
+      "learning_rate": 9.782527148731372e-05,
+      "loss": 0.9923,
+      "step": 409
+    },
+    {
+      "epoch": 0.5305726302167584,
+      "grad_norm": 0.861262857913971,
+      "learning_rate": 9.781322835100638e-05,
+      "loss": 1.0261,
+      "step": 410
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.583542403943629e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null