Training in progress, step 20600, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1435,6 +1435,10 @@ You can finetune this model on your own dataset.
 | 0.3587 | 20300 | 0.3618        |
 | 0.3596 | 20350 | 0.4093        |
 | 0.3605 | 20400 | 0.3051        |
 </details>

 | 0.3587 | 20300 | 0.3618        |
 | 0.3596 | 20350 | 0.4093        |
 | 0.3605 | 20400 | 0.3051        |
+| 0.3614 | 20450 | 0.3379        |
+| 0.3622 | 20500 | 0.3582        |
+| 0.3631 | 20550 | 0.4004        |
+| 0.3640 | 20600 | 0.33          |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e24285b7fee77e14610cf519cac1441a781f70c15e6815c8a668c74a1781441
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:97621d4e5f190418e5ade8172b856dbf29aec1661e21c76459517df300f5dd4b
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1038bc1f6238eff236fd9c4b9cc4d1b074566fa9976e684c92c8e7bc2cf6ef1
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:be75d2187899c775b5671ab017796b70cd2d877b78c139a3ee2fbd3172e957b9
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2529eb4186b974d86a3033c917ecbb272c298c66ef6254d4ce1dc8e6139bb050
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:bd6a169e48c0fa7774725b5e2e61834f9ebf4e77ad306763883a681dba8565aa
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ade052dc4070f716b1bebceb9d5e60fb9f374ab26eb8ae1c89ae4e4e0acc23c
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:df7b827b26a6c90ed6161d81717bb3711d81cb1be9e30f4a4b392ff6e398d8b0
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:884f056648270af6936f71ef50e4c4799a892dfb36fffd709576ecbe5d41efd5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:82191afd9a6d7a471cf24d902e6f469df00ed0ce9e065a1aca219368120bb6fd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.36048134862433956,
   "eval_steps": 500,
-  "global_step": 20400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2864,6 +2864,34 @@
       "learning_rate": 3.5535332116000084e-05,
       "loss": 0.3051,
       "step": 20400
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.3640154794932056,
   "eval_steps": 500,
+  "global_step": 20600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.5535332116000084e-05,
       "loss": 0.3051,
       "step": 20400
+    },
+    {
+      "epoch": 0.3613648813415561,
+      "grad_norm": 1.4659417867660522,
+      "learning_rate": 3.548624609766155e-05,
+      "loss": 0.3379,
+      "step": 20450
+    },
+    {
+      "epoch": 0.3622484140587726,
+      "grad_norm": 1.520573616027832,
+      "learning_rate": 3.5437160079323007e-05,
+      "loss": 0.3582,
+      "step": 20500
+    },
+    {
+      "epoch": 0.3631319467759891,
+      "grad_norm": 2.158830165863037,
+      "learning_rate": 3.538807406098447e-05,
+      "loss": 0.4004,
+      "step": 20550
+    },
+    {
+      "epoch": 0.3640154794932056,
+      "grad_norm": 1.7503968477249146,
+      "learning_rate": 3.533898804264593e-05,
+      "loss": 0.33,
+      "step": 20600
     }
   ],
   "logging_steps": 50,