Training in progress, step 28600, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1229,6 +1229,10 @@ You can finetune this model on your own dataset.
 | 0.5001 | 28300 | 0.3338        |
 | 0.5010 | 28350 | 0.3109        |
 | 0.5018 | 28400 | 0.3234        |
 ### Framework Versions

 | 0.5001 | 28300 | 0.3338        |
 | 0.5010 | 28350 | 0.3109        |
 | 0.5018 | 28400 | 0.3234        |
+| 0.5027 | 28450 | 0.3257        |
+| 0.5036 | 28500 | 0.4675        |
+| 0.5045 | 28550 | 0.4318        |
+| 0.5054 | 28600 | 0.3594        |
 ### Framework Versions

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63181bf09a1c611c492c8b6ad279006fb56b9805969d0edba71cff3ee723a234
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:f1974b8b79f821393033413b708b3d76af02ee0630fc769c360d30160ba1e49c
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1746c035958eb9e0a972414e56f8fa4733c0402b003adb0be00e93e8ce71e93
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:80de25a64d3a7f9b01d3ac384a24ef9565f911507016f6cb2ebb2675cd2fca9a
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:febec73de51eb7db9177092196a53a5124fb83edff44db6d3e9496c1ee972476
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:104316cbc87d14059ece260dae5c5774e3d91c358c1eb53b1e4cc6f9aa5826ec
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f5147f98e53184b8dd37c8115204cda76018272755476678192eccb1efdd5d07
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:9ed0a067ae758f61df93eb0349389528a459caaf90517ed5419a4c3a8ab14656
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:953c76ee5b14f1426684c90829d80bb305b640c5a3e12732ffb64cce4b8f5126
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c41c0a76cd352649e53d0354e3152b28ade5f9653e46110b22a1db6134af332
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5018465833789825,
   "eval_steps": 500,
-  "global_step": 28400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3984,6 +3984,34 @@
       "learning_rate": 2.768549606330133e-05,
       "loss": 0.3234,
       "step": 28400
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5053807142478486,
   "eval_steps": 500,
+  "global_step": 28600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.768549606330133e-05,
       "loss": 0.3234,
       "step": 28400
+    },
+    {
+      "epoch": 0.502730116096199,
+      "grad_norm": 1.497454285621643,
+      "learning_rate": 2.7636410044962795e-05,
+      "loss": 0.3257,
+      "step": 28450
+    },
+    {
+      "epoch": 0.5036136488134155,
+      "grad_norm": 3.3886194229125977,
+      "learning_rate": 2.7587324026624256e-05,
+      "loss": 0.4675,
+      "step": 28500
+    },
+    {
+      "epoch": 0.504497181530632,
+      "grad_norm": 1.6604270935058594,
+      "learning_rate": 2.753823800828572e-05,
+      "loss": 0.4318,
+      "step": 28550
+    },
+    {
+      "epoch": 0.5053807142478486,
+      "grad_norm": 1.7005223035812378,
+      "learning_rate": 2.7489151989947186e-05,
+      "loss": 0.3594,
+      "step": 28600
     }
   ],
   "logging_steps": 50,