Training in progress, step 24000, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1503,6 +1503,10 @@ You can finetune this model on your own dataset.
 | 0.4188 | 23700 | 0.2905        |
 | 0.4197 | 23750 | 0.3234        |
 | 0.4206 | 23800 | 0.3063        |
 </details>

 | 0.4188 | 23700 | 0.2905        |
 | 0.4197 | 23750 | 0.3234        |
 | 0.4206 | 23800 | 0.3063        |
+| 0.4214 | 23850 | 0.3386        |
+| 0.4223 | 23900 | 0.3431        |
+| 0.4232 | 23950 | 0.2902        |
+| 0.4241 | 24000 | 0.3136        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da25745d4f384214e5fd8311c12900458d1b5b86d594566fce244180fc108392
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f90e824e1a9ffb638cde5da629726eeb7bc12cd6362fbb12d7d04ce5372c8f5
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b4992703dc226d67e2557f70e2f6a89c68bca3aed88eebebef64b02c8918852
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:f9a99a7427fcc196b3d93d1ed84e8a7ea1d28d3ea047d1257af87399a72693de
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f2a71677d5d8b3309c52c3c93b3a28a28f9259cc007d0be9b71770e4f3e0700
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:66083f07808429ecf1c592a46d5e2fa5fd55c02b1303e07c74cebcc8fc44243c
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:508437f27d03b8b5ea47dcee3f87a11f21e258b0021c269cc27deb7dd1d6dca5
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:367e0d39f4bf5ffc122ff5c01987f9ae3492dd527c06b1748807449df9f2581e
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6db4f21bcf6330e113ebe3fc1f667f5da112ad8caba02a1bdc8f265e3ca1aa1d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:33058f7a86733565c00edc7dc90e657260ca9633bceae0708704d9688e1a076c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.42056157339506284,
   "eval_steps": 500,
-  "global_step": 23800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3340,6 +3340,34 @@
       "learning_rate": 3.2199446309713146e-05,
       "loss": 0.3063,
       "step": 23800
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.4240957042639289,
   "eval_steps": 500,
+  "global_step": 24000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.2199446309713146e-05,
       "loss": 0.3063,
       "step": 23800
+    },
+    {
+      "epoch": 0.4214451061122793,
+      "grad_norm": 1.289444923400879,
+      "learning_rate": 3.2150360291374604e-05,
+      "loss": 0.3386,
+      "step": 23850
+    },
+    {
+      "epoch": 0.42232863882949584,
+      "grad_norm": 2.95922589302063,
+      "learning_rate": 3.210127427303607e-05,
+      "loss": 0.3431,
+      "step": 23900
+    },
+    {
+      "epoch": 0.42321217154671237,
+      "grad_norm": 1.6753530502319336,
+      "learning_rate": 3.2052188254697534e-05,
+      "loss": 0.2902,
+      "step": 23950
+    },
+    {
+      "epoch": 0.4240957042639289,
+      "grad_norm": 1.6901003122329712,
+      "learning_rate": 3.2003102236359e-05,
+      "loss": 0.3136,
+      "step": 24000
     }
   ],
   "logging_steps": 50,