Training in progress, step 23800, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1499,6 +1499,10 @@ You can finetune this model on your own dataset.
 | 0.4153 | 23500 | 0.3987        |
 | 0.4161 | 23550 | 0.3387        |
 | 0.4170 | 23600 | 0.2989        |
 </details>

 | 0.4153 | 23500 | 0.3987        |
 | 0.4161 | 23550 | 0.3387        |
 | 0.4170 | 23600 | 0.2989        |
+| 0.4179 | 23650 | 0.2629        |
+| 0.4188 | 23700 | 0.2905        |
+| 0.4197 | 23750 | 0.3234        |
+| 0.4206 | 23800 | 0.3063        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1617eb2ae4888507c4f4075423705e736487e0fd06011313c271b8a67d2121e7
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:da25745d4f384214e5fd8311c12900458d1b5b86d594566fce244180fc108392
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8082e636e89c0305931d4fed9e511d53d0c861249cb9eb1baa51ec94b573d123
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:0b4992703dc226d67e2557f70e2f6a89c68bca3aed88eebebef64b02c8918852
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c7c3077f3b0e21db426cf04aaf6706b3f8e724b43a1c804482891604f1539c3f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f2a71677d5d8b3309c52c3c93b3a28a28f9259cc007d0be9b71770e4f3e0700
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2a1548d715b309492a66002f720121ae6b58979a558a4ea26d5d559620bd59b
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:508437f27d03b8b5ea47dcee3f87a11f21e258b0021c269cc27deb7dd1d6dca5
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c549b0e10abd21bebaa5ec4fd4b6a6e95036a423d8901ec4f127ce499a3bb98
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6db4f21bcf6330e113ebe3fc1f667f5da112ad8caba02a1bdc8f265e3ca1aa1d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.4170274425261967,
   "eval_steps": 500,
-  "global_step": 23600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3312,6 +3312,34 @@
       "learning_rate": 3.239579038306729e-05,
       "loss": 0.2989,
       "step": 23600
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.42056157339506284,
   "eval_steps": 500,
+  "global_step": 23800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.239579038306729e-05,
       "loss": 0.2989,
       "step": 23600
+    },
+    {
+      "epoch": 0.41791097524341325,
+      "grad_norm": 1.4236600399017334,
+      "learning_rate": 3.234670436472875e-05,
+      "loss": 0.2629,
+      "step": 23650
+    },
+    {
+      "epoch": 0.4187945079606298,
+      "grad_norm": 3.2101380825042725,
+      "learning_rate": 3.229761834639022e-05,
+      "loss": 0.2905,
+      "step": 23700
+    },
+    {
+      "epoch": 0.4196780406778463,
+      "grad_norm": 1.3380919694900513,
+      "learning_rate": 3.2248532328051675e-05,
+      "loss": 0.3234,
+      "step": 23750
+    },
+    {
+      "epoch": 0.42056157339506284,
+      "grad_norm": 1.5015414953231812,
+      "learning_rate": 3.2199446309713146e-05,
+      "loss": 0.3063,
+      "step": 23800
     }
   ],
   "logging_steps": 50,