Training in progress, step 12600, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1273,6 +1273,10 @@ You can finetune this model on your own dataset.
 | 0.2173 | 12300 | 0.3686        |
 | 0.2182 | 12350 | 0.4163        |
 | 0.2191 | 12400 | 0.3595        |
 ### Framework Versions

 | 0.2173 | 12300 | 0.3686        |
 | 0.2182 | 12350 | 0.4163        |
 | 0.2191 | 12400 | 0.3595        |
+| 0.2200 | 12450 | 0.3326        |
+| 0.2209 | 12500 | 0.3775        |
+| 0.2218 | 12550 | 0.3695        |
+| 0.2227 | 12600 | 0.3545        |
 ### Framework Versions

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:574b3805817f84c05fe61afaa266bdb741864872a08c8b02a26021f7f2d4b6a1
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:80936dfc8900c8ac8c5575eef651ee2d03a5cd89aa29046749ab69fc98c04269
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4ffa53be833c86425bcb85e6adb4b16c9a27186fd568e0bc2763a7d2449b3db
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:8ae8d02c618bd157f065f641eb6849f4a49c9cf111a69fc22cd74fafb816bffc
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ef1f923f6a1dc64581eaa57f689d3afe64e52f3a89bbade50c868ec911b07ce
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2810d275875e84011ae759c1d7b23d09c29defe9da62dd6f19a6663a8f613d62
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3af4348a2f962e0a24bd0d54fa3ca7b60bf2d2d90f2e31ee6b869eaaa1d91a60
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ce2d7b7dd5e5f39af67a8ead71654a635306c138883914bf7f8972e42520e3f
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd23584cb98398038b4f29f6fb0b60db3fae00d268a4c127abde12daed6ca267
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c9386fe2e7b7d4f410bc4b63dcc7461a70d3aea6cca8295dc5a10ef7582b0f51
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2191161138696966,
   "eval_steps": 500,
-  "global_step": 12400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1744,6 +1744,34 @@
       "learning_rate": 4.3385168168698834e-05,
       "loss": 0.3595,
       "step": 12400
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.22265024473856268,
   "eval_steps": 500,
+  "global_step": 12600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.3385168168698834e-05,
       "loss": 0.3595,
       "step": 12400
+    },
+    {
+      "epoch": 0.21999964658691312,
+      "grad_norm": 1.5602166652679443,
+      "learning_rate": 4.333608215036029e-05,
+      "loss": 0.3326,
+      "step": 12450
+    },
+    {
+      "epoch": 0.22088317930412962,
+      "grad_norm": 1.7230535745620728,
+      "learning_rate": 4.328699613202176e-05,
+      "loss": 0.3775,
+      "step": 12500
+    },
+    {
+      "epoch": 0.22176671202134615,
+      "grad_norm": 1.8666094541549683,
+      "learning_rate": 4.323791011368322e-05,
+      "loss": 0.3695,
+      "step": 12550
+    },
+    {
+      "epoch": 0.22265024473856268,
+      "grad_norm": 3.1689233779907227,
+      "learning_rate": 4.318882409534468e-05,
+      "loss": 0.3545,
+      "step": 12600
     }
   ],
   "logging_steps": 50,