farmery committed on
Commit
2881d0a
·
verified ·
1 Parent(s): 2a267a2

Training in progress, step 86, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75f1d892b2366386761f0a72e1416c3a2e5e8a2c42705893fa8dc14c92a4967e
3
  size 50624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:030eaddc6f6e92696098f138d1d6f183ae1feab61caf59dad9121a005f8a739a
3
  size 50624
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fce9ad8826004310c683743744f592950aa99030e99c0a773d97fb67c7cc572
3
  size 118090
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73e75b1442d6f68ae9ec14ea3d9bb12f9dedda9bc5c41de5f976e4bb49cd2095
3
  size 118090
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a124254e9255126713591c1f59a9c70d9b2a575d52070bed3b63df36570aa65
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66b4fed55d7442ec4f04242a594d7c198315a7d44290c90ffaea764d86aad661
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cf20479de077e1f12a1a0a2558891d641a7cd7f83617558364eb5fab3cecf50
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb08bb8a586d27643dcf86a9f8306def51a9136f80f5802d3aee00dc499c85b7
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d32bbee8c1a3203e5781cafb76382ca878bef03f828d2544598d5b9e442e046
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41209214eef4e8962e9e3297d4c43c9981f0d838e4478bdc95bb660b06addb87
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44022d716771debca474b8d0807aaa6d48fb5a1f2aa65fce603e5767b76f790b
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c58fa9498e2c74f0eb38ae93ff5adde3fe07458cf51af1baaaa3f0aa96d3fb9
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92c40e70c89071663742fb7f45e3d35ba3e327e558255bec76dafa8c4bde0527
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3515ffb61bbd99b16f0cf41af74761fa2ee8d9e372c8ce4c68c7f0ba42572ed2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.6315789473684212,
5
  "eval_steps": 8,
6
- "global_step": 75,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -612,6 +612,91 @@
612
  "learning_rate": 5.080450905401057e-06,
613
  "loss": 10.364,
614
  "step": 75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
615
  }
616
  ],
617
  "logging_steps": 1,
@@ -626,12 +711,12 @@
626
  "should_evaluate": false,
627
  "should_log": false,
628
  "should_save": true,
629
- "should_training_stop": false
630
  },
631
  "attributes": {}
632
  }
633
  },
634
- "total_flos": 15690733977600.0,
635
  "train_batch_size": 8,
636
  "trial_name": null,
637
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.017543859649123,
5
  "eval_steps": 8,
6
+ "global_step": 86,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
612
  "learning_rate": 5.080450905401057e-06,
613
  "loss": 10.364,
614
  "step": 75
615
+ },
616
+ {
617
+ "epoch": 2.6666666666666665,
618
+ "grad_norm": 0.04788883775472641,
619
+ "learning_rate": 4.2113336672471245e-06,
620
+ "loss": 10.3642,
621
+ "step": 76
622
+ },
623
+ {
624
+ "epoch": 2.7017543859649122,
625
+ "grad_norm": 0.05305058881640434,
626
+ "learning_rate": 3.420445597436056e-06,
627
+ "loss": 10.3684,
628
+ "step": 77
629
+ },
630
+ {
631
+ "epoch": 2.736842105263158,
632
+ "grad_norm": 0.04230741783976555,
633
+ "learning_rate": 2.7091379149682685e-06,
634
+ "loss": 10.3681,
635
+ "step": 78
636
+ },
637
+ {
638
+ "epoch": 2.7719298245614032,
639
+ "grad_norm": 0.04892972111701965,
640
+ "learning_rate": 2.0786258770873647e-06,
641
+ "loss": 10.3648,
642
+ "step": 79
643
+ },
644
+ {
645
+ "epoch": 2.807017543859649,
646
+ "grad_norm": 0.05191851034760475,
647
+ "learning_rate": 1.5299867030334814e-06,
648
+ "loss": 10.3678,
649
+ "step": 80
650
+ },
651
+ {
652
+ "epoch": 2.807017543859649,
653
+ "eval_loss": 10.36169147491455,
654
+ "eval_runtime": 0.0484,
655
+ "eval_samples_per_second": 1983.67,
656
+ "eval_steps_per_second": 61.99,
657
+ "step": 80
658
+ },
659
+ {
660
+ "epoch": 2.8421052631578947,
661
+ "grad_norm": 0.05835256725549698,
662
+ "learning_rate": 1.064157733632276e-06,
663
+ "loss": 10.3622,
664
+ "step": 81
665
+ },
666
+ {
667
+ "epoch": 2.8771929824561404,
668
+ "grad_norm": 0.051547639071941376,
669
+ "learning_rate": 6.819348298638839e-07,
670
+ "loss": 10.3642,
671
+ "step": 82
672
+ },
673
+ {
674
+ "epoch": 2.912280701754386,
675
+ "grad_norm": 0.05024786293506622,
676
+ "learning_rate": 3.839710131477492e-07,
677
+ "loss": 10.3686,
678
+ "step": 83
679
+ },
680
+ {
681
+ "epoch": 2.9473684210526314,
682
+ "grad_norm": 0.05746513977646828,
683
+ "learning_rate": 1.7077534966650766e-07,
684
+ "loss": 10.3635,
685
+ "step": 84
686
+ },
687
+ {
688
+ "epoch": 2.982456140350877,
689
+ "grad_norm": 0.05468269810080528,
690
+ "learning_rate": 4.2712080634949024e-08,
691
+ "loss": 10.3656,
692
+ "step": 85
693
+ },
694
+ {
695
+ "epoch": 3.017543859649123,
696
+ "grad_norm": 0.0812983587384224,
697
+ "learning_rate": 0.0,
698
+ "loss": 15.6424,
699
+ "step": 86
700
  }
701
  ],
702
  "logging_steps": 1,
 
711
  "should_evaluate": false,
712
  "should_log": false,
713
  "should_save": true,
714
+ "should_training_stop": true
715
  },
716
  "attributes": {}
717
  }
718
  },
719
+ "total_flos": 17992041627648.0,
720
  "train_batch_size": 8,
721
  "trial_name": null,
722
  "trial_params": null