Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +188 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3298409ff8f7747dc1cb2d83877f7dfffc02e42ea2a8fb25eddd0e8312800cd9
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f802a0148a738af610d29d4abfffdedfd1669043f7b6364627f018e6e028dee
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c45026f32aa4236d506f05644d56d326443ec9ae9f31b76f507173dc119c219d
 size 671466706

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a84e8485abcc463e2e71a00c5169a08de029995712a2b498093e98c82de573c
 size 671466706

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:973e0dcfdc80f91bbf73d10b756c8a1317ce2b15e2d637d4ea3fba8b5b3b24eb
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2b240c54b95dfa149637770da05b4b0da81b57b9528dcb187226d560175452f
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc9d7aba5456da6627e16310b385b9ea15bf2e2d87593d97e76b23be86169880
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f13bef3b08562f24a93a0a8ab3341192f7e86d649c8cb80fc5b3c09671b5df4
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d00d1a2575f1b19c9dca5a6790c086abf36f97dbf8b2963926a72f70d91c642
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:d7ce653ea80f1f3799fb0829c2b0f39d424b12aa707f6625210cb2e8da1f7cb1
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:362417982d4dfc8af9a516defc30442bd23de6dbca3e1a255c462c888784005a
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:41f05f913444632461e3812eacd936898b5f0509b157cf18d17c46a083409ecb
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.032263268269075655,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 14.382,
       "eval_steps_per_second": 3.739,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -212,7 +395,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.984041808658432e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
+  "epoch": 0.06452653653815131,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.382,
       "eval_steps_per_second": 3.739,
       "step": 25
+    },
+    {
+      "epoch": 0.03355379899983869,
+      "grad_norm": 0.7012822031974792,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 0.0063,
+      "step": 26
+    },
+    {
+      "epoch": 0.03484432973060171,
+      "grad_norm": 0.0,
+      "learning_rate": 5.205685918464356e-05,
+      "loss": 0.0,
+      "step": 27
+    },
+    {
+      "epoch": 0.036134860461364736,
+      "grad_norm": 0.0,
+      "learning_rate": 4.912632135009769e-05,
+      "loss": 0.0,
+      "step": 28
+    },
+    {
+      "epoch": 0.03742539119212776,
+      "grad_norm": 0.0,
+      "learning_rate": 4.6220935509274235e-05,
+      "loss": 0.0,
+      "step": 29
+    },
+    {
+      "epoch": 0.03871592192289079,
+      "grad_norm": 0.0,
+      "learning_rate": 4.3353142970386564e-05,
+      "loss": 0.0,
+      "step": 30
+    },
+    {
+      "epoch": 0.040006452653653816,
+      "grad_norm": 0.0,
+      "learning_rate": 4.053522406135775e-05,
+      "loss": 0.0,
+      "step": 31
+    },
+    {
+      "epoch": 0.04129698338441684,
+      "grad_norm": 0.0,
+      "learning_rate": 3.777924554357096e-05,
+      "loss": 0.0,
+      "step": 32
+    },
+    {
+      "epoch": 0.042587514115179866,
+      "grad_norm": 0.0,
+      "learning_rate": 3.509700894014496e-05,
+      "loss": 0.0,
+      "step": 33
+    },
+    {
+      "epoch": 0.0438780448459429,
+      "grad_norm": 0.0,
+      "learning_rate": 3.250000000000001e-05,
+      "loss": 0.0,
+      "step": 34
+    },
+    {
+      "epoch": 0.04516857557670592,
+      "grad_norm": 0.0,
+      "learning_rate": 2.9999339514117912e-05,
+      "loss": 0.0,
+      "step": 35
+    },
+    {
+      "epoch": 0.046459106307468946,
+      "grad_norm": 0.0,
+      "learning_rate": 2.760573569460757e-05,
+      "loss": 0.0,
+      "step": 36
+    },
+    {
+      "epoch": 0.04774963703823197,
+      "grad_norm": 0.0,
+      "learning_rate": 2.53294383204969e-05,
+      "loss": 0.0,
+      "step": 37
+    },
+    {
+      "epoch": 0.049040167768995,
+      "grad_norm": 2.208824396133423,
+      "learning_rate": 2.3180194846605367e-05,
+      "loss": 0.0403,
+      "step": 38
+    },
+    {
+      "epoch": 0.05033069849975803,
+      "grad_norm": 0.0,
+      "learning_rate": 2.1167208663446025e-05,
+      "loss": 0.0,
+      "step": 39
+    },
+    {
+      "epoch": 0.05162122923052105,
+      "grad_norm": 0.0,
+      "learning_rate": 1.9299099686894423e-05,
+      "loss": 0.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.052911759961284076,
+      "grad_norm": 0.0,
+      "learning_rate": 1.758386744638546e-05,
+      "loss": 0.0,
+      "step": 41
+    },
+    {
+      "epoch": 0.05420229069204711,
+      "grad_norm": 0.0,
+      "learning_rate": 1.602885682970026e-05,
+      "loss": 0.0,
+      "step": 42
+    },
+    {
+      "epoch": 0.05549282142281013,
+      "grad_norm": 0.0,
+      "learning_rate": 1.464072663102903e-05,
+      "loss": 0.0,
+      "step": 43
+    },
+    {
+      "epoch": 0.056783352153573156,
+      "grad_norm": 0.0,
+      "learning_rate": 1.3425421036992098e-05,
+      "loss": 0.0,
+      "step": 44
+    },
+    {
+      "epoch": 0.05807388288433618,
+      "grad_norm": 0.0,
+      "learning_rate": 1.2388144172720251e-05,
+      "loss": 0.0,
+      "step": 45
+    },
+    {
+      "epoch": 0.05936441361509921,
+      "grad_norm": 0.0,
+      "learning_rate": 1.1533337816991932e-05,
+      "loss": 0.0,
+      "step": 46
+    },
+    {
+      "epoch": 0.06065494434586224,
+      "grad_norm": 0.0,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 0.0,
+      "step": 47
+    },
+    {
+      "epoch": 0.06194547507662526,
+      "grad_norm": 0.0,
+      "learning_rate": 1.0384981238178534e-05,
+      "loss": 0.0,
+      "step": 48
+    },
+    {
+      "epoch": 0.06323600580738829,
+      "grad_norm": 0.0,
+      "learning_rate": 1.0096348454262845e-05,
+      "loss": 0.0,
+      "step": 49
+    },
+    {
+      "epoch": 0.06452653653815131,
+      "grad_norm": 0.0,
+      "learning_rate": 1e-05,
+      "loss": 0.0,
+      "step": 50
+    },
+    {
+      "epoch": 0.06452653653815131,
+      "eval_loss": NaN,
+      "eval_runtime": 3.4858,
+      "eval_samples_per_second": 14.344,
+      "eval_steps_per_second": 3.729,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.968083617316864e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null