Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:892c06cded51c96cb3873fe13e8d63b3884edf4098b831f0f127ff20f2e37b07
 size 100966336

 version https://git-lfs.github.com/spec/v1
+oid sha256:808b99178ff1da16c453bc54650f89ed7a21fc8b87d4ee558f213b02ba51080a
 size 100966336

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7882fe7b9f2004627c33042670a2c50fcedae14a71038f095fd0d4cc6517352
 size 202110330

 version https://git-lfs.github.com/spec/v1
+oid sha256:b531bbf50a6e1cf066afabc1b8edf0119271c43d9807bf177a97810e38be57bd
 size 202110330

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cab7e519ead804180d3c4604c76e68eef9a913c742ae61564b48a4cae0f4089e
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b2a9bacd5031ad56e01b036d9518b8a794e8a27e1bce5b9bb838013dc20ca5f
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9dcf1a3ba28138a48d412fd1ee515eec64e348588fa2d0eab3aa3990929ea594
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:a543cdbb686c5b74707f45735b2a1c776b07e077c568a7895d27c0af6b50dfa7
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a15eb86472ca14c720ec853cfa573df7dffdf02582ac67964b696a534b2643a3
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:59e9d227ef604577ef8b4397806e71173f8ed7566468c15799fd24ba821047c1
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e7ab73e57b4dfeb27c8e8527de1ae263fca0fac870b5cb298f985b66ebe5a8d
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:89e8595be2e39753ec7d305650418f0cbcee136c01112c7b42e3cbed071ed0ac
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 3.584397315979004,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.5118362124120281,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 29.299,
       "eval_steps_per_second": 7.618,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.16708196040704e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 3.0633466243743896,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 1.033909149072297,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 29.299,
       "eval_steps_per_second": 7.618,
       "step": 25
+    },
+    {
+      "epoch": 0.5323096609085093,
+      "grad_norm": 9.44233226776123,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 4.2071,
+      "step": 26
+    },
+    {
+      "epoch": 0.5527831094049904,
+      "grad_norm": 5.62533712387085,
+      "learning_rate": 5.205685918464356e-05,
+      "loss": 4.4023,
+      "step": 27
+    },
+    {
+      "epoch": 0.5732565579014716,
+      "grad_norm": 6.5841851234436035,
+      "learning_rate": 4.912632135009769e-05,
+      "loss": 3.6272,
+      "step": 28
+    },
+    {
+      "epoch": 0.5937300063979527,
+      "grad_norm": 3.9638733863830566,
+      "learning_rate": 4.6220935509274235e-05,
+      "loss": 3.6042,
+      "step": 29
+    },
+    {
+      "epoch": 0.6142034548944337,
+      "grad_norm": 3.8045289516448975,
+      "learning_rate": 4.3353142970386564e-05,
+      "loss": 3.8018,
+      "step": 30
+    },
+    {
+      "epoch": 0.6346769033909149,
+      "grad_norm": 4.675745010375977,
+      "learning_rate": 4.053522406135775e-05,
+      "loss": 4.1539,
+      "step": 31
+    },
+    {
+      "epoch": 0.655150351887396,
+      "grad_norm": 7.304325103759766,
+      "learning_rate": 3.777924554357096e-05,
+      "loss": 2.5739,
+      "step": 32
+    },
+    {
+      "epoch": 0.6756238003838771,
+      "grad_norm": 8.27237319946289,
+      "learning_rate": 3.509700894014496e-05,
+      "loss": 1.9565,
+      "step": 33
+    },
+    {
+      "epoch": 0.6960972488803583,
+      "grad_norm": 4.600338459014893,
+      "learning_rate": 3.250000000000001e-05,
+      "loss": 2.6638,
+      "step": 34
+    },
+    {
+      "epoch": 0.7165706973768394,
+      "grad_norm": 6.785358428955078,
+      "learning_rate": 2.9999339514117912e-05,
+      "loss": 3.8978,
+      "step": 35
+    },
+    {
+      "epoch": 0.7370441458733206,
+      "grad_norm": 13.49142074584961,
+      "learning_rate": 2.760573569460757e-05,
+      "loss": 3.8446,
+      "step": 36
+    },
+    {
+      "epoch": 0.7575175943698017,
+      "grad_norm": 5.301874160766602,
+      "learning_rate": 2.53294383204969e-05,
+      "loss": 4.1858,
+      "step": 37
+    },
+    {
+      "epoch": 0.7779910428662828,
+      "grad_norm": 5.839453220367432,
+      "learning_rate": 2.3180194846605367e-05,
+      "loss": 3.7766,
+      "step": 38
+    },
+    {
+      "epoch": 0.7984644913627639,
+      "grad_norm": 4.404445648193359,
+      "learning_rate": 2.1167208663446025e-05,
+      "loss": 3.9701,
+      "step": 39
+    },
+    {
+      "epoch": 0.818937939859245,
+      "grad_norm": 4.650479793548584,
+      "learning_rate": 1.9299099686894423e-05,
+      "loss": 3.3742,
+      "step": 40
+    },
+    {
+      "epoch": 0.8394113883557262,
+      "grad_norm": 3.8081395626068115,
+      "learning_rate": 1.758386744638546e-05,
+      "loss": 3.4152,
+      "step": 41
+    },
+    {
+      "epoch": 0.8598848368522073,
+      "grad_norm": 3.4957478046417236,
+      "learning_rate": 1.602885682970026e-05,
+      "loss": 3.4227,
+      "step": 42
+    },
+    {
+      "epoch": 0.8803582853486884,
+      "grad_norm": 3.3087618350982666,
+      "learning_rate": 1.464072663102903e-05,
+      "loss": 3.4167,
+      "step": 43
+    },
+    {
+      "epoch": 0.9008317338451696,
+      "grad_norm": 6.3253397941589355,
+      "learning_rate": 1.3425421036992098e-05,
+      "loss": 2.0345,
+      "step": 44
+    },
+    {
+      "epoch": 0.9213051823416507,
+      "grad_norm": 6.485597133636475,
+      "learning_rate": 1.2388144172720251e-05,
+      "loss": 1.5626,
+      "step": 45
+    },
+    {
+      "epoch": 0.9417786308381318,
+      "grad_norm": 2.990126848220825,
+      "learning_rate": 1.1533337816991932e-05,
+      "loss": 2.5906,
+      "step": 46
+    },
+    {
+      "epoch": 0.9622520793346129,
+      "grad_norm": 8.486502647399902,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 4.0102,
+      "step": 47
+    },
+    {
+      "epoch": 0.982725527831094,
+      "grad_norm": 13.994930267333984,
+      "learning_rate": 1.0384981238178534e-05,
+      "loss": 3.7827,
+      "step": 48
+    },
+    {
+      "epoch": 1.0134357005758157,
+      "grad_norm": 8.235879898071289,
+      "learning_rate": 1.0096348454262845e-05,
+      "loss": 6.6262,
+      "step": 49
+    },
+    {
+      "epoch": 1.033909149072297,
+      "grad_norm": 5.423706531524658,
+      "learning_rate": 1e-05,
+      "loss": 3.513,
+      "step": 50
+    },
+    {
+      "epoch": 1.033909149072297,
+      "eval_loss": 3.0633466243743896,
+      "eval_runtime": 1.7028,
+      "eval_samples_per_second": 29.364,
+      "eval_steps_per_second": 7.635,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8.33416392081408e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null