Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6657a226cc0c3e14f29fa370deb9e3caa775840677e244efcc32831037a36d5a
 size 1001465824

 version https://git-lfs.github.com/spec/v1
+oid sha256:5733fd572a041502fd0f75b54fee4ddeec0a77d42518c275e38825d19812baa9
 size 1001465824

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c018df9e83db2d92c241d99012be2dcc0322a1f2d180ac4b86bd9a7a2f4541b
 size 509176980

 version https://git-lfs.github.com/spec/v1
+oid sha256:41c0d45e892b0c1ccd73d13415e605977578fc34d02e69d0d25733dfa68394d4
 size 509176980

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df1f766df65bea57494f01e4f4c52272697127fbef8384c36b4d737b1bb7c772
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4cb8a67fd05d3419c045fb4224f49b4687799b59240231eb4e285b2a2ccff3c9
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:970068ebf9c0dc6a40c93653c563bb0b2ba5296a6c46496b504a3f1343bf3a62
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7df65c8f4b426598b0abc4173b1983dcf7411aee63ea4061d980eae7a1af2363
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.14084061980247498,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.6302521008403361,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 1.623,
       "eval_steps_per_second": 0.409,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.86404819402752e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.10804670304059982,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.8403361344537815,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 1.623,
       "eval_steps_per_second": 0.409,
       "step": 150
+    },
+    {
+      "epoch": 0.634453781512605,
+      "grad_norm": 0.9199975728988647,
+      "learning_rate": 1.7085562498478883e-05,
+      "loss": 0.0577,
+      "step": 151
+    },
+    {
+      "epoch": 0.6386554621848739,
+      "grad_norm": 0.38636523485183716,
+      "learning_rate": 1.6431976865013128e-05,
+      "loss": 0.039,
+      "step": 152
+    },
+    {
+      "epoch": 0.6428571428571429,
+      "grad_norm": 0.2471998631954193,
+      "learning_rate": 1.5788935343973164e-05,
+      "loss": 0.0295,
+      "step": 153
+    },
+    {
+      "epoch": 0.6470588235294118,
+      "grad_norm": 0.1585417538881302,
+      "learning_rate": 1.5156613736490108e-05,
+      "loss": 0.0266,
+      "step": 154
+    },
+    {
+      "epoch": 0.6512605042016807,
+      "grad_norm": 0.06748829782009125,
+      "learning_rate": 1.4535184912977763e-05,
+      "loss": 0.0251,
+      "step": 155
+    },
+    {
+      "epoch": 0.6554621848739496,
+      "grad_norm": 0.06747037917375565,
+      "learning_rate": 1.3924818765871553e-05,
+      "loss": 0.0306,
+      "step": 156
+    },
+    {
+      "epoch": 0.6596638655462185,
+      "grad_norm": 0.07344760745763779,
+      "learning_rate": 1.3325682163181601e-05,
+      "loss": 0.0268,
+      "step": 157
+    },
+    {
+      "epoch": 0.6638655462184874,
+      "grad_norm": 0.08845263719558716,
+      "learning_rate": 1.2737938902872767e-05,
+      "loss": 0.0297,
+      "step": 158
+    },
+    {
+      "epoch": 0.6680672268907563,
+      "grad_norm": 0.10601629316806793,
+      "learning_rate": 1.2161749668083823e-05,
+      "loss": 0.0277,
+      "step": 159
+    },
+    {
+      "epoch": 0.6722689075630253,
+      "grad_norm": 0.2715802788734436,
+      "learning_rate": 1.159727198319836e-05,
+      "loss": 0.0282,
+      "step": 160
+    },
+    {
+      "epoch": 0.6764705882352942,
+      "grad_norm": 0.06532086431980133,
+      "learning_rate": 1.1044660170779142e-05,
+      "loss": 0.0314,
+      "step": 161
+    },
+    {
+      "epoch": 0.680672268907563,
+      "grad_norm": 0.05947871133685112,
+      "learning_rate": 1.0504065309377897e-05,
+      "loss": 0.0366,
+      "step": 162
+    },
+    {
+      "epoch": 0.6848739495798319,
+      "grad_norm": 0.05421306937932968,
+      "learning_rate": 9.97563519223192e-06,
+      "loss": 0.0324,
+      "step": 163
+    },
+    {
+      "epoch": 0.6890756302521008,
+      "grad_norm": 0.056551430374383926,
+      "learning_rate": 9.459514286858898e-06,
+      "loss": 0.0326,
+      "step": 164
+    },
+    {
+      "epoch": 0.6932773109243697,
+      "grad_norm": 0.05368305742740631,
+      "learning_rate": 8.95584369556093e-06,
+      "loss": 0.0364,
+      "step": 165
+    },
+    {
+      "epoch": 0.6974789915966386,
+      "grad_norm": 0.05321598798036575,
+      "learning_rate": 8.464761116848546e-06,
+      "loss": 0.0368,
+      "step": 166
+    },
+    {
+      "epoch": 0.7016806722689075,
+      "grad_norm": 0.05156972259283066,
+      "learning_rate": 7.986400807795349e-06,
+      "loss": 0.0284,
+      "step": 167
+    },
+    {
+      "epoch": 0.7058823529411765,
+      "grad_norm": 0.04861694574356079,
+      "learning_rate": 7.520893547333436e-06,
+      "loss": 0.0319,
+      "step": 168
+    },
+    {
+      "epoch": 0.7100840336134454,
+      "grad_norm": 0.05087107792496681,
+      "learning_rate": 7.068366600499744e-06,
+      "loss": 0.0319,
+      "step": 169
+    },
+    {
+      "epoch": 0.7142857142857143,
+      "grad_norm": 0.045963507145643234,
+      "learning_rate": 6.6289436836431076e-06,
+      "loss": 0.0351,
+      "step": 170
+    },
+    {
+      "epoch": 0.7184873949579832,
+      "grad_norm": 0.0428263358771801,
+      "learning_rate": 6.20274493060135e-06,
+      "loss": 0.0327,
+      "step": 171
+    },
+    {
+      "epoch": 0.7226890756302521,
+      "grad_norm": 0.048912014812231064,
+      "learning_rate": 5.789886859858009e-06,
+      "loss": 0.0373,
+      "step": 172
+    },
+    {
+      "epoch": 0.726890756302521,
+      "grad_norm": 0.04380197450518608,
+      "learning_rate": 5.3904823426872605e-06,
+      "loss": 0.0398,
+      "step": 173
+    },
+    {
+      "epoch": 0.7310924369747899,
+      "grad_norm": 0.04742840304970741,
+      "learning_rate": 5.004640572296062e-06,
+      "loss": 0.0375,
+      "step": 174
+    },
+    {
+      "epoch": 0.7352941176470589,
+      "grad_norm": 0.04380054399371147,
+      "learning_rate": 4.632467033971838e-06,
+      "loss": 0.0413,
+      "step": 175
+    },
+    {
+      "epoch": 0.7394957983193278,
+      "grad_norm": 0.03939207270741463,
+      "learning_rate": 4.274063476243839e-06,
+      "loss": 0.0362,
+      "step": 176
+    },
+    {
+      "epoch": 0.7436974789915967,
+      "grad_norm": 0.04470328986644745,
+      "learning_rate": 3.929527883066117e-06,
+      "loss": 0.0517,
+      "step": 177
+    },
+    {
+      "epoch": 0.7478991596638656,
+      "grad_norm": 0.045310597866773605,
+      "learning_rate": 3.5989544470296595e-06,
+      "loss": 0.0505,
+      "step": 178
+    },
+    {
+      "epoch": 0.7521008403361344,
+      "grad_norm": 0.04871699959039688,
+      "learning_rate": 3.282433543611136e-06,
+      "loss": 0.0554,
+      "step": 179
+    },
+    {
+      "epoch": 0.7563025210084033,
+      "grad_norm": 0.050524331629276276,
+      "learning_rate": 2.980051706465095e-06,
+      "loss": 0.0583,
+      "step": 180
+    },
+    {
+      "epoch": 0.7605042016806722,
+      "grad_norm": 0.05556650459766388,
+      "learning_rate": 2.691891603766556e-06,
+      "loss": 0.0576,
+      "step": 181
+    },
+    {
+      "epoch": 0.7647058823529411,
+      "grad_norm": 0.06121022626757622,
+      "learning_rate": 2.4180320156103298e-06,
+      "loss": 0.0746,
+      "step": 182
+    },
+    {
+      "epoch": 0.7689075630252101,
+      "grad_norm": 0.06415616720914841,
+      "learning_rate": 2.158547812473352e-06,
+      "loss": 0.073,
+      "step": 183
+    },
+    {
+      "epoch": 0.773109243697479,
+      "grad_norm": 0.07119602710008621,
+      "learning_rate": 1.9135099347458293e-06,
+      "loss": 0.0778,
+      "step": 184
+    },
+    {
+      "epoch": 0.7773109243697479,
+      "grad_norm": 0.08688875287771225,
+      "learning_rate": 1.6829853733368294e-06,
+      "loss": 0.0791,
+      "step": 185
+    },
+    {
+      "epoch": 0.7815126050420168,
+      "grad_norm": 0.09698156267404556,
+      "learning_rate": 1.4670371513596842e-06,
+      "loss": 0.0907,
+      "step": 186
+    },
+    {
+      "epoch": 0.7857142857142857,
+      "grad_norm": 0.11040172725915909,
+      "learning_rate": 1.2657243069020402e-06,
+      "loss": 0.0904,
+      "step": 187
+    },
+    {
+      "epoch": 0.7899159663865546,
+      "grad_norm": 0.13165590167045593,
+      "learning_rate": 1.0791018768854855e-06,
+      "loss": 0.1196,
+      "step": 188
+    },
+    {
+      "epoch": 0.7941176470588235,
+      "grad_norm": 0.14704085886478424,
+      "learning_rate": 9.072208820189698e-07,
+      "loss": 0.1128,
+      "step": 189
+    },
+    {
+      "epoch": 0.7983193277310925,
+      "grad_norm": 0.16108661890029907,
+      "learning_rate": 7.501283128502722e-07,
+      "loss": 0.1301,
+      "step": 190
+    },
+    {
+      "epoch": 0.8025210084033614,
+      "grad_norm": 0.19200128316879272,
+      "learning_rate": 6.07867116919233e-07,
+      "loss": 0.1524,
+      "step": 191
+    },
+    {
+      "epoch": 0.8067226890756303,
+      "grad_norm": 0.20474770665168762,
+      "learning_rate": 4.804761870163643e-07,
+      "loss": 0.1627,
+      "step": 192
+    },
+    {
+      "epoch": 0.8109243697478992,
+      "grad_norm": 0.20066982507705688,
+      "learning_rate": 3.6799035054990215e-07,
+      "loss": 0.1791,
+      "step": 193
+    },
+    {
+      "epoch": 0.8151260504201681,
+      "grad_norm": 0.18784397840499878,
+      "learning_rate": 2.704403600243721e-07,
+      "loss": 0.2186,
+      "step": 194
+    },
+    {
+      "epoch": 0.819327731092437,
+      "grad_norm": 0.17218390107154846,
+      "learning_rate": 1.878528846331584e-07,
+      "loss": 0.2742,
+      "step": 195
+    },
+    {
+      "epoch": 0.8235294117647058,
+      "grad_norm": 0.17353031039237976,
+      "learning_rate": 1.202505029674006e-07,
+      "loss": 0.3294,
+      "step": 196
+    },
+    {
+      "epoch": 0.8277310924369747,
+      "grad_norm": 0.2161194235086441,
+      "learning_rate": 6.765169684323947e-08,
+      "loss": 0.428,
+      "step": 197
+    },
+    {
+      "epoch": 0.8319327731092437,
+      "grad_norm": 0.2484060823917389,
+      "learning_rate": 3.007084624906731e-08,
+      "loss": 0.6874,
+      "step": 198
+    },
+    {
+      "epoch": 0.8361344537815126,
+      "grad_norm": 0.3053840696811676,
+      "learning_rate": 7.518225414204771e-09,
+      "loss": 0.981,
+      "step": 199
+    },
+    {
+      "epoch": 0.8403361344537815,
+      "grad_norm": 0.340952605009079,
+      "learning_rate": 0.0,
+      "loss": 1.0018,
+      "step": 200
+    },
+    {
+      "epoch": 0.8403361344537815,
+      "eval_loss": 0.10804670304059982,
+      "eval_runtime": 226.7217,
+      "eval_samples_per_second": 1.769,
+      "eval_steps_per_second": 0.445,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.15206425870336e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null