Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f156546a33a0b3df9de98e545f2a1381887bdf2ca21b83f3144118e479e8bc45
 size 377538512

 version https://git-lfs.github.com/spec/v1
+oid sha256:ddbe0bbc7bc3f64e6bb3a3b2bc1148a85360541a79f1ac183c1e2571b2cdaf1d
 size 377538512

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:989cc60fef974cce3d2e52860771996aab98ccf0cc9e291585e580fe1095ce58
 size 192144596

 version https://git-lfs.github.com/spec/v1
+oid sha256:6f8e09fa5a7b89fdc627e7fb0ab33bd6a2cd53c57dcfa1fd07928ccbbfd8e6a0
 size 192144596

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ef85424c2e3f2f201ac5baf98c81bebf53fed2983bb832a0023baf9b22559e0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f7521f647d499beacf8b3764e583dc0941bedaaef287d35ae32df0a37bf00e08
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc4a786186a574bdc543ff4b4563aab7c5e0b442c74c85899bb42a25553c5d0c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca62d85cf5423834480d3c20680f93b185c8e6574a8a14021d285e0b05c7449f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.0103774070739746,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.0979431929480901,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 21.121,
       "eval_steps_per_second": 5.282,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 8.102870343745536e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.0028241872787476,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.1305909239307868,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 21.121,
       "eval_steps_per_second": 5.282,
       "step": 150
+    },
+    {
+      "epoch": 0.09859614756774404,
+      "grad_norm": 2.021636724472046,
+      "learning_rate": 9.319397726443026e-06,
+      "loss": 0.9084,
+      "step": 151
+    },
+    {
+      "epoch": 0.09924910218739798,
+      "grad_norm": 2.2391059398651123,
+      "learning_rate": 8.962896471825342e-06,
+      "loss": 0.9427,
+      "step": 152
+    },
+    {
+      "epoch": 0.09990205680705191,
+      "grad_norm": 3.0561001300811768,
+      "learning_rate": 8.61214655125809e-06,
+      "loss": 0.8915,
+      "step": 153
+    },
+    {
+      "epoch": 0.10055501142670584,
+      "grad_norm": 2.604928493499756,
+      "learning_rate": 8.267243856267331e-06,
+      "loss": 0.9242,
+      "step": 154
+    },
+    {
+      "epoch": 0.10120796604635977,
+      "grad_norm": 2.782693386077881,
+      "learning_rate": 7.928282679806052e-06,
+      "loss": 0.923,
+      "step": 155
+    },
+    {
+      "epoch": 0.10186092066601371,
+      "grad_norm": 2.6287972927093506,
+      "learning_rate": 7.595355690475393e-06,
+      "loss": 0.9864,
+      "step": 156
+    },
+    {
+      "epoch": 0.10251387528566765,
+      "grad_norm": 2.7638707160949707,
+      "learning_rate": 7.268553907189964e-06,
+      "loss": 0.9488,
+      "step": 157
+    },
+    {
+      "epoch": 0.10316682990532158,
+      "grad_norm": 3.8539583683013916,
+      "learning_rate": 6.947966674294236e-06,
+      "loss": 0.9282,
+      "step": 158
+    },
+    {
+      "epoch": 0.10381978452497552,
+      "grad_norm": 2.9983620643615723,
+      "learning_rate": 6.6336816371366305e-06,
+      "loss": 0.9185,
+      "step": 159
+    },
+    {
+      "epoch": 0.10447273914462944,
+      "grad_norm": 2.6667168140411377,
+      "learning_rate": 6.325784718108196e-06,
+      "loss": 1.0198,
+      "step": 160
+    },
+    {
+      "epoch": 0.10512569376428338,
+      "grad_norm": 2.199368476867676,
+      "learning_rate": 6.0243600931522595e-06,
+      "loss": 0.9514,
+      "step": 161
+    },
+    {
+      "epoch": 0.10577864838393732,
+      "grad_norm": 2.339479684829712,
+      "learning_rate": 5.72949016875158e-06,
+      "loss": 0.8627,
+      "step": 162
+    },
+    {
+      "epoch": 0.10643160300359125,
+      "grad_norm": 2.4051907062530518,
+      "learning_rate": 5.44125555939923e-06,
+      "loss": 0.9412,
+      "step": 163
+    },
+    {
+      "epoch": 0.10708455762324519,
+      "grad_norm": 2.6235029697418213,
+      "learning_rate": 5.159735065559399e-06,
+      "loss": 0.9661,
+      "step": 164
+    },
+    {
+      "epoch": 0.10773751224289912,
+      "grad_norm": 2.3630259037017822,
+      "learning_rate": 4.885005652124144e-06,
+      "loss": 1.161,
+      "step": 165
+    },
+    {
+      "epoch": 0.10839046686255305,
+      "grad_norm": 2.62069034576416,
+      "learning_rate": 4.617142427371934e-06,
+      "loss": 1.0818,
+      "step": 166
+    },
+    {
+      "epoch": 0.10904342148220698,
+      "grad_norm": 3.125945568084717,
+      "learning_rate": 4.3562186224338265e-06,
+      "loss": 1.0923,
+      "step": 167
+    },
+    {
+      "epoch": 0.10969637610186092,
+      "grad_norm": 2.823786497116089,
+      "learning_rate": 4.102305571272783e-06,
+      "loss": 0.9918,
+      "step": 168
+    },
+    {
+      "epoch": 0.11034933072151486,
+      "grad_norm": 3.086836338043213,
+      "learning_rate": 3.855472691181678e-06,
+      "loss": 1.0645,
+      "step": 169
+    },
+    {
+      "epoch": 0.1110022853411688,
+      "grad_norm": 2.968472719192505,
+      "learning_rate": 3.615787463805331e-06,
+      "loss": 1.0943,
+      "step": 170
+    },
+    {
+      "epoch": 0.11165523996082272,
+      "grad_norm": 3.3311610221862793,
+      "learning_rate": 3.383315416691646e-06,
+      "loss": 1.0482,
+      "step": 171
+    },
+    {
+      "epoch": 0.11230819458047665,
+      "grad_norm": 2.8356263637542725,
+      "learning_rate": 3.158120105377096e-06,
+      "loss": 1.1126,
+      "step": 172
+    },
+    {
+      "epoch": 0.11296114920013059,
+      "grad_norm": 3.495161294937134,
+      "learning_rate": 2.940263096011233e-06,
+      "loss": 1.1292,
+      "step": 173
+    },
+    {
+      "epoch": 0.11361410381978453,
+      "grad_norm": 3.611652135848999,
+      "learning_rate": 2.729803948525125e-06,
+      "loss": 1.2087,
+      "step": 174
+    },
+    {
+      "epoch": 0.11426705843943846,
+      "grad_norm": 3.4223086833953857,
+      "learning_rate": 2.526800200348275e-06,
+      "loss": 1.0086,
+      "step": 175
+    },
+    {
+      "epoch": 0.1149200130590924,
+      "grad_norm": 3.590174913406372,
+      "learning_rate": 2.3313073506784575e-06,
+      "loss": 1.0571,
+      "step": 176
+    },
+    {
+      "epoch": 0.11557296767874632,
+      "grad_norm": 5.3133649826049805,
+      "learning_rate": 2.143378845308791e-06,
+      "loss": 1.079,
+      "step": 177
+    },
+    {
+      "epoch": 0.11622592229840026,
+      "grad_norm": 4.763728141784668,
+      "learning_rate": 1.9630660620161777e-06,
+      "loss": 1.1924,
+      "step": 178
+    },
+    {
+      "epoch": 0.1168788769180542,
+      "grad_norm": 4.005756378173828,
+      "learning_rate": 1.790418296515165e-06,
+      "loss": 1.1013,
+      "step": 179
+    },
+    {
+      "epoch": 0.11753183153770813,
+      "grad_norm": 4.59070348739624,
+      "learning_rate": 1.625482748980961e-06,
+      "loss": 1.0473,
+      "step": 180
+    },
+    {
+      "epoch": 0.11818478615736207,
+      "grad_norm": 5.296746253967285,
+      "learning_rate": 1.4683045111453942e-06,
+      "loss": 1.2648,
+      "step": 181
+    },
+    {
+      "epoch": 0.11883774077701599,
+      "grad_norm": 4.656338691711426,
+      "learning_rate": 1.3189265539692707e-06,
+      "loss": 1.0275,
+      "step": 182
+    },
+    {
+      "epoch": 0.11949069539666993,
+      "grad_norm": 5.707005977630615,
+      "learning_rate": 1.1773897158945557e-06,
+      "loss": 1.1523,
+      "step": 183
+    },
+    {
+      "epoch": 0.12014365001632386,
+      "grad_norm": 5.811980247497559,
+      "learning_rate": 1.0437326916795432e-06,
+      "loss": 1.1355,
+      "step": 184
+    },
+    {
+      "epoch": 0.1207966046359778,
+      "grad_norm": 5.786341190338135,
+      "learning_rate": 9.179920218200888e-07,
+      "loss": 1.0562,
+      "step": 185
+    },
+    {
+      "epoch": 0.12144955925563174,
+      "grad_norm": 9.454066276550293,
+      "learning_rate": 8.002020825598277e-07,
+      "loss": 1.1791,
+      "step": 186
+    },
+    {
+      "epoch": 0.12210251387528567,
+      "grad_norm": 7.248098850250244,
+      "learning_rate": 6.90395076492022e-07,
+      "loss": 0.9874,
+      "step": 187
+    },
+    {
+      "epoch": 0.1227554684949396,
+      "grad_norm": 6.36809778213501,
+      "learning_rate": 5.886010237557194e-07,
+      "loss": 0.9503,
+      "step": 188
+    },
+    {
+      "epoch": 0.12340842311459353,
+      "grad_norm": 6.659921646118164,
+      "learning_rate": 4.94847753828529e-07,
+      "loss": 1.0327,
+      "step": 189
+    },
+    {
+      "epoch": 0.12406137773424747,
+      "grad_norm": 6.8771562576293945,
+      "learning_rate": 4.091608979183303e-07,
+      "loss": 0.9623,
+      "step": 190
+    },
+    {
+      "epoch": 0.1247143323539014,
+      "grad_norm": 6.988889694213867,
+      "learning_rate": 3.315638819559452e-07,
+      "loss": 1.0579,
+      "step": 191
+    },
+    {
+      "epoch": 0.12536728697355534,
+      "grad_norm": 7.6399641036987305,
+      "learning_rate": 2.6207792019074414e-07,
+      "loss": 0.8439,
+      "step": 192
+    },
+    {
+      "epoch": 0.12602024159320926,
+      "grad_norm": 10.259600639343262,
+      "learning_rate": 2.0072200939085573e-07,
+      "loss": 1.2599,
+      "step": 193
+    },
+    {
+      "epoch": 0.12667319621286321,
+      "grad_norm": 11.773065567016602,
+      "learning_rate": 1.475129236496575e-07,
+      "loss": 1.0644,
+      "step": 194
+    },
+    {
+      "epoch": 0.12732615083251714,
+      "grad_norm": 7.667246341705322,
+      "learning_rate": 1.0246520979990459e-07,
+      "loss": 1.133,
+      "step": 195
+    },
+    {
+      "epoch": 0.12797910545217106,
+      "grad_norm": 9.011076927185059,
+      "learning_rate": 6.559118343676396e-08,
+      "loss": 0.992,
+      "step": 196
+    },
+    {
+      "epoch": 0.128632060071825,
+      "grad_norm": 16.59012794494629,
+      "learning_rate": 3.690092555085789e-08,
+      "loss": 1.1446,
+      "step": 197
+    },
+    {
+      "epoch": 0.12928501469147893,
+      "grad_norm": 13.964167594909668,
+      "learning_rate": 1.640227977221853e-08,
+      "loss": 1.1686,
+      "step": 198
+    },
+    {
+      "epoch": 0.12993796931113288,
+      "grad_norm": 9.262879371643066,
+      "learning_rate": 4.1008502259298755e-09,
+      "loss": 0.8989,
+      "step": 199
+    },
+    {
+      "epoch": 0.1305909239307868,
+      "grad_norm": 14.184370994567871,
+      "learning_rate": 0.0,
+      "loss": 1.3632,
+      "step": 200
+    },
+    {
+      "epoch": 0.1305909239307868,
+      "eval_loss": 1.0028241872787476,
+      "eval_runtime": 122.1359,
+      "eval_samples_per_second": 21.116,
+      "eval_steps_per_second": 5.281,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.0799333020532736e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null