Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48091c9a54fd90c725dbd9f460f68dd6dbaf04b09e02dc0f4e9559453c0a84a3
 size 138995824

 version https://git-lfs.github.com/spec/v1
+oid sha256:c48fd3c19921a7abaa0ad76f391d1edcb686a03d176d205cb545efc13b0a9a05
 size 138995824

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b35b633d34e013f822223091de3899221c70dfaa3ddc5653974a233c329874f
 size 71077780

 version https://git-lfs.github.com/spec/v1
+oid sha256:5a9a826a2d90cc4f5ff05b6d650130cf6d8f9cf5274556c43cc1221cb845b250
 size 71077780

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4232f17d96925690067667b7fa4983410eb7f180b0b2200a6138930747d01dd
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a695e79f5fc4cf88c45ff0e51e04b6fb8da79d8a32ee1ff4141c4f74aaa7c82d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.7021275758743286,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.009937888198757764,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 57.798,
       "eval_steps_per_second": 14.451,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.035483590688768e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.6998058557510376,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.013250517598343685,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 57.798,
       "eval_steps_per_second": 14.451,
       "step": 150
+    },
+    {
+      "epoch": 0.010004140786749483,
+      "grad_norm": 0.10729548335075378,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 1.4979,
+      "step": 151
+    },
+    {
+      "epoch": 0.0100703933747412,
+      "grad_norm": 0.11312545835971832,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 1.7936,
+      "step": 152
+    },
+    {
+      "epoch": 0.010136645962732919,
+      "grad_norm": 0.1266067922115326,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 1.7841,
+      "step": 153
+    },
+    {
+      "epoch": 0.010202898550724638,
+      "grad_norm": 0.11736242473125458,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 1.7188,
+      "step": 154
+    },
+    {
+      "epoch": 0.010269151138716357,
+      "grad_norm": 0.1273820996284485,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 1.8252,
+      "step": 155
+    },
+    {
+      "epoch": 0.010335403726708074,
+      "grad_norm": 0.13111338019371033,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 1.6367,
+      "step": 156
+    },
+    {
+      "epoch": 0.010401656314699793,
+      "grad_norm": 0.12052463740110397,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 1.7269,
+      "step": 157
+    },
+    {
+      "epoch": 0.010467908902691512,
+      "grad_norm": 0.1232714131474495,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 1.804,
+      "step": 158
+    },
+    {
+      "epoch": 0.01053416149068323,
+      "grad_norm": 0.11604554206132889,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 1.6583,
+      "step": 159
+    },
+    {
+      "epoch": 0.010600414078674947,
+      "grad_norm": 0.12130093574523926,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 1.6631,
+      "step": 160
+    },
+    {
+      "epoch": 0.010666666666666666,
+      "grad_norm": 0.12418082356452942,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 1.7618,
+      "step": 161
+    },
+    {
+      "epoch": 0.010732919254658385,
+      "grad_norm": 0.12796759605407715,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 1.6703,
+      "step": 162
+    },
+    {
+      "epoch": 0.010799171842650104,
+      "grad_norm": 0.13356627523899078,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 1.8309,
+      "step": 163
+    },
+    {
+      "epoch": 0.010865424430641823,
+      "grad_norm": 0.12603557109832764,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 1.8709,
+      "step": 164
+    },
+    {
+      "epoch": 0.01093167701863354,
+      "grad_norm": 0.12487031519412994,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 1.6649,
+      "step": 165
+    },
+    {
+      "epoch": 0.010997929606625259,
+      "grad_norm": 0.12683650851249695,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 1.8663,
+      "step": 166
+    },
+    {
+      "epoch": 0.011064182194616978,
+      "grad_norm": 0.12115644663572311,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 1.7037,
+      "step": 167
+    },
+    {
+      "epoch": 0.011130434782608696,
+      "grad_norm": 0.12117600440979004,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 1.7597,
+      "step": 168
+    },
+    {
+      "epoch": 0.011196687370600414,
+      "grad_norm": 0.12432435154914856,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 1.6298,
+      "step": 169
+    },
+    {
+      "epoch": 0.011262939958592132,
+      "grad_norm": 0.12444484233856201,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 1.618,
+      "step": 170
+    },
+    {
+      "epoch": 0.011329192546583851,
+      "grad_norm": 0.12366094440221786,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 1.6378,
+      "step": 171
+    },
+    {
+      "epoch": 0.01139544513457557,
+      "grad_norm": 0.12173435091972351,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 1.6687,
+      "step": 172
+    },
+    {
+      "epoch": 0.011461697722567287,
+      "grad_norm": 0.13134273886680603,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 1.7764,
+      "step": 173
+    },
+    {
+      "epoch": 0.011527950310559006,
+      "grad_norm": 0.12765035033226013,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 1.7224,
+      "step": 174
+    },
+    {
+      "epoch": 0.011594202898550725,
+      "grad_norm": 0.14251765608787537,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 1.6037,
+      "step": 175
+    },
+    {
+      "epoch": 0.011660455486542444,
+      "grad_norm": 0.13152077794075012,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 1.7116,
+      "step": 176
+    },
+    {
+      "epoch": 0.01172670807453416,
+      "grad_norm": 0.12907034158706665,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 1.6615,
+      "step": 177
+    },
+    {
+      "epoch": 0.01179296066252588,
+      "grad_norm": 0.12604530155658722,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 1.7437,
+      "step": 178
+    },
+    {
+      "epoch": 0.011859213250517598,
+      "grad_norm": 0.1284632831811905,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 1.6218,
+      "step": 179
+    },
+    {
+      "epoch": 0.011925465838509317,
+      "grad_norm": 0.12516699731349945,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 1.6682,
+      "step": 180
+    },
+    {
+      "epoch": 0.011991718426501036,
+      "grad_norm": 0.1341974288225174,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.6652,
+      "step": 181
+    },
+    {
+      "epoch": 0.012057971014492753,
+      "grad_norm": 0.1347489356994629,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 1.7097,
+      "step": 182
+    },
+    {
+      "epoch": 0.012124223602484472,
+      "grad_norm": 0.12645584344863892,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 1.6848,
+      "step": 183
+    },
+    {
+      "epoch": 0.01219047619047619,
+      "grad_norm": 0.14024832844734192,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 1.7295,
+      "step": 184
+    },
+    {
+      "epoch": 0.01225672877846791,
+      "grad_norm": 0.12278766930103302,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 1.6852,
+      "step": 185
+    },
+    {
+      "epoch": 0.012322981366459627,
+      "grad_norm": 0.1317070722579956,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 1.7597,
+      "step": 186
+    },
+    {
+      "epoch": 0.012389233954451346,
+      "grad_norm": 0.13534079492092133,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 1.7254,
+      "step": 187
+    },
+    {
+      "epoch": 0.012455486542443064,
+      "grad_norm": 0.1295829564332962,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 1.6592,
+      "step": 188
+    },
+    {
+      "epoch": 0.012521739130434783,
+      "grad_norm": 0.1378515362739563,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 1.6303,
+      "step": 189
+    },
+    {
+      "epoch": 0.0125879917184265,
+      "grad_norm": 0.13493184745311737,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 1.6417,
+      "step": 190
+    },
+    {
+      "epoch": 0.01265424430641822,
+      "grad_norm": 0.12714862823486328,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 1.6406,
+      "step": 191
+    },
+    {
+      "epoch": 0.012720496894409938,
+      "grad_norm": 0.1322127729654312,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 1.5815,
+      "step": 192
+    },
+    {
+      "epoch": 0.012786749482401657,
+      "grad_norm": 0.1372261494398117,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 1.6885,
+      "step": 193
+    },
+    {
+      "epoch": 0.012853002070393374,
+      "grad_norm": 0.13817864656448364,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 1.7072,
+      "step": 194
+    },
+    {
+      "epoch": 0.012919254658385093,
+      "grad_norm": 0.14454637467861176,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 1.5637,
+      "step": 195
+    },
+    {
+      "epoch": 0.012985507246376812,
+      "grad_norm": 0.141245499253273,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 1.7511,
+      "step": 196
+    },
+    {
+      "epoch": 0.01305175983436853,
+      "grad_norm": 0.1369493454694748,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 1.6315,
+      "step": 197
+    },
+    {
+      "epoch": 0.01311801242236025,
+      "grad_norm": 0.15233369171619415,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 1.479,
+      "step": 198
+    },
+    {
+      "epoch": 0.013184265010351966,
+      "grad_norm": 0.16971765458583832,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 1.6512,
+      "step": 199
+    },
+    {
+      "epoch": 0.013250517598343685,
+      "grad_norm": 0.26924997568130493,
+      "learning_rate": 0.0,
+      "loss": 1.3145,
+      "step": 200
+    },
+    {
+      "epoch": 0.013250517598343685,
+      "eval_loss": 1.6998058557510376,
+      "eval_runtime": 439.6685,
+      "eval_samples_per_second": 57.819,
+      "eval_steps_per_second": 14.456,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.380644787585024e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null