Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c3e7ca48d02945b4167ddeecbd976ee25026c0bf395812e6f22ab2e92041f24
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:8f3c6cb2cd5ff2d0dfb93e42b06557b7db97e6febcb83b2e28ea58a798cb9330
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:41af7fd7417b812248a265b7795735424d496d523793bee6c7be779d9889f3d2
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:40ba31dcdf7afb4a87c59ee0c2741791e2438c9f0b8c3eb54f8ea4fd75e0c871
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:982a45d08e66d6debcb357cd08f69f49b1844a3ebff76fc61a037481810729c3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d291fedc593ddfa4b9b57d85f9f53542ce112653c8c92fc748292673336ec404
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d606eeb1aa97b417de3c30d0a970be83ac979e2c7cc0fa41135c63d459909e5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba4af3b1b4fa156d60adeec70df709d1741ac2f3147c676ab2805007313fc707
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.812873125076294,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.17851829812555786,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 8.797,
       "eval_steps_per_second": 2.201,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.274084219959378e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.559772253036499,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.23802439750074383,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 8.797,
       "eval_steps_per_second": 2.201,
       "step": 150
+    },
+    {
+      "epoch": 0.1797084201130616,
+      "grad_norm": 2.1665163040161133,
+      "learning_rate": 2.6047368421052634e-05,
+      "loss": 2.9363,
+      "step": 151
+    },
+    {
+      "epoch": 0.1808985421005653,
+      "grad_norm": 2.231947422027588,
+      "learning_rate": 2.5515789473684213e-05,
+      "loss": 2.8189,
+      "step": 152
+    },
+    {
+      "epoch": 0.18208866408806904,
+      "grad_norm": 1.9498807191848755,
+      "learning_rate": 2.4984210526315788e-05,
+      "loss": 2.8967,
+      "step": 153
+    },
+    {
+      "epoch": 0.18327878607557274,
+      "grad_norm": 1.6646301746368408,
+      "learning_rate": 2.445263157894737e-05,
+      "loss": 2.9605,
+      "step": 154
+    },
+    {
+      "epoch": 0.18446890806307648,
+      "grad_norm": 1.2766884565353394,
+      "learning_rate": 2.3921052631578946e-05,
+      "loss": 2.7265,
+      "step": 155
+    },
+    {
+      "epoch": 0.18565903005058018,
+      "grad_norm": 1.0804224014282227,
+      "learning_rate": 2.3389473684210528e-05,
+      "loss": 2.7534,
+      "step": 156
+    },
+    {
+      "epoch": 0.18684915203808392,
+      "grad_norm": 0.9548969268798828,
+      "learning_rate": 2.2857894736842106e-05,
+      "loss": 2.7941,
+      "step": 157
+    },
+    {
+      "epoch": 0.18803927402558762,
+      "grad_norm": 0.8820357918739319,
+      "learning_rate": 2.2326315789473685e-05,
+      "loss": 2.5207,
+      "step": 158
+    },
+    {
+      "epoch": 0.18922939601309136,
+      "grad_norm": 0.9637076258659363,
+      "learning_rate": 2.1794736842105264e-05,
+      "loss": 2.5984,
+      "step": 159
+    },
+    {
+      "epoch": 0.19041951800059506,
+      "grad_norm": 0.9365648627281189,
+      "learning_rate": 2.1263157894736842e-05,
+      "loss": 2.6507,
+      "step": 160
+    },
+    {
+      "epoch": 0.19160963998809877,
+      "grad_norm": 0.9047538638114929,
+      "learning_rate": 2.073157894736842e-05,
+      "loss": 2.5624,
+      "step": 161
+    },
+    {
+      "epoch": 0.1927997619756025,
+      "grad_norm": 0.9913797974586487,
+      "learning_rate": 2.0200000000000003e-05,
+      "loss": 2.7407,
+      "step": 162
+    },
+    {
+      "epoch": 0.1939898839631062,
+      "grad_norm": 0.9947323203086853,
+      "learning_rate": 1.966842105263158e-05,
+      "loss": 2.6398,
+      "step": 163
+    },
+    {
+      "epoch": 0.19518000595060994,
+      "grad_norm": 0.9551875591278076,
+      "learning_rate": 1.913684210526316e-05,
+      "loss": 2.6093,
+      "step": 164
+    },
+    {
+      "epoch": 0.19637012793811365,
+      "grad_norm": 0.9988086819648743,
+      "learning_rate": 1.8605263157894736e-05,
+      "loss": 2.5585,
+      "step": 165
+    },
+    {
+      "epoch": 0.19756024992561738,
+      "grad_norm": 1.087716817855835,
+      "learning_rate": 1.8073684210526318e-05,
+      "loss": 2.6282,
+      "step": 166
+    },
+    {
+      "epoch": 0.1987503719131211,
+      "grad_norm": 1.0601743459701538,
+      "learning_rate": 1.7542105263157897e-05,
+      "loss": 2.6258,
+      "step": 167
+    },
+    {
+      "epoch": 0.19994049390062482,
+      "grad_norm": 1.1024737358093262,
+      "learning_rate": 1.7010526315789475e-05,
+      "loss": 2.5256,
+      "step": 168
+    },
+    {
+      "epoch": 0.20113061588812853,
+      "grad_norm": 1.1294111013412476,
+      "learning_rate": 1.6478947368421054e-05,
+      "loss": 2.6306,
+      "step": 169
+    },
+    {
+      "epoch": 0.20232073787563226,
+      "grad_norm": 1.1903879642486572,
+      "learning_rate": 1.5947368421052633e-05,
+      "loss": 2.6052,
+      "step": 170
+    },
+    {
+      "epoch": 0.20351085986313597,
+      "grad_norm": 1.253252387046814,
+      "learning_rate": 1.541578947368421e-05,
+      "loss": 2.7537,
+      "step": 171
+    },
+    {
+      "epoch": 0.2047009818506397,
+      "grad_norm": 1.3783352375030518,
+      "learning_rate": 1.4884210526315788e-05,
+      "loss": 2.5608,
+      "step": 172
+    },
+    {
+      "epoch": 0.2058911038381434,
+      "grad_norm": 1.3314725160598755,
+      "learning_rate": 1.4352631578947369e-05,
+      "loss": 2.6971,
+      "step": 173
+    },
+    {
+      "epoch": 0.20708122582564714,
+      "grad_norm": 1.3991272449493408,
+      "learning_rate": 1.3821052631578949e-05,
+      "loss": 2.6963,
+      "step": 174
+    },
+    {
+      "epoch": 0.20827134781315085,
+      "grad_norm": 1.5228500366210938,
+      "learning_rate": 1.3289473684210526e-05,
+      "loss": 2.5793,
+      "step": 175
+    },
+    {
+      "epoch": 0.20946146980065455,
+      "grad_norm": 1.4984205961227417,
+      "learning_rate": 1.2757894736842106e-05,
+      "loss": 2.666,
+      "step": 176
+    },
+    {
+      "epoch": 0.2106515917881583,
+      "grad_norm": 1.7694042921066284,
+      "learning_rate": 1.2226315789473685e-05,
+      "loss": 2.8852,
+      "step": 177
+    },
+    {
+      "epoch": 0.211841713775662,
+      "grad_norm": 1.8036147356033325,
+      "learning_rate": 1.1694736842105264e-05,
+      "loss": 2.7626,
+      "step": 178
+    },
+    {
+      "epoch": 0.21303183576316573,
+      "grad_norm": 1.7980536222457886,
+      "learning_rate": 1.1163157894736842e-05,
+      "loss": 2.7129,
+      "step": 179
+    },
+    {
+      "epoch": 0.21422195775066943,
+      "grad_norm": 2.07534122467041,
+      "learning_rate": 1.0631578947368421e-05,
+      "loss": 2.9602,
+      "step": 180
+    },
+    {
+      "epoch": 0.21541207973817317,
+      "grad_norm": 2.0630900859832764,
+      "learning_rate": 1.0100000000000002e-05,
+      "loss": 2.765,
+      "step": 181
+    },
+    {
+      "epoch": 0.21660220172567687,
+      "grad_norm": 2.077697992324829,
+      "learning_rate": 9.56842105263158e-06,
+      "loss": 2.5767,
+      "step": 182
+    },
+    {
+      "epoch": 0.2177923237131806,
+      "grad_norm": 2.0414209365844727,
+      "learning_rate": 9.036842105263159e-06,
+      "loss": 2.7381,
+      "step": 183
+    },
+    {
+      "epoch": 0.21898244570068431,
+      "grad_norm": 2.3121683597564697,
+      "learning_rate": 8.505263157894738e-06,
+      "loss": 2.6139,
+      "step": 184
+    },
+    {
+      "epoch": 0.22017256768818805,
+      "grad_norm": 2.3920252323150635,
+      "learning_rate": 7.973684210526316e-06,
+      "loss": 2.8152,
+      "step": 185
+    },
+    {
+      "epoch": 0.22136268967569175,
+      "grad_norm": 2.3734066486358643,
+      "learning_rate": 7.442105263157894e-06,
+      "loss": 2.3302,
+      "step": 186
+    },
+    {
+      "epoch": 0.2225528116631955,
+      "grad_norm": 2.291586399078369,
+      "learning_rate": 6.9105263157894745e-06,
+      "loss": 2.7101,
+      "step": 187
+    },
+    {
+      "epoch": 0.2237429336506992,
+      "grad_norm": 2.3693385124206543,
+      "learning_rate": 6.378947368421053e-06,
+      "loss": 2.232,
+      "step": 188
+    },
+    {
+      "epoch": 0.22493305563820293,
+      "grad_norm": 3.0694398880004883,
+      "learning_rate": 5.847368421052632e-06,
+      "loss": 2.2262,
+      "step": 189
+    },
+    {
+      "epoch": 0.22612317762570663,
+      "grad_norm": 2.5530786514282227,
+      "learning_rate": 5.315789473684211e-06,
+      "loss": 2.2363,
+      "step": 190
+    },
+    {
+      "epoch": 0.22731329961321037,
+      "grad_norm": 2.73111629486084,
+      "learning_rate": 4.78421052631579e-06,
+      "loss": 2.3876,
+      "step": 191
+    },
+    {
+      "epoch": 0.22850342160071407,
+      "grad_norm": 2.807893753051758,
+      "learning_rate": 4.252631578947369e-06,
+      "loss": 2.261,
+      "step": 192
+    },
+    {
+      "epoch": 0.22969354358821778,
+      "grad_norm": 2.7763075828552246,
+      "learning_rate": 3.721052631578947e-06,
+      "loss": 2.0528,
+      "step": 193
+    },
+    {
+      "epoch": 0.23088366557572151,
+      "grad_norm": 3.2379202842712402,
+      "learning_rate": 3.1894736842105266e-06,
+      "loss": 2.5698,
+      "step": 194
+    },
+    {
+      "epoch": 0.23207378756322522,
+      "grad_norm": 4.352906227111816,
+      "learning_rate": 2.6578947368421053e-06,
+      "loss": 2.8109,
+      "step": 195
+    },
+    {
+      "epoch": 0.23326390955072895,
+      "grad_norm": 3.5866363048553467,
+      "learning_rate": 2.1263157894736844e-06,
+      "loss": 2.2815,
+      "step": 196
+    },
+    {
+      "epoch": 0.23445403153823266,
+      "grad_norm": 4.414037227630615,
+      "learning_rate": 1.5947368421052633e-06,
+      "loss": 2.4135,
+      "step": 197
+    },
+    {
+      "epoch": 0.2356441535257364,
+      "grad_norm": 4.259603500366211,
+      "learning_rate": 1.0631578947368422e-06,
+      "loss": 2.0559,
+      "step": 198
+    },
+    {
+      "epoch": 0.2368342755132401,
+      "grad_norm": 4.946102619171143,
+      "learning_rate": 5.315789473684211e-07,
+      "loss": 1.8129,
+      "step": 199
+    },
+    {
+      "epoch": 0.23802439750074383,
+      "grad_norm": 7.032172203063965,
+      "learning_rate": 0.0,
+      "loss": 3.0412,
+      "step": 200
+    },
+    {
+      "epoch": 0.23802439750074383,
+      "eval_loss": 2.559772253036499,
+      "eval_runtime": 160.6933,
+      "eval_samples_per_second": 8.806,
+      "eval_steps_per_second": 2.203,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.0321122932791706e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null