Training in progress, step 250, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b744b1ce429a59148cc3edb86a39988f0c7f82f2f4cec917ab4700516a1d2d4d
 size 323014168

 version https://git-lfs.github.com/spec/v1
+oid sha256:b81c89bf5a89dea220e294a4652d890349bf12553800a49024591d26ca2757a4
 size 323014168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4435c9dd46ee360c2866c624c80beecd8c54ccd488b1f5ad0e271d9c35a6382a
 size 164464564

 version https://git-lfs.github.com/spec/v1
+oid sha256:4536e6d49005fda0a95151faebf86801ba2e3b8da30b27d77d184347ee694363
 size 164464564

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7726f50d6970cd5b957fbd3f859f311c8be3d0ced4fed6a04709ea0c71181063
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:356dcfdc03c399d2e663c95cf1133f32813c707adc33ad61bc750dcc5222213f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:47345b2adedc866297f19eccd74a7db75a3d504bcbba513f5e3ae09a2efa0798
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:517cde929c0b918b0e53e0ffd764ecc43637194fb41b83640993bbae7c21d100
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.42559075355529785,
-  "best_model_checkpoint": "miner_id_24/checkpoint-200",
-  "epoch": 0.33948652662847445,
   "eval_steps": 50,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1447,6 +1447,364 @@
       "eval_samples_per_second": 2.941,
       "eval_steps_per_second": 2.941,
       "step": 200
     }
   ],
   "logging_steps": 1,
@@ -1475,7 +1833,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.29551053519061e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.39342138171195984,
+  "best_model_checkpoint": "miner_id_24/checkpoint-250",
+  "epoch": 0.42435815828559303,
   "eval_steps": 50,
+  "global_step": 250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 2.941,
       "eval_steps_per_second": 2.941,
       "step": 200
+    },
+    {
+      "epoch": 0.3411839592616168,
+      "grad_norm": 0.5887985825538635,
+      "learning_rate": 0.0002732809457224292,
+      "loss": 0.9434,
+      "step": 201
+    },
+    {
+      "epoch": 0.3428813918947592,
+      "grad_norm": 0.38302966952323914,
+      "learning_rate": 0.00027300916279262866,
+      "loss": 1.0988,
+      "step": 202
+    },
+    {
+      "epoch": 0.34457882452790156,
+      "grad_norm": 0.2614414095878601,
+      "learning_rate": 0.0002727361411619245,
+      "loss": 0.6772,
+      "step": 203
+    },
+    {
+      "epoch": 0.34627625716104393,
+      "grad_norm": 0.3063081204891205,
+      "learning_rate": 0.0002724618835796414,
+      "loss": 0.7314,
+      "step": 204
+    },
+    {
+      "epoch": 0.3479736897941863,
+      "grad_norm": 0.18658895790576935,
+      "learning_rate": 0.0002721863928075503,
+      "loss": 0.514,
+      "step": 205
+    },
+    {
+      "epoch": 0.3496711224273287,
+      "grad_norm": 0.282010018825531,
+      "learning_rate": 0.0002719096716198402,
+      "loss": 0.892,
+      "step": 206
+    },
+    {
+      "epoch": 0.35136855506047104,
+      "grad_norm": 0.17541489005088806,
+      "learning_rate": 0.00027163172280309026,
+      "loss": 0.5047,
+      "step": 207
+    },
+    {
+      "epoch": 0.3530659876936134,
+      "grad_norm": 0.21916832029819489,
+      "learning_rate": 0.0002713525491562421,
+      "loss": 0.7146,
+      "step": 208
+    },
+    {
+      "epoch": 0.3547634203267558,
+      "grad_norm": 0.20561501383781433,
+      "learning_rate": 0.0002710721534905712,
+      "loss": 0.6104,
+      "step": 209
+    },
+    {
+      "epoch": 0.35646085295989816,
+      "grad_norm": 0.1947142332792282,
+      "learning_rate": 0.00027079053862965875,
+      "loss": 0.4924,
+      "step": 210
+    },
+    {
+      "epoch": 0.3581582855930405,
+      "grad_norm": 0.23798146843910217,
+      "learning_rate": 0.00027050770740936336,
+      "loss": 0.6153,
+      "step": 211
+    },
+    {
+      "epoch": 0.3598557182261829,
+      "grad_norm": 0.21775560081005096,
+      "learning_rate": 0.00027022366267779224,
+      "loss": 0.4658,
+      "step": 212
+    },
+    {
+      "epoch": 0.36155315085932527,
+      "grad_norm": 0.24994409084320068,
+      "learning_rate": 0.0002699384072952727,
+      "loss": 0.5979,
+      "step": 213
+    },
+    {
+      "epoch": 0.36325058349246764,
+      "grad_norm": 0.28469640016555786,
+      "learning_rate": 0.0002696519441343233,
+      "loss": 0.8796,
+      "step": 214
+    },
+    {
+      "epoch": 0.36494801612561,
+      "grad_norm": 0.2747570276260376,
+      "learning_rate": 0.0002693642760796248,
+      "loss": 0.8625,
+      "step": 215
+    },
+    {
+      "epoch": 0.3666454487587524,
+      "grad_norm": 0.2469591647386551,
+      "learning_rate": 0.00026907540602799136,
+      "loss": 0.6991,
+      "step": 216
+    },
+    {
+      "epoch": 0.36834288139189475,
+      "grad_norm": 0.20425763726234436,
+      "learning_rate": 0.00026878533688834123,
+      "loss": 0.5774,
+      "step": 217
+    },
+    {
+      "epoch": 0.3700403140250371,
+      "grad_norm": 0.2737872898578644,
+      "learning_rate": 0.0002684940715816674,
+      "loss": 0.9062,
+      "step": 218
+    },
+    {
+      "epoch": 0.3717377466581795,
+      "grad_norm": 0.2064121514558792,
+      "learning_rate": 0.00026820161304100823,
+      "loss": 0.5054,
+      "step": 219
+    },
+    {
+      "epoch": 0.37343517929132186,
+      "grad_norm": 0.14863868057727814,
+      "learning_rate": 0.00026790796421141813,
+      "loss": 0.285,
+      "step": 220
+    },
+    {
+      "epoch": 0.37513261192446423,
+      "grad_norm": 0.12498918920755386,
+      "learning_rate": 0.00026761312804993734,
+      "loss": 0.1999,
+      "step": 221
+    },
+    {
+      "epoch": 0.3768300445576066,
+      "grad_norm": 0.1726280152797699,
+      "learning_rate": 0.0002673171075255629,
+      "loss": 0.2852,
+      "step": 222
+    },
+    {
+      "epoch": 0.378527477190749,
+      "grad_norm": 0.1533537358045578,
+      "learning_rate": 0.0002670199056192181,
+      "loss": 0.3106,
+      "step": 223
+    },
+    {
+      "epoch": 0.38022490982389134,
+      "grad_norm": 0.125217467546463,
+      "learning_rate": 0.00026672152532372287,
+      "loss": 0.1804,
+      "step": 224
+    },
+    {
+      "epoch": 0.3819223424570337,
+      "grad_norm": 0.05522383376955986,
+      "learning_rate": 0.0002664219696437635,
+      "loss": 0.0442,
+      "step": 225
+    },
+    {
+      "epoch": 0.3836197750901761,
+      "grad_norm": 0.04138198867440224,
+      "learning_rate": 0.00026612124159586237,
+      "loss": 0.023,
+      "step": 226
+    },
+    {
+      "epoch": 0.38531720772331846,
+      "grad_norm": 0.05575822666287422,
+      "learning_rate": 0.0002658193442083475,
+      "loss": 0.0024,
+      "step": 227
+    },
+    {
+      "epoch": 0.3870146403564608,
+      "grad_norm": 0.12629126012325287,
+      "learning_rate": 0.0002655162805213223,
+      "loss": 0.1524,
+      "step": 228
+    },
+    {
+      "epoch": 0.38871207298960325,
+      "grad_norm": 0.02942221239209175,
+      "learning_rate": 0.00026521205358663477,
+      "loss": 0.0096,
+      "step": 229
+    },
+    {
+      "epoch": 0.3904095056227456,
+      "grad_norm": 0.0953650251030922,
+      "learning_rate": 0.00026490666646784665,
+      "loss": 0.0043,
+      "step": 230
+    },
+    {
+      "epoch": 0.392106938255888,
+      "grad_norm": 0.005734459031373262,
+      "learning_rate": 0.00026460012224020297,
+      "loss": 0.0003,
+      "step": 231
+    },
+    {
+      "epoch": 0.39380437088903036,
+      "grad_norm": 0.010758363641798496,
+      "learning_rate": 0.0002642924239906006,
+      "loss": 0.0003,
+      "step": 232
+    },
+    {
+      "epoch": 0.39550180352217273,
+      "grad_norm": 0.01772010512650013,
+      "learning_rate": 0.0002639835748175575,
+      "loss": 0.0007,
+      "step": 233
+    },
+    {
+      "epoch": 0.3971992361553151,
+      "grad_norm": 0.005056055262684822,
+      "learning_rate": 0.0002636735778311815,
+      "loss": 0.0002,
+      "step": 234
+    },
+    {
+      "epoch": 0.3988966687884575,
+      "grad_norm": 0.24263891577720642,
+      "learning_rate": 0.00026336243615313873,
+      "loss": 0.0008,
+      "step": 235
+    },
+    {
+      "epoch": 0.40059410142159985,
+      "grad_norm": 0.0014849180588498712,
+      "learning_rate": 0.0002630501529166224,
+      "loss": 0.0001,
+      "step": 236
+    },
+    {
+      "epoch": 0.4022915340547422,
+      "grad_norm": 0.0037826071493327618,
+      "learning_rate": 0.00026273673126632133,
+      "loss": 0.0002,
+      "step": 237
+    },
+    {
+      "epoch": 0.4039889666878846,
+      "grad_norm": 0.08331254124641418,
+      "learning_rate": 0.0002624221743583881,
+      "loss": 0.0016,
+      "step": 238
+    },
+    {
+      "epoch": 0.40568639932102696,
+      "grad_norm": 0.002364553976804018,
+      "learning_rate": 0.0002621064853604071,
+      "loss": 0.0001,
+      "step": 239
+    },
+    {
+      "epoch": 0.40738383195416933,
+      "grad_norm": 0.014542756602168083,
+      "learning_rate": 0.0002617896674513632,
+      "loss": 0.0002,
+      "step": 240
+    },
+    {
+      "epoch": 0.4090812645873117,
+      "grad_norm": 0.0031418628059327602,
+      "learning_rate": 0.00026147172382160914,
+      "loss": 0.0001,
+      "step": 241
+    },
+    {
+      "epoch": 0.41077869722045407,
+      "grad_norm": 0.11094752699136734,
+      "learning_rate": 0.00026115265767283374,
+      "loss": 0.0031,
+      "step": 242
+    },
+    {
+      "epoch": 0.41247612985359644,
+      "grad_norm": 0.012769564054906368,
+      "learning_rate": 0.0002608324722180296,
+      "loss": 0.0005,
+      "step": 243
+    },
+    {
+      "epoch": 0.4141735624867388,
+      "grad_norm": 0.055052801966667175,
+      "learning_rate": 0.0002605111706814607,
+      "loss": 0.0023,
+      "step": 244
+    },
+    {
+      "epoch": 0.4158709951198812,
+      "grad_norm": 0.003668338293209672,
+      "learning_rate": 0.00026018875629862996,
+      "loss": 0.0002,
+      "step": 245
+    },
+    {
+      "epoch": 0.41756842775302355,
+      "grad_norm": 0.009973675012588501,
+      "learning_rate": 0.0002598652323162466,
+      "loss": 0.0003,
+      "step": 246
+    },
+    {
+      "epoch": 0.4192658603861659,
+      "grad_norm": 0.02005830593407154,
+      "learning_rate": 0.0002595406019921936,
+      "loss": 0.0008,
+      "step": 247
+    },
+    {
+      "epoch": 0.4209632930193083,
+      "grad_norm": 0.02860446274280548,
+      "learning_rate": 0.0002592148685954946,
+      "loss": 0.0024,
+      "step": 248
+    },
+    {
+      "epoch": 0.42266072565245066,
+      "grad_norm": 0.03582284599542618,
+      "learning_rate": 0.0002588880354062814,
+      "loss": 0.0014,
+      "step": 249
+    },
+    {
+      "epoch": 0.42435815828559303,
+      "grad_norm": 0.03657930716872215,
+      "learning_rate": 0.0002585601057157605,
+      "loss": 0.0023,
+      "step": 250
+    },
+    {
+      "epoch": 0.42435815828559303,
+      "eval_loss": 0.39342138171195984,
+      "eval_runtime": 65.6277,
+      "eval_samples_per_second": 2.941,
+      "eval_steps_per_second": 2.941,
+      "step": 250
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.6186741772569805e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null