End of training

Browse files

Files changed (6) hide show

README.md +4 -1
all_results.json +11 -0
eval_results.json +7 -0
runs/Mar11_17-03-21_bf1b508326f5/events.out.tfevents.1710184514.bf1b508326f5.24615.1 +3 -0
train_results.json +7 -0
trainer_state.json +975 -0

README.md CHANGED Viewed

@@ -1,5 +1,6 @@
 ---
 tags:
 - generated_from_trainer
 datasets:
 - imagefolder
@@ -13,7 +14,9 @@ should probably proofread and complete it, then remove this comment. -->
 # radiovers16v
-This model is a fine-tuned version of [](https://huggingface.co/) on the imagefolder dataset.
 ## Model description

 ---
 tags:
+- masked-auto-encoding
 - generated_from_trainer
 datasets:
 - imagefolder
 # radiovers16v
+This model is a fine-tuned version of [](https://huggingface.co/) on the /kaggle/radioai/radiology_ai dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.4036
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+    "epoch": 40.0,
+    "eval_loss": 0.40356436371803284,
+    "eval_runtime": 706.3909,
+    "eval_samples_per_second": 172.196,
+    "eval_steps_per_second": 21.525,
+    "train_loss": 0.47529568801970173,
+    "train_runtime": 7092.3542,
+    "train_samples_per_second": 76.223,
+    "train_steps_per_second": 9.531
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 40.0,
+    "eval_loss": 0.40356436371803284,
+    "eval_runtime": 706.3909,
+    "eval_samples_per_second": 172.196,
+    "eval_steps_per_second": 21.525
+}

runs/Mar11_17-03-21_bf1b508326f5/events.out.tfevents.1710184514.bf1b508326f5.24615.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8e6994461c78758cd528cdb289c24c28035aae7bfb44bac60a9f7808347e8712
+size 364

train_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 40.0,
+    "train_loss": 0.47529568801970173,
+    "train_runtime": 7092.3542,
+    "train_samples_per_second": 76.223,
+    "train_steps_per_second": 9.531
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,975 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 40.0,
+  "eval_steps": 500,
+  "global_step": 67600,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.3,
+      "grad_norm": 0.0696972981095314,
+      "learning_rate": 3.1018860946745563e-05,
+      "loss": 0.785,
+      "step": 500
+    },
+    {
+      "epoch": 0.59,
+      "grad_norm": 0.1446572095155716,
+      "learning_rate": 3.0787721893491126e-05,
+      "loss": 0.7706,
+      "step": 1000
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 0.2664627134799957,
+      "learning_rate": 3.055658284023669e-05,
+      "loss": 0.7614,
+      "step": 1500
+    },
+    {
+      "epoch": 1.18,
+      "grad_norm": 0.2499350905418396,
+      "learning_rate": 3.032544378698225e-05,
+      "loss": 0.7472,
+      "step": 2000
+    },
+    {
+      "epoch": 1.48,
+      "grad_norm": 0.37448567152023315,
+      "learning_rate": 3.009430473372781e-05,
+      "loss": 0.7371,
+      "step": 2500
+    },
+    {
+      "epoch": 1.78,
+      "grad_norm": 0.5009666085243225,
+      "learning_rate": 2.9863165680473374e-05,
+      "loss": 0.7284,
+      "step": 3000
+    },
+    {
+      "epoch": 2.07,
+      "grad_norm": 0.4846726357936859,
+      "learning_rate": 2.9632026627218937e-05,
+      "loss": 0.707,
+      "step": 3500
+    },
+    {
+      "epoch": 2.37,
+      "grad_norm": 0.5636312365531921,
+      "learning_rate": 2.94008875739645e-05,
+      "loss": 0.692,
+      "step": 4000
+    },
+    {
+      "epoch": 2.66,
+      "grad_norm": 0.9609221816062927,
+      "learning_rate": 2.9169748520710063e-05,
+      "loss": 0.6832,
+      "step": 4500
+    },
+    {
+      "epoch": 2.96,
+      "grad_norm": 0.6074559092521667,
+      "learning_rate": 2.893860946745562e-05,
+      "loss": 0.6713,
+      "step": 5000
+    },
+    {
+      "epoch": 3.25,
+      "grad_norm": 0.6442582011222839,
+      "learning_rate": 2.8707470414201182e-05,
+      "loss": 0.6624,
+      "step": 5500
+    },
+    {
+      "epoch": 3.55,
+      "grad_norm": 0.52489173412323,
+      "learning_rate": 2.8476331360946745e-05,
+      "loss": 0.6533,
+      "step": 6000
+    },
+    {
+      "epoch": 3.85,
+      "grad_norm": 0.6586915254592896,
+      "learning_rate": 2.8245192307692307e-05,
+      "loss": 0.6414,
+      "step": 6500
+    },
+    {
+      "epoch": 4.14,
+      "grad_norm": 0.7074326276779175,
+      "learning_rate": 2.801405325443787e-05,
+      "loss": 0.632,
+      "step": 7000
+    },
+    {
+      "epoch": 4.44,
+      "grad_norm": 0.7869051694869995,
+      "learning_rate": 2.7782914201183433e-05,
+      "loss": 0.625,
+      "step": 7500
+    },
+    {
+      "epoch": 4.73,
+      "grad_norm": 0.7031483054161072,
+      "learning_rate": 2.7551775147928993e-05,
+      "loss": 0.6116,
+      "step": 8000
+    },
+    {
+      "epoch": 5.03,
+      "grad_norm": 0.7165437340736389,
+      "learning_rate": 2.7320636094674555e-05,
+      "loss": 0.6067,
+      "step": 8500
+    },
+    {
+      "epoch": 5.33,
+      "grad_norm": 0.6308349967002869,
+      "learning_rate": 2.708949704142012e-05,
+      "loss": 0.594,
+      "step": 9000
+    },
+    {
+      "epoch": 5.62,
+      "grad_norm": 0.7305271625518799,
+      "learning_rate": 2.685835798816568e-05,
+      "loss": 0.5837,
+      "step": 9500
+    },
+    {
+      "epoch": 5.92,
+      "grad_norm": 0.8089825510978699,
+      "learning_rate": 2.6627218934911244e-05,
+      "loss": 0.5725,
+      "step": 10000
+    },
+    {
+      "epoch": 6.21,
+      "grad_norm": 0.7770040035247803,
+      "learning_rate": 2.6396079881656807e-05,
+      "loss": 0.5674,
+      "step": 10500
+    },
+    {
+      "epoch": 6.51,
+      "grad_norm": 0.7730346322059631,
+      "learning_rate": 2.6164940828402366e-05,
+      "loss": 0.5567,
+      "step": 11000
+    },
+    {
+      "epoch": 6.8,
+      "grad_norm": 0.6454223990440369,
+      "learning_rate": 2.593380177514793e-05,
+      "loss": 0.5486,
+      "step": 11500
+    },
+    {
+      "epoch": 7.1,
+      "grad_norm": 0.5882906317710876,
+      "learning_rate": 2.5702662721893492e-05,
+      "loss": 0.5396,
+      "step": 12000
+    },
+    {
+      "epoch": 7.4,
+      "grad_norm": 0.8279200792312622,
+      "learning_rate": 2.5471523668639055e-05,
+      "loss": 0.5309,
+      "step": 12500
+    },
+    {
+      "epoch": 7.69,
+      "grad_norm": 0.8009528517723083,
+      "learning_rate": 2.5240384615384618e-05,
+      "loss": 0.5288,
+      "step": 13000
+    },
+    {
+      "epoch": 7.99,
+      "grad_norm": 0.7412715554237366,
+      "learning_rate": 2.500924556213018e-05,
+      "loss": 0.5198,
+      "step": 13500
+    },
+    {
+      "epoch": 8.28,
+      "grad_norm": 0.9230983853340149,
+      "learning_rate": 2.4778106508875743e-05,
+      "loss": 0.5163,
+      "step": 14000
+    },
+    {
+      "epoch": 8.58,
+      "grad_norm": 0.7999468445777893,
+      "learning_rate": 2.45469674556213e-05,
+      "loss": 0.5131,
+      "step": 14500
+    },
+    {
+      "epoch": 8.88,
+      "grad_norm": 0.7494385838508606,
+      "learning_rate": 2.4315828402366862e-05,
+      "loss": 0.506,
+      "step": 15000
+    },
+    {
+      "epoch": 9.17,
+      "grad_norm": 0.7356762886047363,
+      "learning_rate": 2.4084689349112425e-05,
+      "loss": 0.5036,
+      "step": 15500
+    },
+    {
+      "epoch": 9.47,
+      "grad_norm": 0.8011249303817749,
+      "learning_rate": 2.3853550295857988e-05,
+      "loss": 0.497,
+      "step": 16000
+    },
+    {
+      "epoch": 9.76,
+      "grad_norm": 0.713610827922821,
+      "learning_rate": 2.362241124260355e-05,
+      "loss": 0.4967,
+      "step": 16500
+    },
+    {
+      "epoch": 10.06,
+      "grad_norm": 0.8254227042198181,
+      "learning_rate": 2.3391272189349114e-05,
+      "loss": 0.4911,
+      "step": 17000
+    },
+    {
+      "epoch": 10.36,
+      "grad_norm": 0.7040392756462097,
+      "learning_rate": 2.3160133136094673e-05,
+      "loss": 0.4859,
+      "step": 17500
+    },
+    {
+      "epoch": 10.65,
+      "grad_norm": 0.7733869552612305,
+      "learning_rate": 2.2928994082840236e-05,
+      "loss": 0.4858,
+      "step": 18000
+    },
+    {
+      "epoch": 10.95,
+      "grad_norm": 0.8573015928268433,
+      "learning_rate": 2.26978550295858e-05,
+      "loss": 0.4829,
+      "step": 18500
+    },
+    {
+      "epoch": 11.24,
+      "grad_norm": 0.693286657333374,
+      "learning_rate": 2.2466715976331362e-05,
+      "loss": 0.4755,
+      "step": 19000
+    },
+    {
+      "epoch": 11.54,
+      "grad_norm": 0.7536494135856628,
+      "learning_rate": 2.2235576923076925e-05,
+      "loss": 0.4768,
+      "step": 19500
+    },
+    {
+      "epoch": 11.83,
+      "grad_norm": 0.6219621896743774,
+      "learning_rate": 2.2004437869822487e-05,
+      "loss": 0.4757,
+      "step": 20000
+    },
+    {
+      "epoch": 12.13,
+      "grad_norm": 0.7244569063186646,
+      "learning_rate": 2.1773298816568047e-05,
+      "loss": 0.473,
+      "step": 20500
+    },
+    {
+      "epoch": 12.43,
+      "grad_norm": 0.7847468852996826,
+      "learning_rate": 2.154215976331361e-05,
+      "loss": 0.4696,
+      "step": 21000
+    },
+    {
+      "epoch": 12.72,
+      "grad_norm": 0.7616731524467468,
+      "learning_rate": 2.1311020710059173e-05,
+      "loss": 0.4696,
+      "step": 21500
+    },
+    {
+      "epoch": 13.02,
+      "grad_norm": 0.7453758716583252,
+      "learning_rate": 2.1079881656804735e-05,
+      "loss": 0.4687,
+      "step": 22000
+    },
+    {
+      "epoch": 13.31,
+      "grad_norm": 0.6706910729408264,
+      "learning_rate": 2.0848742603550298e-05,
+      "loss": 0.4624,
+      "step": 22500
+    },
+    {
+      "epoch": 13.61,
+      "grad_norm": 0.819572389125824,
+      "learning_rate": 2.061760355029586e-05,
+      "loss": 0.4603,
+      "step": 23000
+    },
+    {
+      "epoch": 13.91,
+      "grad_norm": 0.6898177266120911,
+      "learning_rate": 2.0386464497041417e-05,
+      "loss": 0.4599,
+      "step": 23500
+    },
+    {
+      "epoch": 14.2,
+      "grad_norm": 0.6775723099708557,
+      "learning_rate": 2.015532544378698e-05,
+      "loss": 0.4622,
+      "step": 24000
+    },
+    {
+      "epoch": 14.5,
+      "grad_norm": 0.7278532385826111,
+      "learning_rate": 1.9924186390532543e-05,
+      "loss": 0.4573,
+      "step": 24500
+    },
+    {
+      "epoch": 14.79,
+      "grad_norm": 0.6195204257965088,
+      "learning_rate": 1.9693047337278106e-05,
+      "loss": 0.4536,
+      "step": 25000
+    },
+    {
+      "epoch": 15.09,
+      "grad_norm": 0.6975180506706238,
+      "learning_rate": 1.946190828402367e-05,
+      "loss": 0.4532,
+      "step": 25500
+    },
+    {
+      "epoch": 15.38,
+      "grad_norm": 0.7116599678993225,
+      "learning_rate": 1.923076923076923e-05,
+      "loss": 0.4521,
+      "step": 26000
+    },
+    {
+      "epoch": 15.68,
+      "grad_norm": 0.6533932685852051,
+      "learning_rate": 1.8999630177514794e-05,
+      "loss": 0.4513,
+      "step": 26500
+    },
+    {
+      "epoch": 15.98,
+      "grad_norm": 0.580528736114502,
+      "learning_rate": 1.8768491124260354e-05,
+      "loss": 0.4518,
+      "step": 27000
+    },
+    {
+      "epoch": 16.27,
+      "grad_norm": 0.8283082842826843,
+      "learning_rate": 1.8537352071005917e-05,
+      "loss": 0.4473,
+      "step": 27500
+    },
+    {
+      "epoch": 16.57,
+      "grad_norm": 0.6264183521270752,
+      "learning_rate": 1.830621301775148e-05,
+      "loss": 0.4466,
+      "step": 28000
+    },
+    {
+      "epoch": 16.86,
+      "grad_norm": 0.6502621173858643,
+      "learning_rate": 1.8075073964497042e-05,
+      "loss": 0.446,
+      "step": 28500
+    },
+    {
+      "epoch": 17.16,
+      "grad_norm": 0.6924391984939575,
+      "learning_rate": 1.7843934911242605e-05,
+      "loss": 0.4433,
+      "step": 29000
+    },
+    {
+      "epoch": 17.46,
+      "grad_norm": 0.631476879119873,
+      "learning_rate": 1.7612795857988168e-05,
+      "loss": 0.4446,
+      "step": 29500
+    },
+    {
+      "epoch": 17.75,
+      "grad_norm": 0.6945323348045349,
+      "learning_rate": 1.7381656804733727e-05,
+      "loss": 0.4451,
+      "step": 30000
+    },
+    {
+      "epoch": 18.05,
+      "grad_norm": 0.6200039386749268,
+      "learning_rate": 1.715051775147929e-05,
+      "loss": 0.4434,
+      "step": 30500
+    },
+    {
+      "epoch": 18.34,
+      "grad_norm": 0.6730862259864807,
+      "learning_rate": 1.6919378698224853e-05,
+      "loss": 0.4426,
+      "step": 31000
+    },
+    {
+      "epoch": 18.64,
+      "grad_norm": 0.6520936489105225,
+      "learning_rate": 1.6688239644970416e-05,
+      "loss": 0.4401,
+      "step": 31500
+    },
+    {
+      "epoch": 18.93,
+      "grad_norm": 0.7381883263587952,
+      "learning_rate": 1.645710059171598e-05,
+      "loss": 0.4381,
+      "step": 32000
+    },
+    {
+      "epoch": 19.23,
+      "grad_norm": 0.6962296962738037,
+      "learning_rate": 1.622596153846154e-05,
+      "loss": 0.4399,
+      "step": 32500
+    },
+    {
+      "epoch": 19.53,
+      "grad_norm": 0.5711750388145447,
+      "learning_rate": 1.5994822485207098e-05,
+      "loss": 0.4354,
+      "step": 33000
+    },
+    {
+      "epoch": 19.82,
+      "grad_norm": 0.6115343570709229,
+      "learning_rate": 1.576368343195266e-05,
+      "loss": 0.437,
+      "step": 33500
+    },
+    {
+      "epoch": 20.12,
+      "grad_norm": 0.6140381693840027,
+      "learning_rate": 1.5532544378698223e-05,
+      "loss": 0.4341,
+      "step": 34000
+    },
+    {
+      "epoch": 20.41,
+      "grad_norm": 0.648704469203949,
+      "learning_rate": 1.5301405325443786e-05,
+      "loss": 0.4337,
+      "step": 34500
+    },
+    {
+      "epoch": 20.71,
+      "grad_norm": 0.6556956171989441,
+      "learning_rate": 1.507026627218935e-05,
+      "loss": 0.4333,
+      "step": 35000
+    },
+    {
+      "epoch": 21.01,
+      "grad_norm": 0.7024092674255371,
+      "learning_rate": 1.483912721893491e-05,
+      "loss": 0.4357,
+      "step": 35500
+    },
+    {
+      "epoch": 21.3,
+      "grad_norm": 0.5994529128074646,
+      "learning_rate": 1.4607988165680473e-05,
+      "loss": 0.4311,
+      "step": 36000
+    },
+    {
+      "epoch": 21.6,
+      "grad_norm": 0.599431037902832,
+      "learning_rate": 1.4376849112426036e-05,
+      "loss": 0.4334,
+      "step": 36500
+    },
+    {
+      "epoch": 21.89,
+      "grad_norm": 0.6323761343955994,
+      "learning_rate": 1.4145710059171597e-05,
+      "loss": 0.4298,
+      "step": 37000
+    },
+    {
+      "epoch": 22.19,
+      "grad_norm": 0.6665933132171631,
+      "learning_rate": 1.391457100591716e-05,
+      "loss": 0.4281,
+      "step": 37500
+    },
+    {
+      "epoch": 22.49,
+      "grad_norm": 0.6103574633598328,
+      "learning_rate": 1.3683431952662723e-05,
+      "loss": 0.4311,
+      "step": 38000
+    },
+    {
+      "epoch": 22.78,
+      "grad_norm": 0.5954911708831787,
+      "learning_rate": 1.3452292899408284e-05,
+      "loss": 0.4277,
+      "step": 38500
+    },
+    {
+      "epoch": 23.08,
+      "grad_norm": 0.5706931352615356,
+      "learning_rate": 1.3221153846153847e-05,
+      "loss": 0.4278,
+      "step": 39000
+    },
+    {
+      "epoch": 23.37,
+      "grad_norm": 0.5817924737930298,
+      "learning_rate": 1.299001479289941e-05,
+      "loss": 0.4239,
+      "step": 39500
+    },
+    {
+      "epoch": 23.67,
+      "grad_norm": 0.591736912727356,
+      "learning_rate": 1.2758875739644969e-05,
+      "loss": 0.428,
+      "step": 40000
+    },
+    {
+      "epoch": 23.96,
+      "grad_norm": 0.6267042756080627,
+      "learning_rate": 1.2527736686390532e-05,
+      "loss": 0.4275,
+      "step": 40500
+    },
+    {
+      "epoch": 24.26,
+      "grad_norm": 0.5819630026817322,
+      "learning_rate": 1.2296597633136095e-05,
+      "loss": 0.4262,
+      "step": 41000
+    },
+    {
+      "epoch": 24.56,
+      "grad_norm": 0.615161657333374,
+      "learning_rate": 1.2065458579881656e-05,
+      "loss": 0.4235,
+      "step": 41500
+    },
+    {
+      "epoch": 24.85,
+      "grad_norm": 0.7147814631462097,
+      "learning_rate": 1.1834319526627219e-05,
+      "loss": 0.423,
+      "step": 42000
+    },
+    {
+      "epoch": 25.15,
+      "grad_norm": 0.7751194834709167,
+      "learning_rate": 1.1603180473372782e-05,
+      "loss": 0.422,
+      "step": 42500
+    },
+    {
+      "epoch": 25.44,
+      "grad_norm": 0.674323320388794,
+      "learning_rate": 1.1372041420118345e-05,
+      "loss": 0.4207,
+      "step": 43000
+    },
+    {
+      "epoch": 25.74,
+      "grad_norm": 0.6965672969818115,
+      "learning_rate": 1.1140902366863906e-05,
+      "loss": 0.4244,
+      "step": 43500
+    },
+    {
+      "epoch": 26.04,
+      "grad_norm": 0.6351442337036133,
+      "learning_rate": 1.0909763313609469e-05,
+      "loss": 0.4228,
+      "step": 44000
+    },
+    {
+      "epoch": 26.33,
+      "grad_norm": 0.590655505657196,
+      "learning_rate": 1.0678624260355031e-05,
+      "loss": 0.4207,
+      "step": 44500
+    },
+    {
+      "epoch": 26.63,
+      "grad_norm": 0.6553508639335632,
+      "learning_rate": 1.044748520710059e-05,
+      "loss": 0.422,
+      "step": 45000
+    },
+    {
+      "epoch": 26.92,
+      "grad_norm": 0.6216753721237183,
+      "learning_rate": 1.0216346153846154e-05,
+      "loss": 0.4196,
+      "step": 45500
+    },
+    {
+      "epoch": 27.22,
+      "grad_norm": 0.6628888249397278,
+      "learning_rate": 9.985207100591717e-06,
+      "loss": 0.4194,
+      "step": 46000
+    },
+    {
+      "epoch": 27.51,
+      "grad_norm": 0.6111788749694824,
+      "learning_rate": 9.754068047337278e-06,
+      "loss": 0.4189,
+      "step": 46500
+    },
+    {
+      "epoch": 27.81,
+      "grad_norm": 0.5751132965087891,
+      "learning_rate": 9.52292899408284e-06,
+      "loss": 0.4182,
+      "step": 47000
+    },
+    {
+      "epoch": 28.11,
+      "grad_norm": 0.6333842873573303,
+      "learning_rate": 9.291789940828403e-06,
+      "loss": 0.4172,
+      "step": 47500
+    },
+    {
+      "epoch": 28.4,
+      "grad_norm": 0.5846462845802307,
+      "learning_rate": 9.060650887573965e-06,
+      "loss": 0.417,
+      "step": 48000
+    },
+    {
+      "epoch": 28.7,
+      "grad_norm": 0.5921066999435425,
+      "learning_rate": 8.829511834319527e-06,
+      "loss": 0.4178,
+      "step": 48500
+    },
+    {
+      "epoch": 28.99,
+      "grad_norm": 0.6645215153694153,
+      "learning_rate": 8.59837278106509e-06,
+      "loss": 0.4166,
+      "step": 49000
+    },
+    {
+      "epoch": 29.29,
+      "grad_norm": 0.6453720331192017,
+      "learning_rate": 8.36723372781065e-06,
+      "loss": 0.4142,
+      "step": 49500
+    },
+    {
+      "epoch": 29.59,
+      "grad_norm": 0.6401262283325195,
+      "learning_rate": 8.136094674556213e-06,
+      "loss": 0.4152,
+      "step": 50000
+    },
+    {
+      "epoch": 29.88,
+      "grad_norm": 0.6776517033576965,
+      "learning_rate": 7.904955621301775e-06,
+      "loss": 0.415,
+      "step": 50500
+    },
+    {
+      "epoch": 30.18,
+      "grad_norm": 0.6697096228599548,
+      "learning_rate": 7.673816568047338e-06,
+      "loss": 0.4116,
+      "step": 51000
+    },
+    {
+      "epoch": 30.47,
+      "grad_norm": 0.6276474595069885,
+      "learning_rate": 7.442677514792899e-06,
+      "loss": 0.4127,
+      "step": 51500
+    },
+    {
+      "epoch": 30.77,
+      "grad_norm": 0.7491399049758911,
+      "learning_rate": 7.211538461538462e-06,
+      "loss": 0.4188,
+      "step": 52000
+    },
+    {
+      "epoch": 31.07,
+      "grad_norm": 0.7292032837867737,
+      "learning_rate": 6.980399408284024e-06,
+      "loss": 0.4141,
+      "step": 52500
+    },
+    {
+      "epoch": 31.36,
+      "grad_norm": 0.6432758569717407,
+      "learning_rate": 6.749260355029585e-06,
+      "loss": 0.4109,
+      "step": 53000
+    },
+    {
+      "epoch": 31.66,
+      "grad_norm": 0.7142419815063477,
+      "learning_rate": 6.518121301775148e-06,
+      "loss": 0.4124,
+      "step": 53500
+    },
+    {
+      "epoch": 31.95,
+      "grad_norm": 0.737123966217041,
+      "learning_rate": 6.28698224852071e-06,
+      "loss": 0.4106,
+      "step": 54000
+    },
+    {
+      "epoch": 32.25,
+      "grad_norm": 0.6416926980018616,
+      "learning_rate": 6.055843195266272e-06,
+      "loss": 0.4131,
+      "step": 54500
+    },
+    {
+      "epoch": 32.54,
+      "grad_norm": 0.8014604449272156,
+      "learning_rate": 5.824704142011835e-06,
+      "loss": 0.4113,
+      "step": 55000
+    },
+    {
+      "epoch": 32.84,
+      "grad_norm": 0.745812714099884,
+      "learning_rate": 5.593565088757396e-06,
+      "loss": 0.4119,
+      "step": 55500
+    },
+    {
+      "epoch": 33.14,
+      "grad_norm": 0.753264307975769,
+      "learning_rate": 5.362426035502958e-06,
+      "loss": 0.4109,
+      "step": 56000
+    },
+    {
+      "epoch": 33.43,
+      "grad_norm": 0.7509620189666748,
+      "learning_rate": 5.131286982248521e-06,
+      "loss": 0.4094,
+      "step": 56500
+    },
+    {
+      "epoch": 33.73,
+      "grad_norm": 0.6729797720909119,
+      "learning_rate": 4.900147928994083e-06,
+      "loss": 0.4086,
+      "step": 57000
+    },
+    {
+      "epoch": 34.02,
+      "grad_norm": 0.6696120500564575,
+      "learning_rate": 4.669008875739646e-06,
+      "loss": 0.4109,
+      "step": 57500
+    },
+    {
+      "epoch": 34.32,
+      "grad_norm": 0.591411828994751,
+      "learning_rate": 4.437869822485207e-06,
+      "loss": 0.4082,
+      "step": 58000
+    },
+    {
+      "epoch": 34.62,
+      "grad_norm": 0.6425775289535522,
+      "learning_rate": 4.206730769230769e-06,
+      "loss": 0.4096,
+      "step": 58500
+    },
+    {
+      "epoch": 34.91,
+      "grad_norm": 0.6289854645729065,
+      "learning_rate": 3.975591715976332e-06,
+      "loss": 0.4101,
+      "step": 59000
+    },
+    {
+      "epoch": 35.21,
+      "grad_norm": 0.6215291023254395,
+      "learning_rate": 3.7444526627218935e-06,
+      "loss": 0.4094,
+      "step": 59500
+    },
+    {
+      "epoch": 35.5,
+      "grad_norm": 0.7314534783363342,
+      "learning_rate": 3.513313609467456e-06,
+      "loss": 0.4062,
+      "step": 60000
+    },
+    {
+      "epoch": 35.8,
+      "grad_norm": 0.6580629348754883,
+      "learning_rate": 3.2821745562130175e-06,
+      "loss": 0.4078,
+      "step": 60500
+    },
+    {
+      "epoch": 36.09,
+      "grad_norm": 0.6824979186058044,
+      "learning_rate": 3.05103550295858e-06,
+      "loss": 0.4098,
+      "step": 61000
+    },
+    {
+      "epoch": 36.39,
+      "grad_norm": 0.6403664946556091,
+      "learning_rate": 2.8198964497041423e-06,
+      "loss": 0.4078,
+      "step": 61500
+    },
+    {
+      "epoch": 36.69,
+      "grad_norm": 0.6590360999107361,
+      "learning_rate": 2.5887573964497043e-06,
+      "loss": 0.4076,
+      "step": 62000
+    },
+    {
+      "epoch": 36.98,
+      "grad_norm": 0.8094596862792969,
+      "learning_rate": 2.3576183431952663e-06,
+      "loss": 0.4059,
+      "step": 62500
+    },
+    {
+      "epoch": 37.28,
+      "grad_norm": 0.6609135270118713,
+      "learning_rate": 2.1264792899408283e-06,
+      "loss": 0.4065,
+      "step": 63000
+    },
+    {
+      "epoch": 37.57,
+      "grad_norm": 0.6913712024688721,
+      "learning_rate": 1.8953402366863905e-06,
+      "loss": 0.4033,
+      "step": 63500
+    },
+    {
+      "epoch": 37.87,
+      "grad_norm": 0.6110714077949524,
+      "learning_rate": 1.6642011834319528e-06,
+      "loss": 0.4067,
+      "step": 64000
+    },
+    {
+      "epoch": 38.17,
+      "grad_norm": 0.7397092580795288,
+      "learning_rate": 1.4330621301775148e-06,
+      "loss": 0.4056,
+      "step": 64500
+    },
+    {
+      "epoch": 38.46,
+      "grad_norm": 0.6656752824783325,
+      "learning_rate": 1.201923076923077e-06,
+      "loss": 0.403,
+      "step": 65000
+    },
+    {
+      "epoch": 38.76,
+      "grad_norm": 0.70158451795578,
+      "learning_rate": 9.70784023668639e-07,
+      "loss": 0.4063,
+      "step": 65500
+    },
+    {
+      "epoch": 39.05,
+      "grad_norm": 0.7131240367889404,
+      "learning_rate": 7.396449704142012e-07,
+      "loss": 0.4058,
+      "step": 66000
+    },
+    {
+      "epoch": 39.35,
+      "grad_norm": 0.7075223922729492,
+      "learning_rate": 5.085059171597633e-07,
+      "loss": 0.4058,
+      "step": 66500
+    },
+    {
+      "epoch": 39.64,
+      "grad_norm": 0.7466796040534973,
+      "learning_rate": 2.7736686390532544e-07,
+      "loss": 0.4043,
+      "step": 67000
+    },
+    {
+      "epoch": 39.94,
+      "grad_norm": 0.6051456332206726,
+      "learning_rate": 4.6227810650887574e-08,
+      "loss": 0.4047,
+      "step": 67500
+    },
+    {
+      "epoch": 40.0,
+      "step": 67600,
+      "total_flos": 5.463929616806707e+19,
+      "train_loss": 0.47529568801970173,
+      "train_runtime": 7092.3542,
+      "train_samples_per_second": 76.223,
+      "train_steps_per_second": 9.531
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 67600,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 40,
+  "save_steps": 1000000000,
+  "total_flos": 5.463929616806707e+19,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}