Training in progress, step 475, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +830 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ddcea3f015ba7baf7240a381987d1fea89ddbc3bfa6cd80ad532930250eb92a
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:c444a5739e2bec3b1e3f04bd30a01356e1edcb8c01b2ba0dd74328accd2825b2
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6606e17bd4f0435631121771e7600dbc4c37c6c76273295ec21f9a17bd27b35f
 size 85723732

 version https://git-lfs.github.com/spec/v1
+oid sha256:08d25728ac5fe47a0ad27b3da864cab99af91f3548996d69185914cd67c672bb
 size 85723732

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b732712c22d6c942fca8e85d6d6b8d91964b43f4fe22cb00333cc39c1c2eda24
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1126b5614f51e5fa36121a9290d6b01e210532249374ad3c426067b6e3d80cfd
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02847c1628ecbc90ee51fc4d3fb5a61ddbee1e60d453008afab16af26f807227
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d3ab8b8dc5babc32b4adc3c596b50dd0fcac27b238d3838d86c3c68054c541d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.751578947368421,
   "eval_steps": 119,
-  "global_step": 357,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2538,6 +2538,832 @@
       "eval_samples_per_second": 4.073,
       "eval_steps_per_second": 4.073,
       "step": 357
     }
   ],
   "logging_steps": 1,
@@ -2552,12 +3378,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.3245823894552576e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 119,
+  "global_step": 475,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 4.073,
       "eval_steps_per_second": 4.073,
       "step": 357
+    },
+    {
+      "epoch": 0.7536842105263157,
+      "grad_norm": 0.8696005940437317,
+      "learning_rate": 2.964852511800519e-05,
+      "loss": 1.0904,
+      "step": 358
+    },
+    {
+      "epoch": 0.7557894736842106,
+      "grad_norm": 0.7804014086723328,
+      "learning_rate": 2.9169990463359555e-05,
+      "loss": 1.0226,
+      "step": 359
+    },
+    {
+      "epoch": 0.7578947368421053,
+      "grad_norm": 0.8228606581687927,
+      "learning_rate": 2.869468883687798e-05,
+      "loss": 1.3508,
+      "step": 360
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.6658822298049927,
+      "learning_rate": 2.8222641933652117e-05,
+      "loss": 1.1766,
+      "step": 361
+    },
+    {
+      "epoch": 0.7621052631578947,
+      "grad_norm": 0.7910031676292419,
+      "learning_rate": 2.7753871300212142e-05,
+      "loss": 0.7955,
+      "step": 362
+    },
+    {
+      "epoch": 0.7642105263157895,
+      "grad_norm": 0.7152170538902283,
+      "learning_rate": 2.7288398333543064e-05,
+      "loss": 0.9675,
+      "step": 363
+    },
+    {
+      "epoch": 0.7663157894736842,
+      "grad_norm": 0.5529299974441528,
+      "learning_rate": 2.6826244280108437e-05,
+      "loss": 0.9956,
+      "step": 364
+    },
+    {
+      "epoch": 0.7684210526315789,
+      "grad_norm": 0.7928904891014099,
+      "learning_rate": 2.6367430234880284e-05,
+      "loss": 0.963,
+      "step": 365
+    },
+    {
+      "epoch": 0.7705263157894737,
+      "grad_norm": 0.8560205101966858,
+      "learning_rate": 2.591197714037631e-05,
+      "loss": 1.164,
+      "step": 366
+    },
+    {
+      "epoch": 0.7726315789473684,
+      "grad_norm": 0.5956188440322876,
+      "learning_rate": 2.5459905785704042e-05,
+      "loss": 1.1437,
+      "step": 367
+    },
+    {
+      "epoch": 0.7747368421052632,
+      "grad_norm": 1.018763542175293,
+      "learning_rate": 2.5011236805611814e-05,
+      "loss": 1.1257,
+      "step": 368
+    },
+    {
+      "epoch": 0.7768421052631579,
+      "grad_norm": 2.8923377990722656,
+      "learning_rate": 2.4565990679546914e-05,
+      "loss": 0.8183,
+      "step": 369
+    },
+    {
+      "epoch": 0.7789473684210526,
+      "grad_norm": 0.7362539172172546,
+      "learning_rate": 2.4124187730720917e-05,
+      "loss": 1.2692,
+      "step": 370
+    },
+    {
+      "epoch": 0.7810526315789473,
+      "grad_norm": 0.9826866388320923,
+      "learning_rate": 2.368584812518184e-05,
+      "loss": 0.8437,
+      "step": 371
+    },
+    {
+      "epoch": 0.783157894736842,
+      "grad_norm": 0.8725941181182861,
+      "learning_rate": 2.3250991870893835e-05,
+      "loss": 1.0641,
+      "step": 372
+    },
+    {
+      "epoch": 0.7852631578947369,
+      "grad_norm": 2.081977605819702,
+      "learning_rate": 2.2819638816823797e-05,
+      "loss": 1.1523,
+      "step": 373
+    },
+    {
+      "epoch": 0.7873684210526316,
+      "grad_norm": 2.948880434036255,
+      "learning_rate": 2.2391808652035517e-05,
+      "loss": 0.6608,
+      "step": 374
+    },
+    {
+      "epoch": 0.7894736842105263,
+      "grad_norm": 0.845608651638031,
+      "learning_rate": 2.1967520904790827e-05,
+      "loss": 1.0555,
+      "step": 375
+    },
+    {
+      "epoch": 0.791578947368421,
+      "grad_norm": 0.850184440612793,
+      "learning_rate": 2.154679494165829e-05,
+      "loss": 1.1188,
+      "step": 376
+    },
+    {
+      "epoch": 0.7936842105263158,
+      "grad_norm": 0.8228728175163269,
+      "learning_rate": 2.1129649966629184e-05,
+      "loss": 0.9344,
+      "step": 377
+    },
+    {
+      "epoch": 0.7957894736842105,
+      "grad_norm": 0.9157503843307495,
+      "learning_rate": 2.0716105020241072e-05,
+      "loss": 0.7635,
+      "step": 378
+    },
+    {
+      "epoch": 0.7978947368421052,
+      "grad_norm": 1.4381229877471924,
+      "learning_rate": 2.0306178978708514e-05,
+      "loss": 1.258,
+      "step": 379
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.6052026748657227,
+      "learning_rate": 1.9899890553061562e-05,
+      "loss": 1.1365,
+      "step": 380
+    },
+    {
+      "epoch": 0.8021052631578948,
+      "grad_norm": 0.8866299390792847,
+      "learning_rate": 1.9497258288291654e-05,
+      "loss": 1.0385,
+      "step": 381
+    },
+    {
+      "epoch": 0.8042105263157895,
+      "grad_norm": 0.743588387966156,
+      "learning_rate": 1.9098300562505266e-05,
+      "loss": 1.0008,
+      "step": 382
+    },
+    {
+      "epoch": 0.8063157894736842,
+      "grad_norm": 0.797770082950592,
+      "learning_rate": 1.8703035586084816e-05,
+      "loss": 0.82,
+      "step": 383
+    },
+    {
+      "epoch": 0.8084210526315789,
+      "grad_norm": 1.2986137866973877,
+      "learning_rate": 1.831148140085762e-05,
+      "loss": 0.9923,
+      "step": 384
+    },
+    {
+      "epoch": 0.8105263157894737,
+      "grad_norm": 1.0393884181976318,
+      "learning_rate": 1.7923655879272393e-05,
+      "loss": 1.4782,
+      "step": 385
+    },
+    {
+      "epoch": 0.8126315789473684,
+      "grad_norm": 0.961280882358551,
+      "learning_rate": 1.753957672358324e-05,
+      "loss": 0.9412,
+      "step": 386
+    },
+    {
+      "epoch": 0.8147368421052632,
+      "grad_norm": 0.5964862108230591,
+      "learning_rate": 1.7159261465041952e-05,
+      "loss": 0.9657,
+      "step": 387
+    },
+    {
+      "epoch": 0.8168421052631579,
+      "grad_norm": 1.1498229503631592,
+      "learning_rate": 1.6782727463097624e-05,
+      "loss": 0.8533,
+      "step": 388
+    },
+    {
+      "epoch": 0.8189473684210526,
+      "grad_norm": 1.617012619972229,
+      "learning_rate": 1.6409991904604173e-05,
+      "loss": 0.6844,
+      "step": 389
+    },
+    {
+      "epoch": 0.8210526315789474,
+      "grad_norm": 0.642407238483429,
+      "learning_rate": 1.60410718030361e-05,
+      "loss": 0.7084,
+      "step": 390
+    },
+    {
+      "epoch": 0.8231578947368421,
+      "grad_norm": 0.6749714016914368,
+      "learning_rate": 1.5675983997711795e-05,
+      "loss": 1.1988,
+      "step": 391
+    },
+    {
+      "epoch": 0.8252631578947368,
+      "grad_norm": 1.0748071670532227,
+      "learning_rate": 1.5314745153024766e-05,
+      "loss": 0.856,
+      "step": 392
+    },
+    {
+      "epoch": 0.8273684210526315,
+      "grad_norm": 1.0286914110183716,
+      "learning_rate": 1.495737175768326e-05,
+      "loss": 1.1481,
+      "step": 393
+    },
+    {
+      "epoch": 0.8294736842105264,
+      "grad_norm": 0.8149781227111816,
+      "learning_rate": 1.4603880123957447e-05,
+      "loss": 0.7169,
+      "step": 394
+    },
+    {
+      "epoch": 0.8315789473684211,
+      "grad_norm": 1.1764047145843506,
+      "learning_rate": 1.425428638693489e-05,
+      "loss": 1.014,
+      "step": 395
+    },
+    {
+      "epoch": 0.8336842105263158,
+      "grad_norm": 1.1029857397079468,
+      "learning_rate": 1.3908606503784139e-05,
+      "loss": 1.2927,
+      "step": 396
+    },
+    {
+      "epoch": 0.8357894736842105,
+      "grad_norm": 1.063981056213379,
+      "learning_rate": 1.356685625302625e-05,
+      "loss": 0.5832,
+      "step": 397
+    },
+    {
+      "epoch": 0.8378947368421052,
+      "grad_norm": 0.8528422117233276,
+      "learning_rate": 1.3229051233814637e-05,
+      "loss": 0.8959,
+      "step": 398
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.8783009052276611,
+      "learning_rate": 1.2895206865223064e-05,
+      "loss": 0.8633,
+      "step": 399
+    },
+    {
+      "epoch": 0.8421052631578947,
+      "grad_norm": 1.0344438552856445,
+      "learning_rate": 1.2565338385541792e-05,
+      "loss": 1.7439,
+      "step": 400
+    },
+    {
+      "epoch": 0.8442105263157895,
+      "grad_norm": 0.9089331030845642,
+      "learning_rate": 1.2239460851582118e-05,
+      "loss": 0.9778,
+      "step": 401
+    },
+    {
+      "epoch": 0.8463157894736842,
+      "grad_norm": 1.5010464191436768,
+      "learning_rate": 1.1917589137989005e-05,
+      "loss": 0.8401,
+      "step": 402
+    },
+    {
+      "epoch": 0.848421052631579,
+      "grad_norm": 1.1659886837005615,
+      "learning_rate": 1.1599737936562149e-05,
+      "loss": 1.1214,
+      "step": 403
+    },
+    {
+      "epoch": 0.8505263157894737,
+      "grad_norm": 0.781868040561676,
+      "learning_rate": 1.1285921755585504e-05,
+      "loss": 1.1831,
+      "step": 404
+    },
+    {
+      "epoch": 0.8526315789473684,
+      "grad_norm": 0.8510306477546692,
+      "learning_rate": 1.097615491916485e-05,
+      "loss": 1.0715,
+      "step": 405
+    },
+    {
+      "epoch": 0.8547368421052631,
+      "grad_norm": 0.7362077236175537,
+      "learning_rate": 1.0670451566574102e-05,
+      "loss": 0.9367,
+      "step": 406
+    },
+    {
+      "epoch": 0.8568421052631578,
+      "grad_norm": 1.3995620012283325,
+      "learning_rate": 1.0368825651609893e-05,
+      "loss": 0.8952,
+      "step": 407
+    },
+    {
+      "epoch": 0.8589473684210527,
+      "grad_norm": 0.8937683701515198,
+      "learning_rate": 1.007129094195468e-05,
+      "loss": 1.2489,
+      "step": 408
+    },
+    {
+      "epoch": 0.8610526315789474,
+      "grad_norm": 2.044285774230957,
+      "learning_rate": 9.777861018548251e-06,
+      "loss": 1.658,
+      "step": 409
+    },
+    {
+      "epoch": 0.8631578947368421,
+      "grad_norm": 1.3045108318328857,
+      "learning_rate": 9.488549274967872e-06,
+      "loss": 0.8359,
+      "step": 410
+    },
+    {
+      "epoch": 0.8652631578947368,
+      "grad_norm": 0.6791781783103943,
+      "learning_rate": 9.203368916817012e-06,
+      "loss": 1.0212,
+      "step": 411
+    },
+    {
+      "epoch": 0.8673684210526316,
+      "grad_norm": 1.7533940076828003,
+      "learning_rate": 8.92233296112236e-06,
+      "loss": 0.7901,
+      "step": 412
+    },
+    {
+      "epoch": 0.8694736842105263,
+      "grad_norm": 0.5513384938240051,
+      "learning_rate": 8.645454235739903e-06,
+      "loss": 0.8964,
+      "step": 413
+    },
+    {
+      "epoch": 0.871578947368421,
+      "grad_norm": 1.2993557453155518,
+      "learning_rate": 8.372745378769309e-06,
+      "loss": 0.9348,
+      "step": 414
+    },
+    {
+      "epoch": 0.8736842105263158,
+      "grad_norm": 1.2195082902908325,
+      "learning_rate": 8.10421883797694e-06,
+      "loss": 1.2758,
+      "step": 415
+    },
+    {
+      "epoch": 0.8757894736842106,
+      "grad_norm": 1.2428478002548218,
+      "learning_rate": 7.839886870227909e-06,
+      "loss": 0.9534,
+      "step": 416
+    },
+    {
+      "epoch": 0.8778947368421053,
+      "grad_norm": 0.5317150950431824,
+      "learning_rate": 7.5797615409264335e-06,
+      "loss": 0.7835,
+      "step": 417
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 0.7354329228401184,
+      "learning_rate": 7.32385472346514e-06,
+      "loss": 1.1074,
+      "step": 418
+    },
+    {
+      "epoch": 0.8821052631578947,
+      "grad_norm": 1.009934663772583,
+      "learning_rate": 7.072178098683246e-06,
+      "loss": 1.253,
+      "step": 419
+    },
+    {
+      "epoch": 0.8842105263157894,
+      "grad_norm": 0.8174380660057068,
+      "learning_rate": 6.824743154333157e-06,
+      "loss": 1.0671,
+      "step": 420
+    },
+    {
+      "epoch": 0.8863157894736842,
+      "grad_norm": 0.777045726776123,
+      "learning_rate": 6.581561184556295e-06,
+      "loss": 1.0806,
+      "step": 421
+    },
+    {
+      "epoch": 0.888421052631579,
+      "grad_norm": 1.013598918914795,
+      "learning_rate": 6.342643289367522e-06,
+      "loss": 1.1415,
+      "step": 422
+    },
+    {
+      "epoch": 0.8905263157894737,
+      "grad_norm": 0.9286550283432007,
+      "learning_rate": 6.108000374148448e-06,
+      "loss": 0.9374,
+      "step": 423
+    },
+    {
+      "epoch": 0.8926315789473684,
+      "grad_norm": 0.8254793286323547,
+      "learning_rate": 5.87764314914967e-06,
+      "loss": 0.8427,
+      "step": 424
+    },
+    {
+      "epoch": 0.8947368421052632,
+      "grad_norm": 1.3116326332092285,
+      "learning_rate": 5.651582129001986e-06,
+      "loss": 1.2204,
+      "step": 425
+    },
+    {
+      "epoch": 0.8968421052631579,
+      "grad_norm": 0.5832639932632446,
+      "learning_rate": 5.429827632236284e-06,
+      "loss": 0.7114,
+      "step": 426
+    },
+    {
+      "epoch": 0.8989473684210526,
+      "grad_norm": 0.719308078289032,
+      "learning_rate": 5.212389780812732e-06,
+      "loss": 0.8413,
+      "step": 427
+    },
+    {
+      "epoch": 0.9010526315789473,
+      "grad_norm": 0.8241797089576721,
+      "learning_rate": 4.999278499658666e-06,
+      "loss": 0.9538,
+      "step": 428
+    },
+    {
+      "epoch": 0.9031578947368422,
+      "grad_norm": 0.68552565574646,
+      "learning_rate": 4.790503516215572e-06,
+      "loss": 1.0962,
+      "step": 429
+    },
+    {
+      "epoch": 0.9052631578947369,
+      "grad_norm": 1.2444804906845093,
+      "learning_rate": 4.586074359995119e-06,
+      "loss": 1.2615,
+      "step": 430
+    },
+    {
+      "epoch": 0.9073684210526316,
+      "grad_norm": 1.2105733156204224,
+      "learning_rate": 4.386000362144138e-06,
+      "loss": 1.1771,
+      "step": 431
+    },
+    {
+      "epoch": 0.9094736842105263,
+      "grad_norm": 0.7618857026100159,
+      "learning_rate": 4.190290655018736e-06,
+      "loss": 1.0337,
+      "step": 432
+    },
+    {
+      "epoch": 0.911578947368421,
+      "grad_norm": 0.6650370955467224,
+      "learning_rate": 3.998954171767422e-06,
+      "loss": 1.0167,
+      "step": 433
+    },
+    {
+      "epoch": 0.9136842105263158,
+      "grad_norm": 0.8902232050895691,
+      "learning_rate": 3.811999645923414e-06,
+      "loss": 1.0959,
+      "step": 434
+    },
+    {
+      "epoch": 0.9157894736842105,
+      "grad_norm": 0.8037033081054688,
+      "learning_rate": 3.6294356110059157e-06,
+      "loss": 1.0283,
+      "step": 435
+    },
+    {
+      "epoch": 0.9178947368421052,
+      "grad_norm": 0.7740287780761719,
+      "learning_rate": 3.451270400130646e-06,
+      "loss": 0.9952,
+      "step": 436
+    },
+    {
+      "epoch": 0.92,
+      "grad_norm": 0.6354364156723022,
+      "learning_rate": 3.277512145629502e-06,
+      "loss": 0.7446,
+      "step": 437
+    },
+    {
+      "epoch": 0.9221052631578948,
+      "grad_norm": 1.0572397708892822,
+      "learning_rate": 3.10816877867931e-06,
+      "loss": 1.0779,
+      "step": 438
+    },
+    {
+      "epoch": 0.9242105263157895,
+      "grad_norm": 1.2464516162872314,
+      "learning_rate": 2.943248028939838e-06,
+      "loss": 1.1318,
+      "step": 439
+    },
+    {
+      "epoch": 0.9263157894736842,
+      "grad_norm": 1.017857313156128,
+      "learning_rate": 2.7827574242009437e-06,
+      "loss": 0.7569,
+      "step": 440
+    },
+    {
+      "epoch": 0.9284210526315789,
+      "grad_norm": 0.8179978132247925,
+      "learning_rate": 2.626704290039017e-06,
+      "loss": 0.9221,
+      "step": 441
+    },
+    {
+      "epoch": 0.9305263157894736,
+      "grad_norm": 0.7406687140464783,
+      "learning_rate": 2.4750957494826033e-06,
+      "loss": 0.9411,
+      "step": 442
+    },
+    {
+      "epoch": 0.9326315789473684,
+      "grad_norm": 0.7985221147537231,
+      "learning_rate": 2.327938722687184e-06,
+      "loss": 0.5579,
+      "step": 443
+    },
+    {
+      "epoch": 0.9347368421052632,
+      "grad_norm": 0.8860235214233398,
+      "learning_rate": 2.1852399266194314e-06,
+      "loss": 1.2353,
+      "step": 444
+    },
+    {
+      "epoch": 0.9368421052631579,
+      "grad_norm": 0.8313080072402954,
+      "learning_rate": 2.0470058747505516e-06,
+      "loss": 0.9344,
+      "step": 445
+    },
+    {
+      "epoch": 0.9389473684210526,
+      "grad_norm": 3.333054780960083,
+      "learning_rate": 1.9132428767589473e-06,
+      "loss": 1.4123,
+      "step": 446
+    },
+    {
+      "epoch": 0.9410526315789474,
+      "grad_norm": 1.0423784255981445,
+      "learning_rate": 1.7839570382422787e-06,
+      "loss": 1.0753,
+      "step": 447
+    },
+    {
+      "epoch": 0.9431578947368421,
+      "grad_norm": 0.7692586183547974,
+      "learning_rate": 1.6591542604387445e-06,
+      "loss": 0.669,
+      "step": 448
+    },
+    {
+      "epoch": 0.9452631578947368,
+      "grad_norm": 0.7186653017997742,
+      "learning_rate": 1.538840239957684e-06,
+      "loss": 1.2113,
+      "step": 449
+    },
+    {
+      "epoch": 0.9473684210526315,
+      "grad_norm": 0.7315964698791504,
+      "learning_rate": 1.4230204685196203e-06,
+      "loss": 1.2322,
+      "step": 450
+    },
+    {
+      "epoch": 0.9494736842105264,
+      "grad_norm": 1.0726860761642456,
+      "learning_rate": 1.3117002327055927e-06,
+      "loss": 0.9252,
+      "step": 451
+    },
+    {
+      "epoch": 0.9515789473684211,
+      "grad_norm": 0.6930716037750244,
+      "learning_rate": 1.20488461371574e-06,
+      "loss": 0.9903,
+      "step": 452
+    },
+    {
+      "epoch": 0.9536842105263158,
+      "grad_norm": 1.1421600580215454,
+      "learning_rate": 1.102578487137529e-06,
+      "loss": 1.0869,
+      "step": 453
+    },
+    {
+      "epoch": 0.9557894736842105,
+      "grad_norm": 0.6389675736427307,
+      "learning_rate": 1.004786522723089e-06,
+      "loss": 0.7617,
+      "step": 454
+    },
+    {
+      "epoch": 0.9578947368421052,
+      "grad_norm": 1.449903964996338,
+      "learning_rate": 9.11513184176116e-07,
+      "loss": 1.0412,
+      "step": 455
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 0.7298595905303955,
+      "learning_rate": 8.227627289481121e-07,
+      "loss": 0.9267,
+      "step": 456
+    },
+    {
+      "epoch": 0.9621052631578947,
+      "grad_norm": 0.988619327545166,
+      "learning_rate": 7.385392080440534e-07,
+      "loss": 0.9785,
+      "step": 457
+    },
+    {
+      "epoch": 0.9642105263157895,
+      "grad_norm": 1.0173723697662354,
+      "learning_rate": 6.588464658374815e-07,
+      "loss": 1.4313,
+      "step": 458
+    },
+    {
+      "epoch": 0.9663157894736842,
+      "grad_norm": 0.8676223158836365,
+      "learning_rate": 5.836881398950667e-07,
+      "loss": 1.1253,
+      "step": 459
+    },
+    {
+      "epoch": 0.968421052631579,
+      "grad_norm": 2.1744537353515625,
+      "learning_rate": 5.130676608104845e-07,
+      "loss": 0.9635,
+      "step": 460
+    },
+    {
+      "epoch": 0.9705263157894737,
+      "grad_norm": 0.9125698804855347,
+      "learning_rate": 4.469882520479196e-07,
+      "loss": 1.2048,
+      "step": 461
+    },
+    {
+      "epoch": 0.9726315789473684,
+      "grad_norm": 0.8515905141830444,
+      "learning_rate": 3.8545292979486057e-07,
+      "loss": 0.9358,
+      "step": 462
+    },
+    {
+      "epoch": 0.9747368421052631,
+      "grad_norm": 0.8601663708686829,
+      "learning_rate": 3.2846450282447703e-07,
+      "loss": 1.0253,
+      "step": 463
+    },
+    {
+      "epoch": 0.9768421052631578,
+      "grad_norm": 0.8213205337524414,
+      "learning_rate": 2.760255723673888e-07,
+      "loss": 0.8577,
+      "step": 464
+    },
+    {
+      "epoch": 0.9789473684210527,
+      "grad_norm": 0.7541871666908264,
+      "learning_rate": 2.2813853199292746e-07,
+      "loss": 0.9517,
+      "step": 465
+    },
+    {
+      "epoch": 0.9810526315789474,
+      "grad_norm": 0.7226133346557617,
+      "learning_rate": 1.8480556749991274e-07,
+      "loss": 1.0434,
+      "step": 466
+    },
+    {
+      "epoch": 0.9831578947368421,
+      "grad_norm": 1.1320992708206177,
+      "learning_rate": 1.460286568168212e-07,
+      "loss": 0.7791,
+      "step": 467
+    },
+    {
+      "epoch": 0.9852631578947368,
+      "grad_norm": 0.9081979990005493,
+      "learning_rate": 1.1180956991160286e-07,
+      "loss": 0.3705,
+      "step": 468
+    },
+    {
+      "epoch": 0.9873684210526316,
+      "grad_norm": 2.7810940742492676,
+      "learning_rate": 8.214986871076802e-08,
+      "loss": 1.2752,
+      "step": 469
+    },
+    {
+      "epoch": 0.9894736842105263,
+      "grad_norm": 0.7669661045074463,
+      "learning_rate": 5.705090702819993e-08,
+      "loss": 0.7446,
+      "step": 470
+    },
+    {
+      "epoch": 0.991578947368421,
+      "grad_norm": 1.0128413438796997,
+      "learning_rate": 3.6513830503293045e-08,
+      "loss": 1.2359,
+      "step": 471
+    },
+    {
+      "epoch": 0.9936842105263158,
+      "grad_norm": 1.728697657585144,
+      "learning_rate": 2.0539576548717076e-08,
+      "loss": 0.7628,
+      "step": 472
+    },
+    {
+      "epoch": 0.9957894736842106,
+      "grad_norm": 0.7068576812744141,
+      "learning_rate": 9.128874307551272e-09,
+      "loss": 1.063,
+      "step": 473
+    },
+    {
+      "epoch": 0.9978947368421053,
+      "grad_norm": 0.7579674124717712,
+      "learning_rate": 2.282244620088747e-09,
+      "loss": 0.8835,
+      "step": 474
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.7143834829330444,
+      "learning_rate": 0.0,
+      "loss": 1.1322,
+      "step": 475
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.425821325990298e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null