Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +714 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:515bd1fd2e800ff785b6479fdd6957fd4bf27e5b1c30a3f1cf8aae527f08a08b
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:8198e9317f349ff3fd1e2f22f5f0e02e0d464b3b2d14b422bd2542cc62793a64
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7bd05b1422ea617e033a87629b7a8d0f6328816ebf140850f114f1f26f4cb7a8
 size 341314644

 version https://git-lfs.github.com/spec/v1
+oid sha256:c6174d7213eebb72e1fe3ca63d3d72d9fd39633ebe8d894d7532140049e8df32
 size 341314644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:809cadbdaab075c11480d526dd51b8f21d82fd34b84730fb1208ace6f51dd4d6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9fb82bc159a82b428f124a5ab73b5266e9f4847bee821a1e25a1056b8c500ae4
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5e2a8ca8cbdb222b4fcc6a743fadf9ab6adfaf2459d28805db388e0cbe4b5b9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e99fbb72294539e29c605dada5b1e203613200e6c1e835a4f2f4baed9ec2604
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.35413047671318054,
-  "best_model_checkpoint": "miner_id_24/checkpoint-300",
-  "epoch": 0.23529411764705882,
   "eval_steps": 100,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2139,6 +2139,714 @@
       "eval_samples_per_second": 12.722,
       "eval_steps_per_second": 3.18,
       "step": 300
     }
   ],
   "logging_steps": 1,
@@ -2162,12 +2870,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.536268794133217e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.3466954827308655,
+  "best_model_checkpoint": "miner_id_24/checkpoint-400",
+  "epoch": 0.3137254901960784,
   "eval_steps": 100,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 12.722,
       "eval_steps_per_second": 3.18,
       "step": 300
+    },
+    {
+      "epoch": 0.23607843137254902,
+      "grad_norm": 2.4137773513793945,
+      "learning_rate": 1.0552102216287209e-05,
+      "loss": 3.5898,
+      "step": 301
+    },
+    {
+      "epoch": 0.23686274509803923,
+      "grad_norm": 2.2105584144592285,
+      "learning_rate": 1.0351143194462377e-05,
+      "loss": 1.8317,
+      "step": 302
+    },
+    {
+      "epoch": 0.2376470588235294,
+      "grad_norm": 2.3090407848358154,
+      "learning_rate": 1.0151783601632461e-05,
+      "loss": 1.5182,
+      "step": 303
+    },
+    {
+      "epoch": 0.2384313725490196,
+      "grad_norm": 2.195463180541992,
+      "learning_rate": 9.954036373955764e-06,
+      "loss": 1.2783,
+      "step": 304
+    },
+    {
+      "epoch": 0.23921568627450981,
+      "grad_norm": 2.6486854553222656,
+      "learning_rate": 9.757914342966493e-06,
+      "loss": 1.4084,
+      "step": 305
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 3.396476984024048,
+      "learning_rate": 9.563430234742194e-06,
+      "loss": 1.3099,
+      "step": 306
+    },
+    {
+      "epoch": 0.2407843137254902,
+      "grad_norm": 2.8271055221557617,
+      "learning_rate": 9.370596669077936e-06,
+      "loss": 1.3866,
+      "step": 307
+    },
+    {
+      "epoch": 0.2415686274509804,
+      "grad_norm": 2.6021790504455566,
+      "learning_rate": 9.179426158667445e-06,
+      "loss": 1.1333,
+      "step": 308
+    },
+    {
+      "epoch": 0.24235294117647058,
+      "grad_norm": 2.9245004653930664,
+      "learning_rate": 8.989931108291197e-06,
+      "loss": 1.0958,
+      "step": 309
+    },
+    {
+      "epoch": 0.24313725490196078,
+      "grad_norm": 2.7905077934265137,
+      "learning_rate": 8.802123814011457e-06,
+      "loss": 1.0016,
+      "step": 310
+    },
+    {
+      "epoch": 0.24392156862745099,
+      "grad_norm": 3.4618003368377686,
+      "learning_rate": 8.616016462374414e-06,
+      "loss": 1.2777,
+      "step": 311
+    },
+    {
+      "epoch": 0.2447058823529412,
+      "grad_norm": 4.318086624145508,
+      "learning_rate": 8.431621129619399e-06,
+      "loss": 1.139,
+      "step": 312
+    },
+    {
+      "epoch": 0.24549019607843137,
+      "grad_norm": 4.268404483795166,
+      "learning_rate": 8.248949780895306e-06,
+      "loss": 1.6534,
+      "step": 313
+    },
+    {
+      "epoch": 0.24627450980392157,
+      "grad_norm": 3.4030003547668457,
+      "learning_rate": 8.068014269484189e-06,
+      "loss": 1.146,
+      "step": 314
+    },
+    {
+      "epoch": 0.24705882352941178,
+      "grad_norm": 10.152856826782227,
+      "learning_rate": 7.888826336032093e-06,
+      "loss": 1.53,
+      "step": 315
+    },
+    {
+      "epoch": 0.24784313725490195,
+      "grad_norm": 3.9469947814941406,
+      "learning_rate": 7.71139760778722e-06,
+      "loss": 1.47,
+      "step": 316
+    },
+    {
+      "epoch": 0.24862745098039216,
+      "grad_norm": 4.744470596313477,
+      "learning_rate": 7.535739597845489e-06,
+      "loss": 1.4464,
+      "step": 317
+    },
+    {
+      "epoch": 0.24941176470588236,
+      "grad_norm": 5.53031063079834,
+      "learning_rate": 7.361863704403427e-06,
+      "loss": 1.1041,
+      "step": 318
+    },
+    {
+      "epoch": 0.25019607843137254,
+      "grad_norm": 4.436324596405029,
+      "learning_rate": 7.189781210018604e-06,
+      "loss": 1.3396,
+      "step": 319
+    },
+    {
+      "epoch": 0.25098039215686274,
+      "grad_norm": 5.226994037628174,
+      "learning_rate": 7.019503280877466e-06,
+      "loss": 1.4027,
+      "step": 320
+    },
+    {
+      "epoch": 0.25176470588235295,
+      "grad_norm": 4.095127582550049,
+      "learning_rate": 6.851040966070809e-06,
+      "loss": 1.0458,
+      "step": 321
+    },
+    {
+      "epoch": 0.25254901960784315,
+      "grad_norm": 3.7459771633148193,
+      "learning_rate": 6.684405196876842e-06,
+      "loss": 0.7628,
+      "step": 322
+    },
+    {
+      "epoch": 0.25333333333333335,
+      "grad_norm": 4.650934219360352,
+      "learning_rate": 6.519606786051812e-06,
+      "loss": 0.9564,
+      "step": 323
+    },
+    {
+      "epoch": 0.2541176470588235,
+      "grad_norm": 5.133500576019287,
+      "learning_rate": 6.356656427128452e-06,
+      "loss": 1.198,
+      "step": 324
+    },
+    {
+      "epoch": 0.2549019607843137,
+      "grad_norm": 4.67864990234375,
+      "learning_rate": 6.195564693722027e-06,
+      "loss": 1.1566,
+      "step": 325
+    },
+    {
+      "epoch": 0.2556862745098039,
+      "grad_norm": 6.105125427246094,
+      "learning_rate": 6.0363420388442584e-06,
+      "loss": 1.2762,
+      "step": 326
+    },
+    {
+      "epoch": 0.2564705882352941,
+      "grad_norm": 5.678111553192139,
+      "learning_rate": 5.878998794225053e-06,
+      "loss": 0.9718,
+      "step": 327
+    },
+    {
+      "epoch": 0.2572549019607843,
+      "grad_norm": 7.232873916625977,
+      "learning_rate": 5.723545169642064e-06,
+      "loss": 1.1458,
+      "step": 328
+    },
+    {
+      "epoch": 0.2580392156862745,
+      "grad_norm": 4.871210098266602,
+      "learning_rate": 5.569991252258227e-06,
+      "loss": 0.7986,
+      "step": 329
+    },
+    {
+      "epoch": 0.25882352941176473,
+      "grad_norm": 4.941523551940918,
+      "learning_rate": 5.418347005967188e-06,
+      "loss": 0.9478,
+      "step": 330
+    },
+    {
+      "epoch": 0.2596078431372549,
+      "grad_norm": 5.843031406402588,
+      "learning_rate": 5.268622270746769e-06,
+      "loss": 1.3708,
+      "step": 331
+    },
+    {
+      "epoch": 0.2603921568627451,
+      "grad_norm": 5.538339614868164,
+      "learning_rate": 5.120826762020479e-06,
+      "loss": 1.1192,
+      "step": 332
+    },
+    {
+      "epoch": 0.2611764705882353,
+      "grad_norm": 7.33754825592041,
+      "learning_rate": 4.974970070027069e-06,
+      "loss": 1.2714,
+      "step": 333
+    },
+    {
+      "epoch": 0.2619607843137255,
+      "grad_norm": 5.050201892852783,
+      "learning_rate": 4.8310616591982745e-06,
+      "loss": 0.9628,
+      "step": 334
+    },
+    {
+      "epoch": 0.2627450980392157,
+      "grad_norm": 5.330129623413086,
+      "learning_rate": 4.689110867544645e-06,
+      "loss": 0.9378,
+      "step": 335
+    },
+    {
+      "epoch": 0.2635294117647059,
+      "grad_norm": 5.494550704956055,
+      "learning_rate": 4.549126906049629e-06,
+      "loss": 1.3844,
+      "step": 336
+    },
+    {
+      "epoch": 0.2643137254901961,
+      "grad_norm": 7.918943405151367,
+      "learning_rate": 4.411118858071873e-06,
+      "loss": 1.6752,
+      "step": 337
+    },
+    {
+      "epoch": 0.26509803921568625,
+      "grad_norm": 8.494551658630371,
+      "learning_rate": 4.275095678755838e-06,
+      "loss": 1.3365,
+      "step": 338
+    },
+    {
+      "epoch": 0.26588235294117646,
+      "grad_norm": 6.204376697540283,
+      "learning_rate": 4.141066194450712e-06,
+      "loss": 1.2237,
+      "step": 339
+    },
+    {
+      "epoch": 0.26666666666666666,
+      "grad_norm": 10.700553894042969,
+      "learning_rate": 4.009039102137657e-06,
+      "loss": 2.0144,
+      "step": 340
+    },
+    {
+      "epoch": 0.26745098039215687,
+      "grad_norm": 8.845378875732422,
+      "learning_rate": 3.879022968865484e-06,
+      "loss": 2.1293,
+      "step": 341
+    },
+    {
+      "epoch": 0.26823529411764707,
+      "grad_norm": 8.239388465881348,
+      "learning_rate": 3.751026231194761e-06,
+      "loss": 1.779,
+      "step": 342
+    },
+    {
+      "epoch": 0.2690196078431373,
+      "grad_norm": 9.209283828735352,
+      "learning_rate": 3.6250571946503677e-06,
+      "loss": 1.6962,
+      "step": 343
+    },
+    {
+      "epoch": 0.2698039215686274,
+      "grad_norm": 9.209612846374512,
+      "learning_rate": 3.5011240331825784e-06,
+      "loss": 1.7011,
+      "step": 344
+    },
+    {
+      "epoch": 0.27058823529411763,
+      "grad_norm": 10.101373672485352,
+      "learning_rate": 3.379234788636626e-06,
+      "loss": 1.532,
+      "step": 345
+    },
+    {
+      "epoch": 0.27137254901960783,
+      "grad_norm": 11.359880447387695,
+      "learning_rate": 3.2593973702309106e-06,
+      "loss": 2.0318,
+      "step": 346
+    },
+    {
+      "epoch": 0.27215686274509804,
+      "grad_norm": 10.647089958190918,
+      "learning_rate": 3.1416195540437896e-06,
+      "loss": 1.9065,
+      "step": 347
+    },
+    {
+      "epoch": 0.27294117647058824,
+      "grad_norm": 15.720868110656738,
+      "learning_rate": 3.0259089825089655e-06,
+      "loss": 1.8493,
+      "step": 348
+    },
+    {
+      "epoch": 0.27372549019607845,
+      "grad_norm": 13.961613655090332,
+      "learning_rate": 2.9122731639196154e-06,
+      "loss": 2.564,
+      "step": 349
+    },
+    {
+      "epoch": 0.27450980392156865,
+      "grad_norm": 12.990523338317871,
+      "learning_rate": 2.800719471941152e-06,
+      "loss": 1.8791,
+      "step": 350
+    },
+    {
+      "epoch": 0.2752941176470588,
+      "grad_norm": 2.013960123062134,
+      "learning_rate": 2.691255145132793e-06,
+      "loss": 2.7899,
+      "step": 351
+    },
+    {
+      "epoch": 0.276078431372549,
+      "grad_norm": 1.9573743343353271,
+      "learning_rate": 2.5838872864778276e-06,
+      "loss": 1.9973,
+      "step": 352
+    },
+    {
+      "epoch": 0.2768627450980392,
+      "grad_norm": 2.0770833492279053,
+      "learning_rate": 2.478622862922738e-06,
+      "loss": 1.5119,
+      "step": 353
+    },
+    {
+      "epoch": 0.2776470588235294,
+      "grad_norm": 2.2673606872558594,
+      "learning_rate": 2.375468704925131e-06,
+      "loss": 1.473,
+      "step": 354
+    },
+    {
+      "epoch": 0.2784313725490196,
+      "grad_norm": 2.035940408706665,
+      "learning_rate": 2.2744315060104845e-06,
+      "loss": 1.1183,
+      "step": 355
+    },
+    {
+      "epoch": 0.2792156862745098,
+      "grad_norm": 2.1854915618896484,
+      "learning_rate": 2.175517822337849e-06,
+      "loss": 1.1662,
+      "step": 356
+    },
+    {
+      "epoch": 0.28,
+      "grad_norm": 2.2467715740203857,
+      "learning_rate": 2.0787340722744e-06,
+      "loss": 0.9999,
+      "step": 357
+    },
+    {
+      "epoch": 0.2807843137254902,
+      "grad_norm": 2.530546188354492,
+      "learning_rate": 1.9840865359789894e-06,
+      "loss": 1.1862,
+      "step": 358
+    },
+    {
+      "epoch": 0.2815686274509804,
+      "grad_norm": 3.1195712089538574,
+      "learning_rate": 1.891581354994618e-06,
+      "loss": 1.2027,
+      "step": 359
+    },
+    {
+      "epoch": 0.2823529411764706,
+      "grad_norm": 3.0070858001708984,
+      "learning_rate": 1.801224531849908e-06,
+      "loss": 1.0958,
+      "step": 360
+    },
+    {
+      "epoch": 0.2831372549019608,
+      "grad_norm": 3.387432813644409,
+      "learning_rate": 1.7130219296696263e-06,
+      "loss": 1.3352,
+      "step": 361
+    },
+    {
+      "epoch": 0.283921568627451,
+      "grad_norm": 3.0927343368530273,
+      "learning_rate": 1.6269792717942316e-06,
+      "loss": 1.222,
+      "step": 362
+    },
+    {
+      "epoch": 0.2847058823529412,
+      "grad_norm": 3.4728779792785645,
+      "learning_rate": 1.5431021414084833e-06,
+      "loss": 1.4163,
+      "step": 363
+    },
+    {
+      "epoch": 0.28549019607843135,
+      "grad_norm": 3.3711791038513184,
+      "learning_rate": 1.461395981179182e-06,
+      "loss": 1.1301,
+      "step": 364
+    },
+    {
+      "epoch": 0.28627450980392155,
+      "grad_norm": 3.495633602142334,
+      "learning_rate": 1.3818660929019714e-06,
+      "loss": 1.2015,
+      "step": 365
+    },
+    {
+      "epoch": 0.28705882352941176,
+      "grad_norm": 4.38746452331543,
+      "learning_rate": 1.3045176371573263e-06,
+      "loss": 1.3248,
+      "step": 366
+    },
+    {
+      "epoch": 0.28784313725490196,
+      "grad_norm": 4.259886741638184,
+      "learning_rate": 1.2293556329757026e-06,
+      "loss": 1.512,
+      "step": 367
+    },
+    {
+      "epoch": 0.28862745098039216,
+      "grad_norm": 3.8518102169036865,
+      "learning_rate": 1.1563849575118322e-06,
+      "loss": 1.1679,
+      "step": 368
+    },
+    {
+      "epoch": 0.28941176470588237,
+      "grad_norm": 3.8261094093322754,
+      "learning_rate": 1.08561034572828e-06,
+      "loss": 1.1916,
+      "step": 369
+    },
+    {
+      "epoch": 0.2901960784313726,
+      "grad_norm": 5.102038383483887,
+      "learning_rate": 1.0170363900881796e-06,
+      "loss": 1.8872,
+      "step": 370
+    },
+    {
+      "epoch": 0.2909803921568627,
+      "grad_norm": 5.184679985046387,
+      "learning_rate": 9.506675402572445e-07,
+      "loss": 1.4387,
+      "step": 371
+    },
+    {
+      "epoch": 0.2917647058823529,
+      "grad_norm": 4.591920375823975,
+      "learning_rate": 8.865081028150279e-07,
+      "loss": 0.9937,
+      "step": 372
+    },
+    {
+      "epoch": 0.29254901960784313,
+      "grad_norm": 5.405058860778809,
+      "learning_rate": 8.24562240975476e-07,
+      "loss": 1.3768,
+      "step": 373
+    },
+    {
+      "epoch": 0.29333333333333333,
+      "grad_norm": 4.751702308654785,
+      "learning_rate": 7.648339743168009e-07,
+      "loss": 1.1257,
+      "step": 374
+    },
+    {
+      "epoch": 0.29411764705882354,
+      "grad_norm": 5.307697772979736,
+      "learning_rate": 7.073271785206313e-07,
+      "loss": 1.3546,
+      "step": 375
+    },
+    {
+      "epoch": 0.29490196078431374,
+      "grad_norm": 5.108140468597412,
+      "learning_rate": 6.520455851205298e-07,
+      "loss": 0.9153,
+      "step": 376
+    },
+    {
+      "epoch": 0.29568627450980395,
+      "grad_norm": 4.52064847946167,
+      "learning_rate": 5.98992781259876e-07,
+      "loss": 0.8798,
+      "step": 377
+    },
+    {
+      "epoch": 0.2964705882352941,
+      "grad_norm": 3.916222095489502,
+      "learning_rate": 5.48172209459079e-07,
+      "loss": 0.7,
+      "step": 378
+    },
+    {
+      "epoch": 0.2972549019607843,
+      "grad_norm": 4.668052673339844,
+      "learning_rate": 4.995871673922213e-07,
+      "loss": 1.0176,
+      "step": 379
+    },
+    {
+      "epoch": 0.2980392156862745,
+      "grad_norm": 4.989783763885498,
+      "learning_rate": 4.532408076730504e-07,
+      "loss": 1.0162,
+      "step": 380
+    },
+    {
+      "epoch": 0.2988235294117647,
+      "grad_norm": 3.6178388595581055,
+      "learning_rate": 4.091361376504382e-07,
+      "loss": 0.746,
+      "step": 381
+    },
+    {
+      "epoch": 0.2996078431372549,
+      "grad_norm": 4.038928985595703,
+      "learning_rate": 3.672760192132107e-07,
+      "loss": 0.8829,
+      "step": 382
+    },
+    {
+      "epoch": 0.3003921568627451,
+      "grad_norm": 5.406976699829102,
+      "learning_rate": 3.276631686044734e-07,
+      "loss": 1.3866,
+      "step": 383
+    },
+    {
+      "epoch": 0.30117647058823527,
+      "grad_norm": 4.995997905731201,
+      "learning_rate": 2.903001562453289e-07,
+      "loss": 0.8837,
+      "step": 384
+    },
+    {
+      "epoch": 0.30196078431372547,
+      "grad_norm": 6.531996250152588,
+      "learning_rate": 2.5518940656811094e-07,
+      "loss": 1.3823,
+      "step": 385
+    },
+    {
+      "epoch": 0.3027450980392157,
+      "grad_norm": 6.122218132019043,
+      "learning_rate": 2.2233319785904537e-07,
+      "loss": 1.251,
+      "step": 386
+    },
+    {
+      "epoch": 0.3035294117647059,
+      "grad_norm": 6.061277389526367,
+      "learning_rate": 1.9173366211043484e-07,
+      "loss": 1.4177,
+      "step": 387
+    },
+    {
+      "epoch": 0.3043137254901961,
+      "grad_norm": 5.348453521728516,
+      "learning_rate": 1.6339278488229802e-07,
+      "loss": 1.3636,
+      "step": 388
+    },
+    {
+      "epoch": 0.3050980392156863,
+      "grad_norm": 5.866225719451904,
+      "learning_rate": 1.3731240517354802e-07,
+      "loss": 1.1487,
+      "step": 389
+    },
+    {
+      "epoch": 0.3058823529411765,
+      "grad_norm": 5.9875688552856445,
+      "learning_rate": 1.1349421530265245e-07,
+      "loss": 1.3415,
+      "step": 390
+    },
+    {
+      "epoch": 0.30666666666666664,
+      "grad_norm": 7.915737628936768,
+      "learning_rate": 9.19397607978134e-08,
+      "loss": 1.225,
+      "step": 391
+    },
+    {
+      "epoch": 0.30745098039215685,
+      "grad_norm": 8.340410232543945,
+      "learning_rate": 7.265044029669486e-08,
+      "loss": 1.9622,
+      "step": 392
+    },
+    {
+      "epoch": 0.30823529411764705,
+      "grad_norm": 10.420660018920898,
+      "learning_rate": 5.562750545566652e-08,
+      "loss": 1.9871,
+      "step": 393
+    },
+    {
+      "epoch": 0.30901960784313726,
+      "grad_norm": 9.027931213378906,
+      "learning_rate": 4.087206086856754e-08,
+      "loss": 1.585,
+      "step": 394
+    },
+    {
+      "epoch": 0.30980392156862746,
+      "grad_norm": 10.239818572998047,
+      "learning_rate": 2.838506399506446e-08,
+      "loss": 1.9573,
+      "step": 395
+    },
+    {
+      "epoch": 0.31058823529411766,
+      "grad_norm": 7.999627590179443,
+      "learning_rate": 1.8167325098478735e-08,
+      "loss": 1.6673,
+      "step": 396
+    },
+    {
+      "epoch": 0.31137254901960787,
+      "grad_norm": 9.10187816619873,
+      "learning_rate": 1.0219507193247045e-08,
+      "loss": 1.6721,
+      "step": 397
+    },
+    {
+      "epoch": 0.312156862745098,
+      "grad_norm": 13.277602195739746,
+      "learning_rate": 4.542126001886303e-09,
+      "loss": 3.0342,
+      "step": 398
+    },
+    {
+      "epoch": 0.3129411764705882,
+      "grad_norm": 13.699840545654297,
+      "learning_rate": 1.1355499215215391e-09,
+      "loss": 1.7288,
+      "step": 399
+    },
+    {
+      "epoch": 0.3137254901960784,
+      "grad_norm": 11.232860565185547,
+      "learning_rate": 0.0,
+      "loss": 2.2469,
+      "step": 400
+    },
+    {
+      "epoch": 0.3137254901960784,
+      "eval_loss": 0.3466954827308655,
+      "eval_runtime": 169.3978,
+      "eval_samples_per_second": 12.68,
+      "eval_steps_per_second": 3.17,
+      "step": 400
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.053128390909624e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null