Training in progress, step 1200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2e30b8eb71c75917dc53dc021ce76179da6ff1ddc57ed501cd8f2170df1ea20
 size 35237104

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a8ea02f63b71e9ecc281715285720652652fca4f282320a62cc716249a6ef2c
 size 35237104

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a5649edf183bac0fd49cc8bb77ff4996498e4a2342ecbc19507d2b16b756222
 size 18810356

 version https://git-lfs.github.com/spec/v1
+oid sha256:8dae95bb49116e81bed3c68d86278f66e9b0452f468bf29d48fecf5e625ba7e0
 size 18810356

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55efe0fdedeb3aabf3711868667a8b9d1adcffe6440d9e5347d8fa8bfb07e987
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ea4a928811810f84d0f8b5936c44f2709f0f2cadb34cf4fa9a714f98b7ed5c0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e9b96617578bfc230d7a81fb4950d7c67ffa18c14dffcf3189f0dd3ecbd4b36d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b30780423ff45c94e8ad2d6a438363a868ec26f078372884947da902f24979de
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.8883090019226074,
   "best_model_checkpoint": "miner_id_24/checkpoint-1000",
-  "epoch": 1.0793572917944314,
   "eval_steps": 100,
-  "global_step": 1100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -7803,6 +7803,714 @@
       "eval_samples_per_second": 58.854,
       "eval_steps_per_second": 14.724,
       "step": 1100
     }
   ],
   "logging_steps": 1,
@@ -7817,7 +8525,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -7826,12 +8534,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.929821414424576e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.8883090019226074,
   "best_model_checkpoint": "miner_id_24/checkpoint-1000",
+  "epoch": 1.1774806819575616,
   "eval_steps": 100,
+  "global_step": 1200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 58.854,
       "eval_steps_per_second": 14.724,
       "step": 1100
+    },
+    {
+      "epoch": 1.0803385256960627,
+      "grad_norm": 2.066479444503784,
+      "learning_rate": 8.818283612384952e-05,
+      "loss": 1.6725,
+      "step": 1101
+    },
+    {
+      "epoch": 1.081319759597694,
+      "grad_norm": 2.083176851272583,
+      "learning_rate": 8.80291007154402e-05,
+      "loss": 1.5404,
+      "step": 1102
+    },
+    {
+      "epoch": 1.0823009934993255,
+      "grad_norm": 2.1391541957855225,
+      "learning_rate": 8.787539400573867e-05,
+      "loss": 1.6664,
+      "step": 1103
+    },
+    {
+      "epoch": 1.0832822274009568,
+      "grad_norm": 1.8939257860183716,
+      "learning_rate": 8.772171636323714e-05,
+      "loss": 1.6494,
+      "step": 1104
+    },
+    {
+      "epoch": 1.084263461302588,
+      "grad_norm": 2.128265619277954,
+      "learning_rate": 8.75680681563582e-05,
+      "loss": 1.7697,
+      "step": 1105
+    },
+    {
+      "epoch": 1.0852446952042194,
+      "grad_norm": 1.9691377878189087,
+      "learning_rate": 8.74144497534539e-05,
+      "loss": 1.6618,
+      "step": 1106
+    },
+    {
+      "epoch": 1.0862259291058507,
+      "grad_norm": 2.111213445663452,
+      "learning_rate": 8.726086152280483e-05,
+      "loss": 1.6478,
+      "step": 1107
+    },
+    {
+      "epoch": 1.087207163007482,
+      "grad_norm": 2.051668882369995,
+      "learning_rate": 8.710730383261916e-05,
+      "loss": 1.7054,
+      "step": 1108
+    },
+    {
+      "epoch": 1.0881883969091133,
+      "grad_norm": 1.7977656126022339,
+      "learning_rate": 8.695377705103199e-05,
+      "loss": 1.4778,
+      "step": 1109
+    },
+    {
+      "epoch": 1.0891696308107446,
+      "grad_norm": 2.13419246673584,
+      "learning_rate": 8.680028154610419e-05,
+      "loss": 1.8192,
+      "step": 1110
+    },
+    {
+      "epoch": 1.0901508647123759,
+      "grad_norm": 1.9920395612716675,
+      "learning_rate": 8.66468176858217e-05,
+      "loss": 1.6159,
+      "step": 1111
+    },
+    {
+      "epoch": 1.0911320986140072,
+      "grad_norm": 1.8859739303588867,
+      "learning_rate": 8.649338583809466e-05,
+      "loss": 1.4988,
+      "step": 1112
+    },
+    {
+      "epoch": 1.0921133325156385,
+      "grad_norm": 1.9767767190933228,
+      "learning_rate": 8.633998637075634e-05,
+      "loss": 1.6233,
+      "step": 1113
+    },
+    {
+      "epoch": 1.0930945664172698,
+      "grad_norm": 1.9266538619995117,
+      "learning_rate": 8.618661965156243e-05,
+      "loss": 1.6353,
+      "step": 1114
+    },
+    {
+      "epoch": 1.094075800318901,
+      "grad_norm": 2.1085169315338135,
+      "learning_rate": 8.60332860481902e-05,
+      "loss": 1.5874,
+      "step": 1115
+    },
+    {
+      "epoch": 1.0950570342205324,
+      "grad_norm": 1.8933839797973633,
+      "learning_rate": 8.587998592823738e-05,
+      "loss": 1.574,
+      "step": 1116
+    },
+    {
+      "epoch": 1.0960382681221637,
+      "grad_norm": 2.0371503829956055,
+      "learning_rate": 8.572671965922148e-05,
+      "loss": 1.7796,
+      "step": 1117
+    },
+    {
+      "epoch": 1.097019502023795,
+      "grad_norm": 1.9261376857757568,
+      "learning_rate": 8.557348760857899e-05,
+      "loss": 1.4353,
+      "step": 1118
+    },
+    {
+      "epoch": 1.0980007359254262,
+      "grad_norm": 2.089020252227783,
+      "learning_rate": 8.542029014366419e-05,
+      "loss": 1.7129,
+      "step": 1119
+    },
+    {
+      "epoch": 1.0989819698270575,
+      "grad_norm": 1.8985182046890259,
+      "learning_rate": 8.526712763174844e-05,
+      "loss": 1.5255,
+      "step": 1120
+    },
+    {
+      "epoch": 1.0999632037286888,
+      "grad_norm": 2.0458781719207764,
+      "learning_rate": 8.511400044001948e-05,
+      "loss": 1.742,
+      "step": 1121
+    },
+    {
+      "epoch": 1.1009444376303201,
+      "grad_norm": 1.925270438194275,
+      "learning_rate": 8.496090893558019e-05,
+      "loss": 1.5204,
+      "step": 1122
+    },
+    {
+      "epoch": 1.1019256715319514,
+      "grad_norm": 1.7764109373092651,
+      "learning_rate": 8.480785348544802e-05,
+      "loss": 1.4883,
+      "step": 1123
+    },
+    {
+      "epoch": 1.1029069054335827,
+      "grad_norm": 2.034646511077881,
+      "learning_rate": 8.465483445655394e-05,
+      "loss": 1.7298,
+      "step": 1124
+    },
+    {
+      "epoch": 1.103888139335214,
+      "grad_norm": 1.8433154821395874,
+      "learning_rate": 8.450185221574152e-05,
+      "loss": 1.5534,
+      "step": 1125
+    },
+    {
+      "epoch": 1.1048693732368453,
+      "grad_norm": 2.088686943054199,
+      "learning_rate": 8.434890712976632e-05,
+      "loss": 1.5524,
+      "step": 1126
+    },
+    {
+      "epoch": 1.1058506071384766,
+      "grad_norm": 2.0298330783843994,
+      "learning_rate": 8.419599956529466e-05,
+      "loss": 1.7555,
+      "step": 1127
+    },
+    {
+      "epoch": 1.106831841040108,
+      "grad_norm": 1.9726227521896362,
+      "learning_rate": 8.404312988890299e-05,
+      "loss": 1.6304,
+      "step": 1128
+    },
+    {
+      "epoch": 1.1078130749417392,
+      "grad_norm": 1.998617172241211,
+      "learning_rate": 8.389029846707688e-05,
+      "loss": 1.6792,
+      "step": 1129
+    },
+    {
+      "epoch": 1.1087943088433705,
+      "grad_norm": 1.983628749847412,
+      "learning_rate": 8.373750566621027e-05,
+      "loss": 1.5545,
+      "step": 1130
+    },
+    {
+      "epoch": 1.1097755427450018,
+      "grad_norm": 1.913836121559143,
+      "learning_rate": 8.358475185260438e-05,
+      "loss": 1.5676,
+      "step": 1131
+    },
+    {
+      "epoch": 1.110756776646633,
+      "grad_norm": 1.8630105257034302,
+      "learning_rate": 8.343203739246712e-05,
+      "loss": 1.4564,
+      "step": 1132
+    },
+    {
+      "epoch": 1.1117380105482644,
+      "grad_norm": 2.2467024326324463,
+      "learning_rate": 8.32793626519119e-05,
+      "loss": 1.7565,
+      "step": 1133
+    },
+    {
+      "epoch": 1.1127192444498957,
+      "grad_norm": 1.967323899269104,
+      "learning_rate": 8.312672799695702e-05,
+      "loss": 1.5484,
+      "step": 1134
+    },
+    {
+      "epoch": 1.113700478351527,
+      "grad_norm": 1.8815773725509644,
+      "learning_rate": 8.297413379352469e-05,
+      "loss": 1.5544,
+      "step": 1135
+    },
+    {
+      "epoch": 1.1146817122531583,
+      "grad_norm": 2.087390422821045,
+      "learning_rate": 8.282158040744003e-05,
+      "loss": 1.5492,
+      "step": 1136
+    },
+    {
+      "epoch": 1.1156629461547896,
+      "grad_norm": 2.0206923484802246,
+      "learning_rate": 8.266906820443036e-05,
+      "loss": 1.5578,
+      "step": 1137
+    },
+    {
+      "epoch": 1.1166441800564209,
+      "grad_norm": 2.1264116764068604,
+      "learning_rate": 8.251659755012435e-05,
+      "loss": 1.7562,
+      "step": 1138
+    },
+    {
+      "epoch": 1.1176254139580521,
+      "grad_norm": 2.1084797382354736,
+      "learning_rate": 8.236416881005093e-05,
+      "loss": 1.8296,
+      "step": 1139
+    },
+    {
+      "epoch": 1.1186066478596834,
+      "grad_norm": 2.130791425704956,
+      "learning_rate": 8.22117823496386e-05,
+      "loss": 1.6879,
+      "step": 1140
+    },
+    {
+      "epoch": 1.1195878817613147,
+      "grad_norm": 1.9630494117736816,
+      "learning_rate": 8.205943853421457e-05,
+      "loss": 1.6107,
+      "step": 1141
+    },
+    {
+      "epoch": 1.1205691156629463,
+      "grad_norm": 2.180497407913208,
+      "learning_rate": 8.190713772900374e-05,
+      "loss": 1.7211,
+      "step": 1142
+    },
+    {
+      "epoch": 1.1215503495645776,
+      "grad_norm": 1.846765160560608,
+      "learning_rate": 8.175488029912783e-05,
+      "loss": 1.5607,
+      "step": 1143
+    },
+    {
+      "epoch": 1.1225315834662088,
+      "grad_norm": 2.244673252105713,
+      "learning_rate": 8.160266660960472e-05,
+      "loss": 1.8069,
+      "step": 1144
+    },
+    {
+      "epoch": 1.1235128173678401,
+      "grad_norm": 2.002246141433716,
+      "learning_rate": 8.14504970253474e-05,
+      "loss": 1.6429,
+      "step": 1145
+    },
+    {
+      "epoch": 1.1244940512694714,
+      "grad_norm": 1.974992275238037,
+      "learning_rate": 8.129837191116298e-05,
+      "loss": 1.5133,
+      "step": 1146
+    },
+    {
+      "epoch": 1.1254752851711027,
+      "grad_norm": 1.9683125019073486,
+      "learning_rate": 8.114629163175215e-05,
+      "loss": 1.5906,
+      "step": 1147
+    },
+    {
+      "epoch": 1.126456519072734,
+      "grad_norm": 1.8641992807388306,
+      "learning_rate": 8.099425655170801e-05,
+      "loss": 1.4253,
+      "step": 1148
+    },
+    {
+      "epoch": 1.1274377529743653,
+      "grad_norm": 1.923235535621643,
+      "learning_rate": 8.084226703551528e-05,
+      "loss": 1.591,
+      "step": 1149
+    },
+    {
+      "epoch": 1.1284189868759966,
+      "grad_norm": 1.962015151977539,
+      "learning_rate": 8.06903234475495e-05,
+      "loss": 1.573,
+      "step": 1150
+    },
+    {
+      "epoch": 1.129400220777628,
+      "grad_norm": 1.9990873336791992,
+      "learning_rate": 8.053842615207615e-05,
+      "loss": 1.7404,
+      "step": 1151
+    },
+    {
+      "epoch": 1.1303814546792592,
+      "grad_norm": 2.225425958633423,
+      "learning_rate": 8.038657551324955e-05,
+      "loss": 1.8477,
+      "step": 1152
+    },
+    {
+      "epoch": 1.1313626885808905,
+      "grad_norm": 1.7779438495635986,
+      "learning_rate": 8.02347718951124e-05,
+      "loss": 1.5567,
+      "step": 1153
+    },
+    {
+      "epoch": 1.1323439224825218,
+      "grad_norm": 2.0244076251983643,
+      "learning_rate": 8.008301566159447e-05,
+      "loss": 1.6891,
+      "step": 1154
+    },
+    {
+      "epoch": 1.133325156384153,
+      "grad_norm": 1.987186074256897,
+      "learning_rate": 7.993130717651207e-05,
+      "loss": 1.7452,
+      "step": 1155
+    },
+    {
+      "epoch": 1.1343063902857844,
+      "grad_norm": 2.076939344406128,
+      "learning_rate": 7.977964680356696e-05,
+      "loss": 1.7669,
+      "step": 1156
+    },
+    {
+      "epoch": 1.1352876241874157,
+      "grad_norm": 1.9476886987686157,
+      "learning_rate": 7.962803490634563e-05,
+      "loss": 1.6541,
+      "step": 1157
+    },
+    {
+      "epoch": 1.136268858089047,
+      "grad_norm": 2.047985792160034,
+      "learning_rate": 7.947647184831824e-05,
+      "loss": 1.486,
+      "step": 1158
+    },
+    {
+      "epoch": 1.1372500919906783,
+      "grad_norm": 2.015349864959717,
+      "learning_rate": 7.932495799283801e-05,
+      "loss": 1.5786,
+      "step": 1159
+    },
+    {
+      "epoch": 1.1382313258923096,
+      "grad_norm": 1.8462163209915161,
+      "learning_rate": 7.917349370314007e-05,
+      "loss": 1.483,
+      "step": 1160
+    },
+    {
+      "epoch": 1.1392125597939409,
+      "grad_norm": 1.9125447273254395,
+      "learning_rate": 7.902207934234078e-05,
+      "loss": 1.6106,
+      "step": 1161
+    },
+    {
+      "epoch": 1.1401937936955722,
+      "grad_norm": 1.9936769008636475,
+      "learning_rate": 7.887071527343687e-05,
+      "loss": 1.748,
+      "step": 1162
+    },
+    {
+      "epoch": 1.1411750275972035,
+      "grad_norm": 2.2277896404266357,
+      "learning_rate": 7.871940185930438e-05,
+      "loss": 1.9264,
+      "step": 1163
+    },
+    {
+      "epoch": 1.1421562614988348,
+      "grad_norm": 2.0859293937683105,
+      "learning_rate": 7.856813946269795e-05,
+      "loss": 1.5817,
+      "step": 1164
+    },
+    {
+      "epoch": 1.143137495400466,
+      "grad_norm": 1.9917875528335571,
+      "learning_rate": 7.841692844624999e-05,
+      "loss": 1.7425,
+      "step": 1165
+    },
+    {
+      "epoch": 1.1441187293020973,
+      "grad_norm": 1.9697751998901367,
+      "learning_rate": 7.826576917246961e-05,
+      "loss": 1.4807,
+      "step": 1166
+    },
+    {
+      "epoch": 1.1450999632037286,
+      "grad_norm": 2.149662733078003,
+      "learning_rate": 7.811466200374194e-05,
+      "loss": 1.902,
+      "step": 1167
+    },
+    {
+      "epoch": 1.14608119710536,
+      "grad_norm": 2.1379075050354004,
+      "learning_rate": 7.796360730232724e-05,
+      "loss": 1.7285,
+      "step": 1168
+    },
+    {
+      "epoch": 1.1470624310069912,
+      "grad_norm": 1.9367038011550903,
+      "learning_rate": 7.78126054303599e-05,
+      "loss": 1.5475,
+      "step": 1169
+    },
+    {
+      "epoch": 1.1480436649086225,
+      "grad_norm": 1.949225902557373,
+      "learning_rate": 7.766165674984766e-05,
+      "loss": 1.6411,
+      "step": 1170
+    },
+    {
+      "epoch": 1.1490248988102538,
+      "grad_norm": 1.9248489141464233,
+      "learning_rate": 7.751076162267086e-05,
+      "loss": 1.4931,
+      "step": 1171
+    },
+    {
+      "epoch": 1.1500061327118851,
+      "grad_norm": 2.1530492305755615,
+      "learning_rate": 7.735992041058126e-05,
+      "loss": 1.7015,
+      "step": 1172
+    },
+    {
+      "epoch": 1.1509873666135164,
+      "grad_norm": 1.9067368507385254,
+      "learning_rate": 7.720913347520154e-05,
+      "loss": 1.723,
+      "step": 1173
+    },
+    {
+      "epoch": 1.1519686005151477,
+      "grad_norm": 2.1236000061035156,
+      "learning_rate": 7.705840117802418e-05,
+      "loss": 1.4677,
+      "step": 1174
+    },
+    {
+      "epoch": 1.152949834416779,
+      "grad_norm": 2.022383213043213,
+      "learning_rate": 7.69077238804107e-05,
+      "loss": 1.6665,
+      "step": 1175
+    },
+    {
+      "epoch": 1.1539310683184105,
+      "grad_norm": 2.1001341342926025,
+      "learning_rate": 7.675710194359069e-05,
+      "loss": 1.832,
+      "step": 1176
+    },
+    {
+      "epoch": 1.1549123022200418,
+      "grad_norm": 1.9974193572998047,
+      "learning_rate": 7.660653572866112e-05,
+      "loss": 1.4889,
+      "step": 1177
+    },
+    {
+      "epoch": 1.1558935361216731,
+      "grad_norm": 1.9368098974227905,
+      "learning_rate": 7.645602559658533e-05,
+      "loss": 1.471,
+      "step": 1178
+    },
+    {
+      "epoch": 1.1568747700233044,
+      "grad_norm": 2.0265512466430664,
+      "learning_rate": 7.630557190819217e-05,
+      "loss": 1.5049,
+      "step": 1179
+    },
+    {
+      "epoch": 1.1578560039249357,
+      "grad_norm": 1.9389729499816895,
+      "learning_rate": 7.61551750241753e-05,
+      "loss": 1.591,
+      "step": 1180
+    },
+    {
+      "epoch": 1.158837237826567,
+      "grad_norm": 2.064378023147583,
+      "learning_rate": 7.600483530509204e-05,
+      "loss": 1.6401,
+      "step": 1181
+    },
+    {
+      "epoch": 1.1598184717281983,
+      "grad_norm": 1.7993282079696655,
+      "learning_rate": 7.58545531113627e-05,
+      "loss": 1.4952,
+      "step": 1182
+    },
+    {
+      "epoch": 1.1607997056298296,
+      "grad_norm": 1.8726584911346436,
+      "learning_rate": 7.57043288032698e-05,
+      "loss": 1.5525,
+      "step": 1183
+    },
+    {
+      "epoch": 1.1617809395314609,
+      "grad_norm": 1.9391907453536987,
+      "learning_rate": 7.555416274095694e-05,
+      "loss": 1.6255,
+      "step": 1184
+    },
+    {
+      "epoch": 1.1627621734330922,
+      "grad_norm": 2.114257574081421,
+      "learning_rate": 7.540405528442822e-05,
+      "loss": 1.7399,
+      "step": 1185
+    },
+    {
+      "epoch": 1.1637434073347235,
+      "grad_norm": 2.2467081546783447,
+      "learning_rate": 7.525400679354712e-05,
+      "loss": 1.7055,
+      "step": 1186
+    },
+    {
+      "epoch": 1.1647246412363548,
+      "grad_norm": 2.285236120223999,
+      "learning_rate": 7.510401762803575e-05,
+      "loss": 1.783,
+      "step": 1187
+    },
+    {
+      "epoch": 1.165705875137986,
+      "grad_norm": 2.042032480239868,
+      "learning_rate": 7.495408814747418e-05,
+      "loss": 1.5541,
+      "step": 1188
+    },
+    {
+      "epoch": 1.1666871090396174,
+      "grad_norm": 2.073132276535034,
+      "learning_rate": 7.480421871129914e-05,
+      "loss": 1.7099,
+      "step": 1189
+    },
+    {
+      "epoch": 1.1676683429412487,
+      "grad_norm": 1.807121992111206,
+      "learning_rate": 7.465440967880354e-05,
+      "loss": 1.3538,
+      "step": 1190
+    },
+    {
+      "epoch": 1.16864957684288,
+      "grad_norm": 2.3562142848968506,
+      "learning_rate": 7.450466140913557e-05,
+      "loss": 1.8335,
+      "step": 1191
+    },
+    {
+      "epoch": 1.1696308107445113,
+      "grad_norm": 2.075679302215576,
+      "learning_rate": 7.435497426129759e-05,
+      "loss": 1.6448,
+      "step": 1192
+    },
+    {
+      "epoch": 1.1706120446461425,
+      "grad_norm": 2.0627026557922363,
+      "learning_rate": 7.420534859414542e-05,
+      "loss": 1.5626,
+      "step": 1193
+    },
+    {
+      "epoch": 1.1715932785477738,
+      "grad_norm": 2.0521657466888428,
+      "learning_rate": 7.405578476638768e-05,
+      "loss": 1.7099,
+      "step": 1194
+    },
+    {
+      "epoch": 1.1725745124494051,
+      "grad_norm": 1.9551303386688232,
+      "learning_rate": 7.390628313658457e-05,
+      "loss": 1.5361,
+      "step": 1195
+    },
+    {
+      "epoch": 1.1735557463510364,
+      "grad_norm": 1.9095375537872314,
+      "learning_rate": 7.375684406314715e-05,
+      "loss": 1.4968,
+      "step": 1196
+    },
+    {
+      "epoch": 1.1745369802526677,
+      "grad_norm": 2.150273323059082,
+      "learning_rate": 7.360746790433672e-05,
+      "loss": 1.6411,
+      "step": 1197
+    },
+    {
+      "epoch": 1.175518214154299,
+      "grad_norm": 1.9941962957382202,
+      "learning_rate": 7.345815501826353e-05,
+      "loss": 1.6179,
+      "step": 1198
+    },
+    {
+      "epoch": 1.1764994480559303,
+      "grad_norm": 2.2665584087371826,
+      "learning_rate": 7.330890576288619e-05,
+      "loss": 1.8879,
+      "step": 1199
+    },
+    {
+      "epoch": 1.1774806819575616,
+      "grad_norm": 2.202577829360962,
+      "learning_rate": 7.315972049601086e-05,
+      "loss": 1.7252,
+      "step": 1200
+    },
+    {
+      "epoch": 1.1774806819575616,
+      "eval_loss": 1.8954843282699585,
+      "eval_runtime": 23.0768,
+      "eval_samples_per_second": 58.89,
+      "eval_steps_per_second": 14.733,
+      "step": 1200
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8.650775715250176e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null