{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4643978008769782, "global_step": 652, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.517241379310345e-06, "loss": 0.4754, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.103448275862069e-05, "loss": 0.497, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.6551724137931037e-05, "loss": 0.5361, "step": 6 }, { "epoch": 0.01, "learning_rate": 2.206896551724138e-05, "loss": 0.4945, "step": 8 }, { "epoch": 0.01, "learning_rate": 2.7586206896551727e-05, "loss": 0.4847, "step": 10 }, { "epoch": 0.01, "learning_rate": 3.310344827586207e-05, "loss": 0.4741, "step": 12 }, { "epoch": 0.01, "learning_rate": 3.862068965517242e-05, "loss": 0.4742, "step": 14 }, { "epoch": 0.01, "learning_rate": 4.413793103448276e-05, "loss": 0.4544, "step": 16 }, { "epoch": 0.01, "learning_rate": 4.9655172413793107e-05, "loss": 0.4779, "step": 18 }, { "epoch": 0.01, "learning_rate": 5.517241379310345e-05, "loss": 0.4374, "step": 20 }, { "epoch": 0.02, "learning_rate": 6.068965517241379e-05, "loss": 0.5045, "step": 22 }, { "epoch": 0.02, "learning_rate": 6.620689655172415e-05, "loss": 0.46, "step": 24 }, { "epoch": 0.02, "learning_rate": 7.17241379310345e-05, "loss": 0.4489, "step": 26 }, { "epoch": 0.02, "learning_rate": 7.724137931034484e-05, "loss": 0.4535, "step": 28 }, { "epoch": 0.02, "learning_rate": 7.999989544228434e-05, "loss": 0.4802, "step": 30 }, { "epoch": 0.02, "learning_rate": 7.999905898383868e-05, "loss": 0.5038, "step": 32 }, { "epoch": 0.02, "learning_rate": 7.999738608443897e-05, "loss": 0.4804, "step": 34 }, { "epoch": 0.03, "learning_rate": 7.999487677906807e-05, "loss": 0.4752, "step": 36 }, { "epoch": 0.03, "learning_rate": 7.999153112019939e-05, "loss": 0.4681, "step": 38 }, { "epoch": 0.03, "learning_rate": 7.998734917779568e-05, "loss": 0.456, "step": 40 }, { "epoch": 0.03, "learning_rate": 7.998233103930771e-05, "loss": 0.4724, "step": 42 }, { "epoch": 0.03, "learning_rate": 7.99764768096724e-05, "loss": 0.4514, "step": 44 }, { "epoch": 0.03, "learning_rate": 7.99697866113105e-05, "loss": 0.4601, "step": 46 }, { "epoch": 0.03, "learning_rate": 7.996226058412426e-05, "loss": 0.4754, "step": 48 }, { "epoch": 0.04, "learning_rate": 7.995389888549428e-05, "loss": 0.4395, "step": 50 }, { "epoch": 0.04, "learning_rate": 7.994470169027636e-05, "loss": 0.4795, "step": 52 }, { "epoch": 0.04, "learning_rate": 7.993466919079781e-05, "loss": 0.4506, "step": 54 }, { "epoch": 0.04, "learning_rate": 7.992380159685338e-05, "loss": 0.5032, "step": 56 }, { "epoch": 0.04, "learning_rate": 7.991209913570094e-05, "loss": 0.4629, "step": 58 }, { "epoch": 0.04, "learning_rate": 7.989956205205669e-05, "loss": 0.4653, "step": 60 }, { "epoch": 0.04, "learning_rate": 7.988619060809005e-05, "loss": 0.4462, "step": 62 }, { "epoch": 0.05, "learning_rate": 7.98719850834182e-05, "loss": 0.434, "step": 64 }, { "epoch": 0.05, "learning_rate": 7.985694577510017e-05, "loss": 0.4845, "step": 66 }, { "epoch": 0.05, "learning_rate": 7.98410729976307e-05, "loss": 0.4586, "step": 68 }, { "epoch": 0.05, "learning_rate": 7.982436708293364e-05, "loss": 0.4515, "step": 70 }, { "epoch": 0.05, "learning_rate": 7.980682838035498e-05, "loss": 0.4621, "step": 72 }, { "epoch": 0.05, "learning_rate": 7.978845725665557e-05, "loss": 0.4524, "step": 74 }, { "epoch": 0.05, "learning_rate": 7.976925409600346e-05, "loss": 0.4458, "step": 76 }, { "epoch": 0.06, "learning_rate": 7.974921929996585e-05, "loss": 0.4992, "step": 78 }, { "epoch": 0.06, "learning_rate": 7.97283532875007e-05, "loss": 0.4931, "step": 80 }, { "epoch": 0.06, "learning_rate": 7.970665649494793e-05, "loss": 0.469, "step": 82 }, { "epoch": 0.06, "learning_rate": 7.968412937602039e-05, "loss": 0.5017, "step": 84 }, { "epoch": 0.06, "learning_rate": 7.966077240179428e-05, "loss": 0.4753, "step": 86 }, { "epoch": 0.06, "learning_rate": 7.963658606069931e-05, "loss": 0.4618, "step": 88 }, { "epoch": 0.06, "learning_rate": 7.961157085850858e-05, "loss": 0.4356, "step": 90 }, { "epoch": 0.07, "learning_rate": 7.958572731832784e-05, "loss": 0.5153, "step": 92 }, { "epoch": 0.07, "learning_rate": 7.955905598058471e-05, "loss": 0.4933, "step": 94 }, { "epoch": 0.07, "learning_rate": 7.95315574030173e-05, "loss": 0.4749, "step": 96 }, { "epoch": 0.07, "learning_rate": 7.950323216066252e-05, "loss": 0.4162, "step": 98 }, { "epoch": 0.07, "learning_rate": 7.947408084584417e-05, "loss": 0.4894, "step": 100 }, { "epoch": 0.07, "learning_rate": 7.944410406816039e-05, "loss": 0.4281, "step": 102 }, { "epoch": 0.07, "learning_rate": 7.941330245447103e-05, "loss": 0.4245, "step": 104 }, { "epoch": 0.08, "learning_rate": 7.938167664888458e-05, "loss": 0.4468, "step": 106 }, { "epoch": 0.08, "learning_rate": 7.934922731274452e-05, "loss": 0.4765, "step": 108 }, { "epoch": 0.08, "learning_rate": 7.931595512461566e-05, "loss": 0.4411, "step": 110 }, { "epoch": 0.08, "learning_rate": 7.928186078026991e-05, "loss": 0.4727, "step": 112 }, { "epoch": 0.08, "learning_rate": 7.924694499267169e-05, "loss": 0.4123, "step": 114 }, { "epoch": 0.08, "learning_rate": 7.921120849196303e-05, "loss": 0.4703, "step": 116 }, { "epoch": 0.08, "learning_rate": 7.917465202544833e-05, "loss": 0.4674, "step": 118 }, { "epoch": 0.09, "learning_rate": 7.913727635757873e-05, "loss": 0.4812, "step": 120 }, { "epoch": 0.09, "learning_rate": 7.909908226993609e-05, "loss": 0.4304, "step": 122 }, { "epoch": 0.09, "learning_rate": 7.906007056121667e-05, "loss": 0.4449, "step": 124 }, { "epoch": 0.09, "learning_rate": 7.902024204721445e-05, "loss": 0.4433, "step": 126 }, { "epoch": 0.09, "learning_rate": 7.897959756080402e-05, "loss": 0.4602, "step": 128 }, { "epoch": 0.09, "learning_rate": 7.89381379519232e-05, "loss": 0.4698, "step": 130 }, { "epoch": 0.09, "learning_rate": 7.889586408755526e-05, "loss": 0.3831, "step": 132 }, { "epoch": 0.1, "learning_rate": 7.885277685171078e-05, "loss": 0.4394, "step": 134 }, { "epoch": 0.1, "learning_rate": 7.880887714540917e-05, "loss": 0.4302, "step": 136 }, { "epoch": 0.1, "learning_rate": 7.876416588665984e-05, "loss": 0.4079, "step": 138 }, { "epoch": 0.1, "learning_rate": 7.871864401044297e-05, "loss": 0.4639, "step": 140 }, { "epoch": 0.1, "learning_rate": 7.867231246869002e-05, "loss": 0.4498, "step": 142 }, { "epoch": 0.1, "learning_rate": 7.862517223026373e-05, "loss": 0.4289, "step": 144 }, { "epoch": 0.1, "learning_rate": 7.857722428093797e-05, "loss": 0.4515, "step": 146 }, { "epoch": 0.11, "learning_rate": 7.8528469623377e-05, "loss": 0.4322, "step": 148 }, { "epoch": 0.11, "learning_rate": 7.847890927711464e-05, "loss": 0.434, "step": 150 }, { "epoch": 0.11, "learning_rate": 7.842854427853285e-05, "loss": 0.4431, "step": 152 }, { "epoch": 0.11, "learning_rate": 7.83773756808401e-05, "loss": 0.4089, "step": 154 }, { "epoch": 0.11, "learning_rate": 7.83254045540493e-05, "loss": 0.488, "step": 156 }, { "epoch": 0.11, "learning_rate": 7.827263198495554e-05, "loss": 0.4526, "step": 158 }, { "epoch": 0.11, "learning_rate": 7.821905907711319e-05, "loss": 0.4184, "step": 160 }, { "epoch": 0.12, "learning_rate": 7.816468695081297e-05, "loss": 0.4176, "step": 162 }, { "epoch": 0.12, "learning_rate": 7.810951674305845e-05, "loss": 0.4562, "step": 164 }, { "epoch": 0.12, "learning_rate": 7.805354960754233e-05, "loss": 0.4403, "step": 166 }, { "epoch": 0.12, "learning_rate": 7.799678671462222e-05, "loss": 0.4238, "step": 168 }, { "epoch": 0.12, "learning_rate": 7.793922925129626e-05, "loss": 0.4911, "step": 170 }, { "epoch": 0.12, "learning_rate": 7.788087842117826e-05, "loss": 0.4704, "step": 172 }, { "epoch": 0.12, "learning_rate": 7.782173544447253e-05, "loss": 0.4355, "step": 174 }, { "epoch": 0.13, "learning_rate": 7.776180155794836e-05, "loss": 0.4341, "step": 176 }, { "epoch": 0.13, "learning_rate": 7.770107801491414e-05, "loss": 0.4198, "step": 178 }, { "epoch": 0.13, "learning_rate": 7.763956608519123e-05, "loss": 0.4756, "step": 180 }, { "epoch": 0.13, "learning_rate": 7.757726705508732e-05, "loss": 0.4315, "step": 182 }, { "epoch": 0.13, "learning_rate": 7.751418222736954e-05, "loss": 0.4167, "step": 184 }, { "epoch": 0.13, "learning_rate": 7.745031292123727e-05, "loss": 0.4316, "step": 186 }, { "epoch": 0.13, "learning_rate": 7.738566047229454e-05, "loss": 0.4103, "step": 188 }, { "epoch": 0.14, "learning_rate": 7.732022623252203e-05, "loss": 0.4527, "step": 190 }, { "epoch": 0.14, "learning_rate": 7.725401157024887e-05, "loss": 0.4587, "step": 192 }, { "epoch": 0.14, "learning_rate": 7.718701787012406e-05, "loss": 0.457, "step": 194 }, { "epoch": 0.14, "learning_rate": 7.711924653308737e-05, "loss": 0.4463, "step": 196 }, { "epoch": 0.14, "learning_rate": 7.705069897634022e-05, "loss": 0.3972, "step": 198 }, { "epoch": 0.14, "learning_rate": 7.69813766333159e-05, "loss": 0.4375, "step": 200 }, { "epoch": 0.14, "learning_rate": 7.691128095364969e-05, "loss": 0.4472, "step": 202 }, { "epoch": 0.15, "learning_rate": 7.684041340314852e-05, "loss": 0.4198, "step": 204 }, { "epoch": 0.15, "learning_rate": 7.676877546376028e-05, "loss": 0.4365, "step": 206 }, { "epoch": 0.15, "learning_rate": 7.669636863354287e-05, "loss": 0.4393, "step": 208 }, { "epoch": 0.15, "learning_rate": 7.662319442663286e-05, "loss": 0.373, "step": 210 }, { "epoch": 0.15, "learning_rate": 7.654925437321386e-05, "loss": 0.4099, "step": 212 }, { "epoch": 0.15, "learning_rate": 7.647455001948443e-05, "loss": 0.4473, "step": 214 }, { "epoch": 0.15, "learning_rate": 7.639908292762585e-05, "loss": 0.4578, "step": 216 }, { "epoch": 0.16, "learning_rate": 7.632285467576943e-05, "loss": 0.4263, "step": 218 }, { "epoch": 0.16, "learning_rate": 7.624586685796343e-05, "loss": 0.402, "step": 220 }, { "epoch": 0.16, "learning_rate": 7.616812108413986e-05, "loss": 0.4347, "step": 222 }, { "epoch": 0.16, "learning_rate": 7.608961898008066e-05, "loss": 0.45, "step": 224 }, { "epoch": 0.16, "learning_rate": 7.601036218738382e-05, "loss": 0.4445, "step": 226 }, { "epoch": 0.16, "learning_rate": 7.593035236342903e-05, "loss": 0.421, "step": 228 }, { "epoch": 0.16, "learning_rate": 7.5849591181343e-05, "loss": 0.4048, "step": 230 }, { "epoch": 0.17, "learning_rate": 7.576808032996444e-05, "loss": 0.4372, "step": 232 }, { "epoch": 0.17, "learning_rate": 7.568582151380881e-05, "loss": 0.3985, "step": 234 }, { "epoch": 0.17, "learning_rate": 7.560281645303266e-05, "loss": 0.4387, "step": 236 }, { "epoch": 0.17, "learning_rate": 7.551906688339762e-05, "loss": 0.3997, "step": 238 }, { "epoch": 0.17, "learning_rate": 7.543457455623413e-05, "loss": 0.3969, "step": 240 }, { "epoch": 0.17, "learning_rate": 7.534934123840486e-05, "loss": 0.4212, "step": 242 }, { "epoch": 0.17, "learning_rate": 7.526336871226763e-05, "loss": 0.4026, "step": 244 }, { "epoch": 0.18, "learning_rate": 7.51766587756383e-05, "loss": 0.424, "step": 246 }, { "epoch": 0.18, "learning_rate": 7.50892132417531e-05, "loss": 0.4037, "step": 248 }, { "epoch": 0.18, "learning_rate": 7.500103393923068e-05, "loss": 0.395, "step": 250 }, { "epoch": 0.18, "learning_rate": 7.491212271203391e-05, "loss": 0.385, "step": 252 }, { "epoch": 0.18, "learning_rate": 7.482248141943133e-05, "loss": 0.4292, "step": 254 }, { "epoch": 0.18, "learning_rate": 7.473211193595825e-05, "loss": 0.4002, "step": 256 }, { "epoch": 0.18, "learning_rate": 7.464101615137756e-05, "loss": 0.4208, "step": 258 }, { "epoch": 0.19, "learning_rate": 7.454919597064017e-05, "loss": 0.3817, "step": 260 }, { "epoch": 0.19, "learning_rate": 7.445665331384526e-05, "loss": 0.3668, "step": 262 }, { "epoch": 0.19, "learning_rate": 7.436339011620007e-05, "loss": 0.4409, "step": 264 }, { "epoch": 0.19, "learning_rate": 7.426940832797941e-05, "loss": 0.3886, "step": 266 }, { "epoch": 0.19, "learning_rate": 7.417470991448494e-05, "loss": 0.4372, "step": 268 }, { "epoch": 0.19, "learning_rate": 7.407929685600403e-05, "loss": 0.4362, "step": 270 }, { "epoch": 0.19, "learning_rate": 7.398317114776836e-05, "loss": 0.4164, "step": 272 }, { "epoch": 0.2, "learning_rate": 7.388633479991219e-05, "loss": 0.4498, "step": 274 }, { "epoch": 0.2, "learning_rate": 7.378878983743037e-05, "loss": 0.3623, "step": 276 }, { "epoch": 0.2, "learning_rate": 7.369053830013588e-05, "loss": 0.4217, "step": 278 }, { "epoch": 0.2, "learning_rate": 7.35915822426173e-05, "loss": 0.4554, "step": 280 }, { "epoch": 0.2, "learning_rate": 7.349192373419583e-05, "loss": 0.4505, "step": 282 }, { "epoch": 0.2, "learning_rate": 7.339156485888191e-05, "loss": 0.3735, "step": 284 }, { "epoch": 0.2, "learning_rate": 7.329050771533176e-05, "loss": 0.4257, "step": 286 }, { "epoch": 0.21, "learning_rate": 7.31887544168034e-05, "loss": 0.469, "step": 288 }, { "epoch": 0.21, "learning_rate": 7.308630709111261e-05, "loss": 0.3918, "step": 290 }, { "epoch": 0.21, "learning_rate": 7.29831678805882e-05, "loss": 0.4332, "step": 292 }, { "epoch": 0.21, "learning_rate": 7.287933894202743e-05, "loss": 0.4651, "step": 294 }, { "epoch": 0.21, "learning_rate": 7.277482244665079e-05, "loss": 0.3642, "step": 296 }, { "epoch": 0.21, "learning_rate": 7.266962058005659e-05, "loss": 0.4475, "step": 298 }, { "epoch": 0.21, "learning_rate": 7.256373554217536e-05, "loss": 0.4169, "step": 300 }, { "epoch": 0.22, "learning_rate": 7.245716954722374e-05, "loss": 0.4228, "step": 302 }, { "epoch": 0.22, "learning_rate": 7.234992482365821e-05, "loss": 0.4214, "step": 304 }, { "epoch": 0.22, "learning_rate": 7.22420036141285e-05, "loss": 0.4339, "step": 306 }, { "epoch": 0.22, "learning_rate": 7.21334081754307e-05, "loss": 0.4068, "step": 308 }, { "epoch": 0.22, "learning_rate": 7.202414077846003e-05, "loss": 0.4048, "step": 310 }, { "epoch": 0.22, "learning_rate": 7.191420370816339e-05, "loss": 0.4318, "step": 312 }, { "epoch": 0.22, "learning_rate": 7.180359926349158e-05, "loss": 0.3897, "step": 314 }, { "epoch": 0.23, "learning_rate": 7.169232975735115e-05, "loss": 0.4575, "step": 316 }, { "epoch": 0.23, "learning_rate": 7.158039751655619e-05, "loss": 0.442, "step": 318 }, { "epoch": 0.23, "learning_rate": 7.146780488177948e-05, "loss": 0.3831, "step": 320 }, { "epoch": 0.23, "learning_rate": 7.135455420750371e-05, "loss": 0.3654, "step": 322 }, { "epoch": 0.23, "learning_rate": 7.124064786197213e-05, "loss": 0.4198, "step": 324 }, { "epoch": 0.23, "learning_rate": 7.112608822713908e-05, "loss": 0.4102, "step": 326 }, { "epoch": 0.23, "learning_rate": 7.10108776986202e-05, "loss": 0.375, "step": 328 }, { "epoch": 0.24, "learning_rate": 7.089501868564224e-05, "loss": 0.4253, "step": 330 }, { "epoch": 0.24, "learning_rate": 7.077851361099281e-05, "loss": 0.4087, "step": 332 }, { "epoch": 0.24, "learning_rate": 7.066136491096961e-05, "loss": 0.4172, "step": 334 }, { "epoch": 0.24, "learning_rate": 7.054357503532953e-05, "loss": 0.4306, "step": 336 }, { "epoch": 0.24, "learning_rate": 7.042514644723741e-05, "loss": 0.3946, "step": 338 }, { "epoch": 0.24, "learning_rate": 7.030608162321455e-05, "loss": 0.4295, "step": 340 }, { "epoch": 0.24, "learning_rate": 7.018638305308693e-05, "loss": 0.4249, "step": 342 }, { "epoch": 0.25, "learning_rate": 7.006605323993301e-05, "loss": 0.3931, "step": 344 }, { "epoch": 0.25, "learning_rate": 6.994509470003167e-05, "loss": 0.3852, "step": 346 }, { "epoch": 0.25, "learning_rate": 6.982350996280926e-05, "loss": 0.3973, "step": 348 }, { "epoch": 0.25, "learning_rate": 6.970130157078697e-05, "loss": 0.422, "step": 350 }, { "epoch": 0.25, "learning_rate": 6.957847207952752e-05, "loss": 0.3904, "step": 352 }, { "epoch": 0.25, "learning_rate": 6.945502405758174e-05, "loss": 0.3854, "step": 354 }, { "epoch": 0.25, "learning_rate": 6.933096008643489e-05, "loss": 0.3958, "step": 356 }, { "epoch": 0.25, "learning_rate": 6.92062827604527e-05, "loss": 0.4068, "step": 358 }, { "epoch": 0.26, "learning_rate": 6.9080994686827e-05, "loss": 0.3632, "step": 360 }, { "epoch": 0.26, "learning_rate": 6.895509848552134e-05, "loss": 0.4087, "step": 362 }, { "epoch": 0.26, "learning_rate": 6.882859678921612e-05, "loss": 0.3684, "step": 364 }, { "epoch": 0.26, "learning_rate": 6.870149224325358e-05, "loss": 0.4083, "step": 366 }, { "epoch": 0.26, "learning_rate": 6.85737875055824e-05, "loss": 0.4025, "step": 368 }, { "epoch": 0.26, "learning_rate": 6.844548524670226e-05, "loss": 0.3936, "step": 370 }, { "epoch": 0.26, "learning_rate": 6.831658814960786e-05, "loss": 0.4363, "step": 372 }, { "epoch": 0.27, "learning_rate": 6.818709890973289e-05, "loss": 0.4119, "step": 374 }, { "epoch": 0.27, "learning_rate": 6.805702023489361e-05, "loss": 0.3917, "step": 376 }, { "epoch": 0.27, "learning_rate": 6.792635484523231e-05, "loss": 0.4127, "step": 378 }, { "epoch": 0.27, "learning_rate": 6.779510547316034e-05, "loss": 0.3575, "step": 380 }, { "epoch": 0.27, "learning_rate": 6.766327486330103e-05, "loss": 0.3646, "step": 382 }, { "epoch": 0.27, "learning_rate": 6.753086577243225e-05, "loss": 0.4003, "step": 384 }, { "epoch": 0.27, "learning_rate": 6.739788096942879e-05, "loss": 0.4276, "step": 386 }, { "epoch": 0.28, "learning_rate": 6.726432323520447e-05, "loss": 0.3895, "step": 388 }, { "epoch": 0.28, "learning_rate": 6.713019536265395e-05, "loss": 0.392, "step": 390 }, { "epoch": 0.28, "learning_rate": 6.699550015659437e-05, "loss": 0.3909, "step": 392 }, { "epoch": 0.28, "learning_rate": 6.686024043370667e-05, "loss": 0.39, "step": 394 }, { "epoch": 0.28, "learning_rate": 6.672441902247666e-05, "loss": 0.3835, "step": 396 }, { "epoch": 0.28, "learning_rate": 6.658803876313592e-05, "loss": 0.4331, "step": 398 }, { "epoch": 0.28, "learning_rate": 6.645110250760243e-05, "loss": 0.3894, "step": 400 }, { "epoch": 0.29, "learning_rate": 6.631361311942085e-05, "loss": 0.3964, "step": 402 }, { "epoch": 0.29, "learning_rate": 6.617557347370266e-05, "loss": 0.3844, "step": 404 }, { "epoch": 0.29, "learning_rate": 6.603698645706614e-05, "loss": 0.3761, "step": 406 }, { "epoch": 0.29, "learning_rate": 6.589785496757587e-05, "loss": 0.399, "step": 408 }, { "epoch": 0.29, "learning_rate": 6.575818191468217e-05, "loss": 0.4046, "step": 410 }, { "epoch": 0.29, "learning_rate": 6.561797021916031e-05, "loss": 0.402, "step": 412 }, { "epoch": 0.29, "learning_rate": 6.547722281304935e-05, "loss": 0.3824, "step": 414 }, { "epoch": 0.3, "learning_rate": 6.533594263959095e-05, "loss": 0.4048, "step": 416 }, { "epoch": 0.3, "learning_rate": 6.519413265316762e-05, "loss": 0.382, "step": 418 }, { "epoch": 0.3, "learning_rate": 6.50517958192412e-05, "loss": 0.3952, "step": 420 }, { "epoch": 0.3, "learning_rate": 6.49089351142906e-05, "loss": 0.3711, "step": 422 }, { "epoch": 0.3, "learning_rate": 6.476555352574973e-05, "loss": 0.3902, "step": 424 }, { "epoch": 0.3, "learning_rate": 6.462165405194492e-05, "loss": 0.4164, "step": 426 }, { "epoch": 0.3, "learning_rate": 6.447723970203232e-05, "loss": 0.4021, "step": 428 }, { "epoch": 0.31, "learning_rate": 6.433231349593487e-05, "loss": 0.3853, "step": 430 }, { "epoch": 0.31, "learning_rate": 6.418687846427922e-05, "loss": 0.385, "step": 432 }, { "epoch": 0.31, "learning_rate": 6.404093764833235e-05, "loss": 0.4017, "step": 434 }, { "epoch": 0.31, "learning_rate": 6.389449409993791e-05, "loss": 0.3751, "step": 436 }, { "epoch": 0.31, "learning_rate": 6.374755088145253e-05, "loss": 0.3937, "step": 438 }, { "epoch": 0.31, "learning_rate": 6.360011106568158e-05, "loss": 0.3854, "step": 440 }, { "epoch": 0.31, "learning_rate": 6.345217773581512e-05, "loss": 0.4066, "step": 442 }, { "epoch": 0.32, "learning_rate": 6.330375398536333e-05, "loss": 0.4047, "step": 444 }, { "epoch": 0.32, "learning_rate": 6.315484291809181e-05, "loss": 0.3834, "step": 446 }, { "epoch": 0.32, "learning_rate": 6.300544764795669e-05, "loss": 0.4043, "step": 448 }, { "epoch": 0.32, "learning_rate": 6.285557129903954e-05, "loss": 0.3703, "step": 450 }, { "epoch": 0.32, "learning_rate": 6.270521700548196e-05, "loss": 0.3748, "step": 452 }, { "epoch": 0.32, "learning_rate": 6.255438791142017e-05, "loss": 0.4021, "step": 454 }, { "epoch": 0.32, "learning_rate": 6.240308717091913e-05, "loss": 0.3973, "step": 456 }, { "epoch": 0.33, "learning_rate": 6.22513179479067e-05, "loss": 0.3821, "step": 458 }, { "epoch": 0.33, "learning_rate": 6.209908341610736e-05, "loss": 0.4095, "step": 460 }, { "epoch": 0.33, "learning_rate": 6.194638675897591e-05, "loss": 0.4244, "step": 462 }, { "epoch": 0.33, "learning_rate": 6.179323116963095e-05, "loss": 0.3658, "step": 464 }, { "epoch": 0.33, "learning_rate": 6.163961985078798e-05, "loss": 0.3782, "step": 466 }, { "epoch": 0.33, "learning_rate": 6.148555601469252e-05, "loss": 0.4021, "step": 468 }, { "epoch": 0.33, "learning_rate": 6.133104288305293e-05, "loss": 0.3933, "step": 470 }, { "epoch": 0.34, "learning_rate": 6.1176083686973e-05, "loss": 0.3814, "step": 472 }, { "epoch": 0.34, "learning_rate": 6.1020681666884406e-05, "loss": 0.3991, "step": 474 }, { "epoch": 0.34, "learning_rate": 6.086484007247894e-05, "loss": 0.4124, "step": 476 }, { "epoch": 0.34, "learning_rate": 6.070856216264057e-05, "loss": 0.369, "step": 478 }, { "epoch": 0.34, "learning_rate": 6.055185120537728e-05, "loss": 0.379, "step": 480 }, { "epoch": 0.34, "learning_rate": 6.039471047775273e-05, "loss": 0.4135, "step": 482 }, { "epoch": 0.34, "learning_rate": 6.023714326581772e-05, "loss": 0.3477, "step": 484 }, { "epoch": 0.35, "learning_rate": 6.0079152864541506e-05, "loss": 0.4139, "step": 486 }, { "epoch": 0.35, "learning_rate": 5.992074257774285e-05, "loss": 0.3711, "step": 488 }, { "epoch": 0.35, "learning_rate": 5.976191571802097e-05, "loss": 0.4052, "step": 490 }, { "epoch": 0.35, "learning_rate": 5.960267560668625e-05, "loss": 0.402, "step": 492 }, { "epoch": 0.35, "learning_rate": 5.9443025573690795e-05, "loss": 0.3992, "step": 494 }, { "epoch": 0.35, "learning_rate": 5.928296895755882e-05, "loss": 0.3895, "step": 496 }, { "epoch": 0.35, "learning_rate": 5.9122509105316736e-05, "loss": 0.4004, "step": 498 }, { "epoch": 0.36, "learning_rate": 5.896164937242331e-05, "loss": 0.3736, "step": 500 }, { "epoch": 0.36, "learning_rate": 5.880039312269939e-05, "loss": 0.3452, "step": 502 }, { "epoch": 0.36, "learning_rate": 5.863874372825757e-05, "loss": 0.3806, "step": 504 }, { "epoch": 0.36, "learning_rate": 5.847670456943174e-05, "loss": 0.3684, "step": 506 }, { "epoch": 0.36, "learning_rate": 5.831427903470631e-05, "loss": 0.3891, "step": 508 }, { "epoch": 0.36, "learning_rate": 5.815147052064543e-05, "loss": 0.3882, "step": 510 }, { "epoch": 0.36, "learning_rate": 5.798828243182191e-05, "loss": 0.3889, "step": 512 }, { "epoch": 0.37, "learning_rate": 5.7824718180746034e-05, "loss": 0.3882, "step": 514 }, { "epoch": 0.37, "learning_rate": 5.766078118779424e-05, "loss": 0.3863, "step": 516 }, { "epoch": 0.37, "learning_rate": 5.749647488113755e-05, "loss": 0.3629, "step": 518 }, { "epoch": 0.37, "learning_rate": 5.7331802696669885e-05, "loss": 0.3666, "step": 520 }, { "epoch": 0.37, "learning_rate": 5.716676807793624e-05, "loss": 0.3499, "step": 522 }, { "epoch": 0.37, "learning_rate": 5.7001374476060646e-05, "loss": 0.4004, "step": 524 }, { "epoch": 0.37, "learning_rate": 5.683562534967401e-05, "loss": 0.3468, "step": 526 }, { "epoch": 0.38, "learning_rate": 5.666952416484183e-05, "loss": 0.3729, "step": 528 }, { "epoch": 0.38, "learning_rate": 5.650307439499164e-05, "loss": 0.3763, "step": 530 }, { "epoch": 0.38, "learning_rate": 5.6336279520840445e-05, "loss": 0.3543, "step": 532 }, { "epoch": 0.38, "learning_rate": 5.61691430303219e-05, "loss": 0.4355, "step": 534 }, { "epoch": 0.38, "learning_rate": 5.600166841851333e-05, "loss": 0.3764, "step": 536 }, { "epoch": 0.38, "learning_rate": 5.583385918756277e-05, "loss": 0.3594, "step": 538 }, { "epoch": 0.38, "learning_rate": 5.5665718846615584e-05, "loss": 0.3589, "step": 540 }, { "epoch": 0.39, "learning_rate": 5.549725091174117e-05, "loss": 0.3726, "step": 542 }, { "epoch": 0.39, "learning_rate": 5.532845890585941e-05, "loss": 0.3803, "step": 544 }, { "epoch": 0.39, "learning_rate": 5.5159346358667e-05, "loss": 0.3661, "step": 546 }, { "epoch": 0.39, "learning_rate": 5.4989916806563655e-05, "loss": 0.3902, "step": 548 }, { "epoch": 0.39, "learning_rate": 5.482017379257812e-05, "loss": 0.3953, "step": 550 }, { "epoch": 0.39, "learning_rate": 5.465012086629411e-05, "loss": 0.3633, "step": 552 }, { "epoch": 0.39, "learning_rate": 5.447976158377611e-05, "loss": 0.3559, "step": 554 }, { "epoch": 0.4, "learning_rate": 5.430909950749491e-05, "loss": 0.3633, "step": 556 }, { "epoch": 0.4, "learning_rate": 5.413813820625324e-05, "loss": 0.3553, "step": 558 }, { "epoch": 0.4, "learning_rate": 5.3966881255111024e-05, "loss": 0.3908, "step": 560 }, { "epoch": 0.4, "learning_rate": 5.3795332235310744e-05, "loss": 0.3674, "step": 562 }, { "epoch": 0.4, "learning_rate": 5.3623494734202415e-05, "loss": 0.4049, "step": 564 }, { "epoch": 0.4, "learning_rate": 5.345137234516864e-05, "loss": 0.351, "step": 566 }, { "epoch": 0.4, "learning_rate": 5.327896866754953e-05, "loss": 0.3518, "step": 568 }, { "epoch": 0.41, "learning_rate": 5.310628730656726e-05, "loss": 0.3458, "step": 570 }, { "epoch": 0.41, "learning_rate": 5.293333187325086e-05, "loss": 0.3756, "step": 572 }, { "epoch": 0.41, "learning_rate": 5.276010598436061e-05, "loss": 0.3508, "step": 574 }, { "epoch": 0.41, "learning_rate": 5.2586613262312414e-05, "loss": 0.3665, "step": 576 }, { "epoch": 0.41, "learning_rate": 5.241285733510209e-05, "loss": 0.3502, "step": 578 }, { "epoch": 0.41, "learning_rate": 5.223884183622943e-05, "loss": 0.3846, "step": 580 }, { "epoch": 0.41, "learning_rate": 5.20645704046223e-05, "loss": 0.3475, "step": 582 }, { "epoch": 0.42, "learning_rate": 5.189004668456051e-05, "loss": 0.4049, "step": 584 }, { "epoch": 0.42, "learning_rate": 5.171527432559957e-05, "loss": 0.3801, "step": 586 }, { "epoch": 0.42, "learning_rate": 5.1540256982494425e-05, "loss": 0.4386, "step": 588 }, { "epoch": 0.42, "learning_rate": 5.1364998315123024e-05, "loss": 0.3385, "step": 590 }, { "epoch": 0.42, "learning_rate": 5.118950198840977e-05, "loss": 0.3704, "step": 592 }, { "epoch": 0.42, "learning_rate": 5.101377167224885e-05, "loss": 0.3842, "step": 594 }, { "epoch": 0.42, "learning_rate": 5.0837811041427534e-05, "loss": 0.3807, "step": 596 }, { "epoch": 0.43, "learning_rate": 5.0661623775549376e-05, "loss": 0.377, "step": 598 }, { "epoch": 0.43, "learning_rate": 5.048521355895714e-05, "loss": 0.3896, "step": 600 }, { "epoch": 0.43, "learning_rate": 5.030858408065587e-05, "loss": 0.3721, "step": 602 }, { "epoch": 0.43, "learning_rate": 5.01317390342357e-05, "loss": 0.3744, "step": 604 }, { "epoch": 0.43, "learning_rate": 4.995468211779459e-05, "loss": 0.4038, "step": 606 }, { "epoch": 0.43, "learning_rate": 4.977741703386107e-05, "loss": 0.3491, "step": 608 }, { "epoch": 0.43, "learning_rate": 4.9599947489316725e-05, "loss": 0.3655, "step": 610 }, { "epoch": 0.44, "learning_rate": 4.942227719531875e-05, "loss": 0.3962, "step": 612 }, { "epoch": 0.44, "learning_rate": 4.924440986722231e-05, "loss": 0.3642, "step": 614 }, { "epoch": 0.44, "learning_rate": 4.906634922450285e-05, "loss": 0.3703, "step": 616 }, { "epoch": 0.44, "learning_rate": 4.888809899067829e-05, "loss": 0.3628, "step": 618 }, { "epoch": 0.44, "learning_rate": 4.870966289323123e-05, "loss": 0.3639, "step": 620 }, { "epoch": 0.44, "learning_rate": 4.8531044663530944e-05, "loss": 0.3929, "step": 622 }, { "epoch": 0.44, "learning_rate": 4.835224803675537e-05, "loss": 0.3438, "step": 624 }, { "epoch": 0.45, "learning_rate": 4.817327675181297e-05, "loss": 0.3349, "step": 626 }, { "epoch": 0.45, "learning_rate": 4.799413455126461e-05, "loss": 0.3395, "step": 628 }, { "epoch": 0.45, "learning_rate": 4.7814825181245254e-05, "loss": 0.3754, "step": 630 }, { "epoch": 0.45, "learning_rate": 4.763535239138562e-05, "loss": 0.3264, "step": 632 }, { "epoch": 0.45, "learning_rate": 4.74557199347338e-05, "loss": 0.3946, "step": 634 }, { "epoch": 0.45, "learning_rate": 4.7275931567676735e-05, "loss": 0.3699, "step": 636 }, { "epoch": 0.45, "learning_rate": 4.709599104986172e-05, "loss": 0.3708, "step": 638 }, { "epoch": 0.46, "learning_rate": 4.691590214411772e-05, "loss": 0.3557, "step": 640 }, { "epoch": 0.46, "learning_rate": 4.6735668616376734e-05, "loss": 0.3537, "step": 642 }, { "epoch": 0.46, "learning_rate": 4.6555294235595047e-05, "loss": 0.3601, "step": 644 }, { "epoch": 0.46, "learning_rate": 4.637478277367436e-05, "loss": 0.366, "step": 646 }, { "epoch": 0.46, "learning_rate": 4.6194138005382945e-05, "loss": 0.3928, "step": 648 }, { "epoch": 0.46, "learning_rate": 4.6013363708276754e-05, "loss": 0.3209, "step": 650 }, { "epoch": 0.46, "learning_rate": 4.5832463662620353e-05, "loss": 0.3796, "step": 652 } ], "max_steps": 1403, "num_train_epochs": 1, "total_flos": 3.846934282119414e+17, "trial_name": null, "trial_params": null }