{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.687523259161477, "global_step": 810000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 4.9824459158667857e-05, "loss": 1.0594, "step": 5000 }, { "epoch": 0.07, "learning_rate": 4.964891831733571e-05, "loss": 1.0381, "step": 10000 }, { "epoch": 0.11, "learning_rate": 4.947337747600357e-05, "loss": 1.014, "step": 15000 }, { "epoch": 0.14, "learning_rate": 4.9297836634671426e-05, "loss": 0.9997, "step": 20000 }, { "epoch": 0.18, "learning_rate": 4.912229579333928e-05, "loss": 0.9936, "step": 25000 }, { "epoch": 0.21, "learning_rate": 4.8946754952007134e-05, "loss": 0.9765, "step": 30000 }, { "epoch": 0.25, "learning_rate": 4.877121411067499e-05, "loss": 0.975, "step": 35000 }, { "epoch": 0.28, "learning_rate": 4.859567326934284e-05, "loss": 0.9627, "step": 40000 }, { "epoch": 0.32, "learning_rate": 4.84201324280107e-05, "loss": 0.9641, "step": 45000 }, { "epoch": 0.35, "learning_rate": 4.8244591586678564e-05, "loss": 0.9562, "step": 50000 }, { "epoch": 0.39, "learning_rate": 4.806905074534642e-05, "loss": 0.9634, "step": 55000 }, { "epoch": 0.42, "learning_rate": 4.789350990401427e-05, "loss": 0.9532, "step": 60000 }, { "epoch": 0.46, "learning_rate": 4.771796906268213e-05, "loss": 0.9539, "step": 65000 }, { "epoch": 0.49, "learning_rate": 4.754242822134998e-05, "loss": 0.9599, "step": 70000 }, { "epoch": 0.53, "learning_rate": 4.7366887380017835e-05, "loss": 0.9437, "step": 75000 }, { "epoch": 0.56, "learning_rate": 4.7191346538685696e-05, "loss": 0.9395, "step": 80000 }, { "epoch": 0.6, "learning_rate": 4.701580569735355e-05, "loss": 0.9385, "step": 85000 }, { "epoch": 0.63, "learning_rate": 4.6840264856021404e-05, "loss": 0.947, "step": 90000 }, { "epoch": 0.67, "learning_rate": 4.666472401468926e-05, "loss": 0.9402, "step": 95000 }, { "epoch": 0.7, "learning_rate": 4.648918317335711e-05, "loss": 0.9317, "step": 100000 }, { "epoch": 0.74, "learning_rate": 4.631364233202497e-05, "loss": 0.9316, "step": 105000 }, { "epoch": 0.77, "learning_rate": 4.613810149069283e-05, "loss": 0.9399, "step": 110000 }, { "epoch": 0.81, "learning_rate": 4.596256064936068e-05, "loss": 0.9395, "step": 115000 }, { "epoch": 0.84, "learning_rate": 4.5787019808028536e-05, "loss": 0.9303, "step": 120000 }, { "epoch": 0.88, "learning_rate": 4.561147896669639e-05, "loss": 0.9304, "step": 125000 }, { "epoch": 0.91, "learning_rate": 4.5435938125364244e-05, "loss": 0.9126, "step": 130000 }, { "epoch": 0.95, "learning_rate": 4.5260397284032105e-05, "loss": 0.9165, "step": 135000 }, { "epoch": 0.98, "learning_rate": 4.5084856442699966e-05, "loss": 0.9311, "step": 140000 }, { "epoch": 1.02, "learning_rate": 4.490931560136782e-05, "loss": 0.9139, "step": 145000 }, { "epoch": 1.05, "learning_rate": 4.4733774760035675e-05, "loss": 0.9097, "step": 150000 }, { "epoch": 1.09, "learning_rate": 4.455823391870353e-05, "loss": 0.9086, "step": 155000 }, { "epoch": 1.12, "learning_rate": 4.438269307737138e-05, "loss": 0.9172, "step": 160000 }, { "epoch": 1.16, "learning_rate": 4.420715223603924e-05, "loss": 0.9106, "step": 165000 }, { "epoch": 1.19, "learning_rate": 4.40316113947071e-05, "loss": 0.9001, "step": 170000 }, { "epoch": 1.23, "learning_rate": 4.385607055337495e-05, "loss": 0.9108, "step": 175000 }, { "epoch": 1.26, "learning_rate": 4.3680529712042806e-05, "loss": 0.911, "step": 180000 }, { "epoch": 1.3, "learning_rate": 4.350498887071066e-05, "loss": 0.9007, "step": 185000 }, { "epoch": 1.33, "learning_rate": 4.3329448029378515e-05, "loss": 0.8908, "step": 190000 }, { "epoch": 1.37, "learning_rate": 4.315390718804637e-05, "loss": 0.897, "step": 195000 }, { "epoch": 1.4, "learning_rate": 4.297836634671423e-05, "loss": 0.902, "step": 200000 }, { "epoch": 1.44, "learning_rate": 4.2802825505382084e-05, "loss": 0.8918, "step": 205000 }, { "epoch": 1.47, "learning_rate": 4.262728466404994e-05, "loss": 0.889, "step": 210000 }, { "epoch": 1.51, "learning_rate": 4.245174382271779e-05, "loss": 0.8929, "step": 215000 }, { "epoch": 1.54, "learning_rate": 4.2276202981385646e-05, "loss": 0.8974, "step": 220000 }, { "epoch": 1.58, "learning_rate": 4.210066214005351e-05, "loss": 0.8932, "step": 225000 }, { "epoch": 1.61, "learning_rate": 4.192512129872136e-05, "loss": 0.8901, "step": 230000 }, { "epoch": 1.65, "learning_rate": 4.174958045738922e-05, "loss": 0.8849, "step": 235000 }, { "epoch": 1.69, "learning_rate": 4.1574039616057077e-05, "loss": 0.8801, "step": 240000 }, { "epoch": 1.72, "learning_rate": 4.139849877472493e-05, "loss": 0.8807, "step": 245000 }, { "epoch": 1.76, "learning_rate": 4.1222957933392785e-05, "loss": 0.8847, "step": 250000 }, { "epoch": 1.79, "learning_rate": 4.104741709206064e-05, "loss": 0.8753, "step": 255000 }, { "epoch": 1.83, "learning_rate": 4.08718762507285e-05, "loss": 0.8764, "step": 260000 }, { "epoch": 1.86, "learning_rate": 4.0696335409396354e-05, "loss": 0.8748, "step": 265000 }, { "epoch": 1.9, "learning_rate": 4.052079456806421e-05, "loss": 0.8789, "step": 270000 }, { "epoch": 1.93, "learning_rate": 4.034525372673206e-05, "loss": 0.875, "step": 275000 }, { "epoch": 1.97, "learning_rate": 4.016971288539992e-05, "loss": 0.8711, "step": 280000 }, { "epoch": 2.0, "learning_rate": 3.999417204406777e-05, "loss": 0.8688, "step": 285000 }, { "epoch": 2.04, "learning_rate": 3.981863120273563e-05, "loss": 0.8594, "step": 290000 }, { "epoch": 2.07, "learning_rate": 3.9643090361403486e-05, "loss": 0.8595, "step": 295000 }, { "epoch": 2.11, "learning_rate": 3.946754952007134e-05, "loss": 0.8536, "step": 300000 }, { "epoch": 2.14, "learning_rate": 3.9292008678739194e-05, "loss": 0.8618, "step": 305000 }, { "epoch": 2.18, "learning_rate": 3.911646783740705e-05, "loss": 0.8511, "step": 310000 }, { "epoch": 2.21, "learning_rate": 3.894092699607491e-05, "loss": 0.8543, "step": 315000 }, { "epoch": 2.25, "learning_rate": 3.8765386154742763e-05, "loss": 0.856, "step": 320000 }, { "epoch": 2.28, "learning_rate": 3.8589845313410624e-05, "loss": 0.8608, "step": 325000 }, { "epoch": 2.32, "learning_rate": 3.841430447207848e-05, "loss": 0.8449, "step": 330000 }, { "epoch": 2.35, "learning_rate": 3.823876363074633e-05, "loss": 0.8527, "step": 335000 }, { "epoch": 2.39, "learning_rate": 3.806322278941419e-05, "loss": 0.8441, "step": 340000 }, { "epoch": 2.42, "learning_rate": 3.788768194808204e-05, "loss": 0.8436, "step": 345000 }, { "epoch": 2.46, "learning_rate": 3.77121411067499e-05, "loss": 0.8448, "step": 350000 }, { "epoch": 2.49, "learning_rate": 3.7536600265417756e-05, "loss": 0.8348, "step": 355000 }, { "epoch": 2.53, "learning_rate": 3.736105942408561e-05, "loss": 0.8466, "step": 360000 }, { "epoch": 2.56, "learning_rate": 3.7185518582753464e-05, "loss": 0.8354, "step": 365000 }, { "epoch": 2.6, "learning_rate": 3.700997774142132e-05, "loss": 0.8421, "step": 370000 }, { "epoch": 2.63, "learning_rate": 3.683443690008917e-05, "loss": 0.8354, "step": 375000 }, { "epoch": 2.67, "learning_rate": 3.6658896058757034e-05, "loss": 0.8358, "step": 380000 }, { "epoch": 2.7, "learning_rate": 3.648335521742489e-05, "loss": 0.8414, "step": 385000 }, { "epoch": 2.74, "learning_rate": 3.630781437609274e-05, "loss": 0.8428, "step": 390000 }, { "epoch": 2.77, "learning_rate": 3.6132273534760596e-05, "loss": 0.8303, "step": 395000 }, { "epoch": 2.81, "learning_rate": 3.595673269342845e-05, "loss": 0.834, "step": 400000 }, { "epoch": 2.84, "learning_rate": 3.578119185209631e-05, "loss": 0.8311, "step": 405000 }, { "epoch": 2.88, "learning_rate": 3.5605651010764165e-05, "loss": 0.8245, "step": 410000 }, { "epoch": 2.91, "learning_rate": 3.5430110169432026e-05, "loss": 0.8211, "step": 415000 }, { "epoch": 2.95, "learning_rate": 3.525456932809988e-05, "loss": 0.8154, "step": 420000 }, { "epoch": 2.98, "learning_rate": 3.5079028486767735e-05, "loss": 0.8196, "step": 425000 }, { "epoch": 3.02, "learning_rate": 3.490348764543559e-05, "loss": 0.8188, "step": 430000 }, { "epoch": 3.05, "learning_rate": 3.472794680410344e-05, "loss": 0.81, "step": 435000 }, { "epoch": 3.09, "learning_rate": 3.45524059627713e-05, "loss": 0.8054, "step": 440000 }, { "epoch": 3.12, "learning_rate": 3.437686512143916e-05, "loss": 0.8156, "step": 445000 }, { "epoch": 3.16, "learning_rate": 3.420132428010701e-05, "loss": 0.8032, "step": 450000 }, { "epoch": 3.19, "learning_rate": 3.4025783438774866e-05, "loss": 0.8109, "step": 455000 }, { "epoch": 3.23, "learning_rate": 3.385024259744272e-05, "loss": 0.812, "step": 460000 }, { "epoch": 3.27, "learning_rate": 3.3674701756110575e-05, "loss": 0.8102, "step": 465000 }, { "epoch": 3.3, "learning_rate": 3.3499160914778436e-05, "loss": 0.8032, "step": 470000 }, { "epoch": 3.34, "learning_rate": 3.332362007344629e-05, "loss": 0.8098, "step": 475000 }, { "epoch": 3.37, "learning_rate": 3.3148079232114144e-05, "loss": 0.8057, "step": 480000 }, { "epoch": 3.41, "learning_rate": 3.2972538390782e-05, "loss": 0.8026, "step": 485000 }, { "epoch": 3.44, "learning_rate": 3.279699754944985e-05, "loss": 0.8039, "step": 490000 }, { "epoch": 3.48, "learning_rate": 3.262145670811771e-05, "loss": 0.8024, "step": 495000 }, { "epoch": 3.51, "learning_rate": 3.244591586678557e-05, "loss": 0.8018, "step": 500000 }, { "epoch": 3.55, "learning_rate": 3.227037502545343e-05, "loss": 0.7953, "step": 505000 }, { "epoch": 3.58, "learning_rate": 3.209483418412128e-05, "loss": 0.7895, "step": 510000 }, { "epoch": 3.62, "learning_rate": 3.191929334278914e-05, "loss": 0.7878, "step": 515000 }, { "epoch": 3.65, "learning_rate": 3.174375250145699e-05, "loss": 0.794, "step": 520000 }, { "epoch": 3.69, "learning_rate": 3.1568211660124845e-05, "loss": 0.7998, "step": 525000 }, { "epoch": 3.72, "learning_rate": 3.13926708187927e-05, "loss": 0.7819, "step": 530000 }, { "epoch": 3.76, "learning_rate": 3.121712997746056e-05, "loss": 0.7917, "step": 535000 }, { "epoch": 3.79, "learning_rate": 3.1041589136128414e-05, "loss": 0.7827, "step": 540000 }, { "epoch": 3.83, "learning_rate": 3.086604829479627e-05, "loss": 0.7874, "step": 545000 }, { "epoch": 3.86, "learning_rate": 3.069050745346412e-05, "loss": 0.7788, "step": 550000 }, { "epoch": 3.9, "learning_rate": 3.051496661213198e-05, "loss": 0.7821, "step": 555000 }, { "epoch": 3.93, "learning_rate": 3.0339425770799834e-05, "loss": 0.7828, "step": 560000 }, { "epoch": 3.97, "learning_rate": 3.016388492946769e-05, "loss": 0.7769, "step": 565000 }, { "epoch": 4.0, "learning_rate": 2.9988344088135546e-05, "loss": 0.7779, "step": 570000 }, { "epoch": 4.04, "learning_rate": 2.98128032468034e-05, "loss": 0.7633, "step": 575000 }, { "epoch": 4.07, "learning_rate": 2.963726240547126e-05, "loss": 0.765, "step": 580000 }, { "epoch": 4.11, "learning_rate": 2.9461721564139115e-05, "loss": 0.7614, "step": 585000 }, { "epoch": 4.14, "learning_rate": 2.9286180722806973e-05, "loss": 0.7669, "step": 590000 }, { "epoch": 4.18, "learning_rate": 2.9110639881474827e-05, "loss": 0.7721, "step": 595000 }, { "epoch": 4.21, "learning_rate": 2.893509904014268e-05, "loss": 0.7633, "step": 600000 }, { "epoch": 4.25, "learning_rate": 2.875955819881054e-05, "loss": 0.7618, "step": 605000 }, { "epoch": 4.28, "learning_rate": 2.8584017357478393e-05, "loss": 0.7603, "step": 610000 }, { "epoch": 4.32, "learning_rate": 2.8408476516146247e-05, "loss": 0.7594, "step": 615000 }, { "epoch": 4.35, "learning_rate": 2.8232935674814104e-05, "loss": 0.7612, "step": 620000 }, { "epoch": 4.39, "learning_rate": 2.805739483348196e-05, "loss": 0.7616, "step": 625000 }, { "epoch": 4.42, "learning_rate": 2.7881853992149816e-05, "loss": 0.7628, "step": 630000 }, { "epoch": 4.46, "learning_rate": 2.770631315081767e-05, "loss": 0.7638, "step": 635000 }, { "epoch": 4.49, "learning_rate": 2.7530772309485525e-05, "loss": 0.7469, "step": 640000 }, { "epoch": 4.53, "learning_rate": 2.7355231468153382e-05, "loss": 0.7477, "step": 645000 }, { "epoch": 4.56, "learning_rate": 2.7179690626821236e-05, "loss": 0.7501, "step": 650000 }, { "epoch": 4.6, "learning_rate": 2.700414978548909e-05, "loss": 0.7513, "step": 655000 }, { "epoch": 4.63, "learning_rate": 2.6828608944156948e-05, "loss": 0.751, "step": 660000 }, { "epoch": 4.67, "learning_rate": 2.6653068102824802e-05, "loss": 0.7491, "step": 665000 }, { "epoch": 4.7, "learning_rate": 2.6477527261492663e-05, "loss": 0.7516, "step": 670000 }, { "epoch": 4.74, "learning_rate": 2.6301986420160517e-05, "loss": 0.7511, "step": 675000 }, { "epoch": 4.74, "eval_loss": 0.6953830122947693, "eval_runtime": 12.1778, "eval_samples_per_second": 82.116, "eval_steps_per_second": 10.265, "step": 675000 }, { "epoch": 4.77, "learning_rate": 2.6126445578828375e-05, "loss": 0.7395, "step": 680000 }, { "epoch": 4.77, "eval_loss": 0.6852219104766846, "eval_runtime": 12.2181, "eval_samples_per_second": 81.846, "eval_steps_per_second": 10.231, "step": 680000 }, { "epoch": 4.81, "learning_rate": 2.595090473749623e-05, "loss": 0.7445, "step": 685000 }, { "epoch": 4.81, "eval_loss": 0.7140026688575745, "eval_runtime": 12.2246, "eval_samples_per_second": 81.802, "eval_steps_per_second": 10.225, "step": 685000 }, { "epoch": 4.84, "learning_rate": 2.5775363896164083e-05, "loss": 0.7448, "step": 690000 }, { "epoch": 4.84, "eval_loss": 0.678001880645752, "eval_runtime": 12.1996, "eval_samples_per_second": 81.97, "eval_steps_per_second": 10.246, "step": 690000 }, { "epoch": 4.88, "learning_rate": 2.559982305483194e-05, "loss": 0.7392, "step": 695000 }, { "epoch": 4.88, "eval_loss": 0.6525120139122009, "eval_runtime": 12.2116, "eval_samples_per_second": 81.889, "eval_steps_per_second": 10.236, "step": 695000 }, { "epoch": 4.92, "learning_rate": 2.5424282213499795e-05, "loss": 0.7499, "step": 700000 }, { "epoch": 4.92, "eval_loss": 0.6543171405792236, "eval_runtime": 12.2207, "eval_samples_per_second": 81.828, "eval_steps_per_second": 10.229, "step": 700000 }, { "epoch": 4.95, "learning_rate": 2.524874137216765e-05, "loss": 0.7393, "step": 705000 }, { "epoch": 4.95, "eval_loss": 0.665242075920105, "eval_runtime": 5.3758, "eval_samples_per_second": 186.02, "eval_steps_per_second": 23.253, "step": 705000 }, { "epoch": 4.99, "learning_rate": 2.5073200530835506e-05, "loss": 0.7324, "step": 710000 }, { "epoch": 4.99, "eval_loss": 0.6618428826332092, "eval_runtime": 5.3906, "eval_samples_per_second": 185.507, "eval_steps_per_second": 23.188, "step": 710000 }, { "epoch": 5.02, "learning_rate": 2.489765968950336e-05, "loss": 0.7261, "step": 715000 }, { "epoch": 5.02, "eval_loss": 0.6423526406288147, "eval_runtime": 5.3892, "eval_samples_per_second": 185.555, "eval_steps_per_second": 23.194, "step": 715000 }, { "epoch": 5.06, "learning_rate": 2.4722118848171215e-05, "loss": 0.7327, "step": 720000 }, { "epoch": 5.06, "eval_loss": 0.6585870385169983, "eval_runtime": 5.3853, "eval_samples_per_second": 185.69, "eval_steps_per_second": 23.211, "step": 720000 }, { "epoch": 5.09, "learning_rate": 2.4546578006839072e-05, "loss": 0.7265, "step": 725000 }, { "epoch": 5.09, "eval_loss": 0.6187921762466431, "eval_runtime": 5.3825, "eval_samples_per_second": 185.787, "eval_steps_per_second": 23.223, "step": 725000 }, { "epoch": 5.13, "learning_rate": 2.4371037165506926e-05, "loss": 0.7247, "step": 730000 }, { "epoch": 5.13, "eval_loss": 0.6582339406013489, "eval_runtime": 5.3823, "eval_samples_per_second": 185.796, "eval_steps_per_second": 23.224, "step": 730000 }, { "epoch": 5.16, "learning_rate": 2.419549632417478e-05, "loss": 0.7265, "step": 735000 }, { "epoch": 5.16, "eval_loss": 0.7226254940032959, "eval_runtime": 5.3797, "eval_samples_per_second": 185.883, "eval_steps_per_second": 23.235, "step": 735000 }, { "epoch": 5.2, "learning_rate": 2.401995548284264e-05, "loss": 0.7166, "step": 740000 }, { "epoch": 5.2, "eval_loss": 0.6698991656303406, "eval_runtime": 5.3768, "eval_samples_per_second": 185.986, "eval_steps_per_second": 23.248, "step": 740000 }, { "epoch": 5.23, "learning_rate": 2.3844414641510496e-05, "loss": 0.7214, "step": 745000 }, { "epoch": 5.23, "eval_loss": 0.6653444170951843, "eval_runtime": 5.3777, "eval_samples_per_second": 185.954, "eval_steps_per_second": 23.244, "step": 745000 }, { "epoch": 5.27, "learning_rate": 2.366887380017835e-05, "loss": 0.7268, "step": 750000 }, { "epoch": 5.27, "eval_loss": 0.6490678787231445, "eval_runtime": 5.3962, "eval_samples_per_second": 185.314, "eval_steps_per_second": 23.164, "step": 750000 }, { "epoch": 5.3, "learning_rate": 2.3493332958846207e-05, "loss": 0.7177, "step": 755000 }, { "epoch": 5.3, "eval_loss": 0.6720253825187683, "eval_runtime": 5.3813, "eval_samples_per_second": 185.828, "eval_steps_per_second": 23.229, "step": 755000 }, { "epoch": 5.34, "learning_rate": 2.331779211751406e-05, "loss": 0.7173, "step": 760000 }, { "epoch": 5.34, "eval_loss": 0.636309027671814, "eval_runtime": 5.3741, "eval_samples_per_second": 186.079, "eval_steps_per_second": 23.26, "step": 760000 }, { "epoch": 5.37, "learning_rate": 2.3142251276181916e-05, "loss": 0.7222, "step": 765000 }, { "epoch": 5.37, "eval_loss": 0.6736326813697815, "eval_runtime": 5.3844, "eval_samples_per_second": 185.723, "eval_steps_per_second": 23.215, "step": 765000 }, { "epoch": 5.41, "learning_rate": 2.2966710434849773e-05, "loss": 0.7189, "step": 770000 }, { "epoch": 5.41, "eval_loss": 0.6502253413200378, "eval_runtime": 5.3808, "eval_samples_per_second": 185.846, "eval_steps_per_second": 23.231, "step": 770000 }, { "epoch": 5.44, "learning_rate": 2.2791169593517627e-05, "loss": 0.7142, "step": 775000 }, { "epoch": 5.44, "eval_loss": 0.6675522327423096, "eval_runtime": 5.3769, "eval_samples_per_second": 185.982, "eval_steps_per_second": 23.248, "step": 775000 }, { "epoch": 5.48, "learning_rate": 2.2615628752185485e-05, "loss": 0.7123, "step": 780000 }, { "epoch": 5.48, "eval_loss": 0.7307547330856323, "eval_runtime": 5.3752, "eval_samples_per_second": 186.041, "eval_steps_per_second": 23.255, "step": 780000 }, { "epoch": 5.51, "learning_rate": 2.2440087910853343e-05, "loss": 0.7149, "step": 785000 }, { "epoch": 5.51, "eval_loss": 0.6528046727180481, "eval_runtime": 5.3874, "eval_samples_per_second": 185.618, "eval_steps_per_second": 23.202, "step": 785000 }, { "epoch": 5.55, "learning_rate": 2.2264547069521197e-05, "loss": 0.7111, "step": 790000 }, { "epoch": 5.55, "eval_loss": 0.6415424942970276, "eval_runtime": 5.3848, "eval_samples_per_second": 185.708, "eval_steps_per_second": 23.213, "step": 790000 }, { "epoch": 5.58, "learning_rate": 2.208900622818905e-05, "loss": 0.7126, "step": 795000 }, { "epoch": 5.58, "eval_loss": 0.664243221282959, "eval_runtime": 5.3818, "eval_samples_per_second": 185.812, "eval_steps_per_second": 23.226, "step": 795000 }, { "epoch": 5.62, "learning_rate": 2.191346538685691e-05, "loss": 0.7075, "step": 800000 }, { "epoch": 5.62, "eval_loss": 0.6190058588981628, "eval_runtime": 5.3768, "eval_samples_per_second": 185.985, "eval_steps_per_second": 23.248, "step": 800000 }, { "epoch": 5.65, "learning_rate": 2.1737924545524763e-05, "loss": 0.7047, "step": 805000 }, { "epoch": 5.65, "eval_loss": 0.645745038986206, "eval_runtime": 5.3814, "eval_samples_per_second": 185.824, "eval_steps_per_second": 23.228, "step": 805000 }, { "epoch": 5.69, "learning_rate": 2.1562383704192617e-05, "loss": 0.7002, "step": 810000 }, { "epoch": 5.69, "eval_loss": 0.6156101226806641, "eval_runtime": 5.3698, "eval_samples_per_second": 186.226, "eval_steps_per_second": 23.278, "step": 810000 } ], "max_steps": 1424170, "num_train_epochs": 10, "total_flos": 3.3150958919481446e+17, "trial_name": null, "trial_params": null }