{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.352, "global_step": 550000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5e-06, "loss": 16.5558, "step": 100 }, { "epoch": 0.0, "learning_rate": 1e-05, "loss": 11.6232, "step": 200 }, { "epoch": 0.0, "learning_rate": 1.5e-05, "loss": 8.3396, "step": 300 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 6.7527, "step": 400 }, { "epoch": 0.0, "learning_rate": 2.5e-05, "loss": 5.5018, "step": 500 }, { "epoch": 0.0, "learning_rate": 3e-05, "loss": 4.7658, "step": 600 }, { "epoch": 0.0, "learning_rate": 3.5e-05, "loss": 4.1339, "step": 700 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 3.6686, "step": 800 }, { "epoch": 0.0, "learning_rate": 4.5e-05, "loss": 3.4014, "step": 900 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 3.2833, "step": 1000 }, { "epoch": 0.0, "eval_loss": 2.328969717025757, "eval_runtime": 92.037, "eval_samples_per_second": 108.652, "eval_steps_per_second": 6.791, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.999679795068844e-05, "loss": 3.2188, "step": 1100 }, { "epoch": 0.0, "learning_rate": 4.999359590137688e-05, "loss": 3.0852, "step": 1200 }, { "epoch": 0.0, "learning_rate": 4.999039385206533e-05, "loss": 3.0373, "step": 1300 }, { "epoch": 0.0, "learning_rate": 4.998719180275376e-05, "loss": 2.9934, "step": 1400 }, { "epoch": 0.0, "learning_rate": 4.998398975344221e-05, "loss": 2.9485, "step": 1500 }, { "epoch": 0.0, "learning_rate": 4.998078770413064e-05, "loss": 2.9135, "step": 1600 }, { "epoch": 0.0, "learning_rate": 4.997758565481909e-05, "loss": 2.881, "step": 1700 }, { "epoch": 0.0, "learning_rate": 4.997438360550753e-05, "loss": 2.832, "step": 1800 }, { "epoch": 0.0, "learning_rate": 4.997118155619597e-05, "loss": 2.8239, "step": 1900 }, { "epoch": 0.0, "learning_rate": 4.996797950688441e-05, "loss": 2.8208, "step": 2000 }, { "epoch": 0.0, "eval_loss": 2.1989951133728027, "eval_runtime": 99.122, "eval_samples_per_second": 100.886, "eval_steps_per_second": 6.305, "step": 2000 }, { "epoch": 0.0, "learning_rate": 4.9964777457572846e-05, "loss": 2.772, "step": 2100 }, { "epoch": 0.0, "learning_rate": 4.996157540826129e-05, "loss": 2.7496, "step": 2200 }, { "epoch": 0.0, "learning_rate": 4.9958373358949726e-05, "loss": 2.7388, "step": 2300 }, { "epoch": 0.0, "learning_rate": 4.995517130963817e-05, "loss": 2.6978, "step": 2400 }, { "epoch": 0.0, "learning_rate": 4.995196926032661e-05, "loss": 2.6946, "step": 2500 }, { "epoch": 0.0, "learning_rate": 4.994876721101505e-05, "loss": 2.6965, "step": 2600 }, { "epoch": 0.0, "learning_rate": 4.994556516170349e-05, "loss": 2.6772, "step": 2700 }, { "epoch": 0.0, "learning_rate": 4.994236311239193e-05, "loss": 2.6738, "step": 2800 }, { "epoch": 0.0, "learning_rate": 4.993916106308038e-05, "loss": 2.6603, "step": 2900 }, { "epoch": 0.0, "learning_rate": 4.993595901376881e-05, "loss": 2.6273, "step": 3000 }, { "epoch": 0.0, "eval_loss": 2.1234536170959473, "eval_runtime": 103.3883, "eval_samples_per_second": 96.723, "eval_steps_per_second": 6.045, "step": 3000 }, { "epoch": 0.0, "learning_rate": 4.993275696445726e-05, "loss": 2.5833, "step": 3100 }, { "epoch": 0.0, "learning_rate": 4.992955491514569e-05, "loss": 2.6281, "step": 3200 }, { "epoch": 0.0, "learning_rate": 4.992635286583414e-05, "loss": 2.6009, "step": 3300 }, { "epoch": 0.0, "learning_rate": 4.9923150816522576e-05, "loss": 2.5637, "step": 3400 }, { "epoch": 0.0, "learning_rate": 4.9919948767211016e-05, "loss": 2.5687, "step": 3500 }, { "epoch": 0.0, "learning_rate": 4.991674671789946e-05, "loss": 2.5313, "step": 3600 }, { "epoch": 0.0, "learning_rate": 4.9913544668587896e-05, "loss": 2.5543, "step": 3700 }, { "epoch": 0.0, "learning_rate": 4.991034261927634e-05, "loss": 2.5477, "step": 3800 }, { "epoch": 0.0, "learning_rate": 4.9907140569964775e-05, "loss": 2.5552, "step": 3900 }, { "epoch": 0.0, "learning_rate": 4.990393852065322e-05, "loss": 2.5297, "step": 4000 }, { "epoch": 0.0, "eval_loss": 2.0755105018615723, "eval_runtime": 107.7424, "eval_samples_per_second": 92.814, "eval_steps_per_second": 5.801, "step": 4000 }, { "epoch": 0.0, "learning_rate": 4.990073647134166e-05, "loss": 2.5211, "step": 4100 }, { "epoch": 0.0, "learning_rate": 4.98975344220301e-05, "loss": 2.5055, "step": 4200 }, { "epoch": 0.0, "learning_rate": 4.989433237271854e-05, "loss": 2.5019, "step": 4300 }, { "epoch": 0.0, "learning_rate": 4.989113032340698e-05, "loss": 2.512, "step": 4400 }, { "epoch": 0.0, "learning_rate": 4.988792827409543e-05, "loss": 2.5242, "step": 4500 }, { "epoch": 0.0, "learning_rate": 4.988472622478386e-05, "loss": 2.4919, "step": 4600 }, { "epoch": 0.0, "learning_rate": 4.9881524175472307e-05, "loss": 2.5046, "step": 4700 }, { "epoch": 0.0, "learning_rate": 4.987832212616074e-05, "loss": 2.4496, "step": 4800 }, { "epoch": 0.0, "learning_rate": 4.9875120076849186e-05, "loss": 2.4872, "step": 4900 }, { "epoch": 0.0, "learning_rate": 4.9871918027537626e-05, "loss": 2.4754, "step": 5000 }, { "epoch": 0.0, "eval_loss": 2.056119680404663, "eval_runtime": 94.5616, "eval_samples_per_second": 105.751, "eval_steps_per_second": 6.609, "step": 5000 }, { "epoch": 0.0, "learning_rate": 4.9868715978226066e-05, "loss": 2.4718, "step": 5100 }, { "epoch": 0.0, "learning_rate": 4.986551392891451e-05, "loss": 2.4577, "step": 5200 }, { "epoch": 0.0, "learning_rate": 4.9862311879602945e-05, "loss": 2.4671, "step": 5300 }, { "epoch": 0.0, "learning_rate": 4.985910983029139e-05, "loss": 2.4654, "step": 5400 }, { "epoch": 0.0, "learning_rate": 4.9855907780979825e-05, "loss": 2.4614, "step": 5500 }, { "epoch": 0.0, "learning_rate": 4.985270573166827e-05, "loss": 2.4281, "step": 5600 }, { "epoch": 0.0, "learning_rate": 4.984950368235671e-05, "loss": 2.4488, "step": 5700 }, { "epoch": 0.0, "learning_rate": 4.984630163304515e-05, "loss": 2.434, "step": 5800 }, { "epoch": 0.0, "learning_rate": 4.984309958373359e-05, "loss": 2.4166, "step": 5900 }, { "epoch": 0.0, "learning_rate": 4.983989753442203e-05, "loss": 2.4283, "step": 6000 }, { "epoch": 0.0, "eval_loss": 2.0324013233184814, "eval_runtime": 93.3277, "eval_samples_per_second": 107.149, "eval_steps_per_second": 6.697, "step": 6000 }, { "epoch": 0.0, "learning_rate": 4.9836695485110477e-05, "loss": 2.4443, "step": 6100 }, { "epoch": 0.0, "learning_rate": 4.983349343579891e-05, "loss": 2.4378, "step": 6200 }, { "epoch": 0.0, "learning_rate": 4.9830291386487356e-05, "loss": 2.4199, "step": 6300 }, { "epoch": 0.0, "learning_rate": 4.9827089337175796e-05, "loss": 2.4355, "step": 6400 }, { "epoch": 0.0, "learning_rate": 4.9823887287864235e-05, "loss": 2.4162, "step": 6500 }, { "epoch": 0.0, "learning_rate": 4.9820685238552675e-05, "loss": 2.3975, "step": 6600 }, { "epoch": 0.0, "learning_rate": 4.9817483189241115e-05, "loss": 2.4064, "step": 6700 }, { "epoch": 0.0, "learning_rate": 4.981428113992956e-05, "loss": 2.3931, "step": 6800 }, { "epoch": 0.0, "learning_rate": 4.9811079090617994e-05, "loss": 2.3968, "step": 6900 }, { "epoch": 0.0, "learning_rate": 4.980787704130644e-05, "loss": 2.4042, "step": 7000 }, { "epoch": 0.0, "eval_loss": 2.0294625759124756, "eval_runtime": 92.6191, "eval_samples_per_second": 107.969, "eval_steps_per_second": 6.748, "step": 7000 }, { "epoch": 0.0, "learning_rate": 4.980467499199488e-05, "loss": 2.4216, "step": 7100 }, { "epoch": 0.0, "learning_rate": 4.980147294268332e-05, "loss": 2.3645, "step": 7200 }, { "epoch": 0.0, "learning_rate": 4.979827089337176e-05, "loss": 2.3518, "step": 7300 }, { "epoch": 0.0, "learning_rate": 4.97950688440602e-05, "loss": 2.3816, "step": 7400 }, { "epoch": 0.0, "learning_rate": 4.979186679474864e-05, "loss": 2.401, "step": 7500 }, { "epoch": 0.0, "learning_rate": 4.978866474543708e-05, "loss": 2.3834, "step": 7600 }, { "epoch": 0.0, "learning_rate": 4.9785462696125526e-05, "loss": 2.3866, "step": 7700 }, { "epoch": 0.0, "learning_rate": 4.978226064681396e-05, "loss": 2.3667, "step": 7800 }, { "epoch": 0.01, "learning_rate": 4.9779058597502405e-05, "loss": 2.3569, "step": 7900 }, { "epoch": 0.01, "learning_rate": 4.9775856548190845e-05, "loss": 2.3838, "step": 8000 }, { "epoch": 0.01, "eval_loss": 2.00341534614563, "eval_runtime": 93.3834, "eval_samples_per_second": 107.085, "eval_steps_per_second": 6.693, "step": 8000 }, { "epoch": 0.01, "learning_rate": 4.9772654498879285e-05, "loss": 2.3605, "step": 8100 }, { "epoch": 0.01, "learning_rate": 4.9769452449567725e-05, "loss": 2.3569, "step": 8200 }, { "epoch": 0.01, "learning_rate": 4.9766250400256164e-05, "loss": 2.354, "step": 8300 }, { "epoch": 0.01, "learning_rate": 4.976304835094461e-05, "loss": 2.3638, "step": 8400 }, { "epoch": 0.01, "learning_rate": 4.9759846301633044e-05, "loss": 2.3698, "step": 8500 }, { "epoch": 0.01, "learning_rate": 4.975664425232149e-05, "loss": 2.3685, "step": 8600 }, { "epoch": 0.01, "learning_rate": 4.975344220300993e-05, "loss": 2.3178, "step": 8700 }, { "epoch": 0.01, "learning_rate": 4.975024015369837e-05, "loss": 2.3356, "step": 8800 }, { "epoch": 0.01, "learning_rate": 4.974703810438681e-05, "loss": 2.3412, "step": 8900 }, { "epoch": 0.01, "learning_rate": 4.974383605507525e-05, "loss": 2.354, "step": 9000 }, { "epoch": 0.01, "eval_loss": 1.9964886903762817, "eval_runtime": 93.8664, "eval_samples_per_second": 106.534, "eval_steps_per_second": 6.658, "step": 9000 }, { "epoch": 0.01, "learning_rate": 4.974063400576369e-05, "loss": 2.3498, "step": 9100 }, { "epoch": 0.01, "learning_rate": 4.973743195645213e-05, "loss": 2.3712, "step": 9200 }, { "epoch": 0.01, "learning_rate": 4.9734229907140575e-05, "loss": 2.3465, "step": 9300 }, { "epoch": 0.01, "learning_rate": 4.9731027857829015e-05, "loss": 2.3606, "step": 9400 }, { "epoch": 0.01, "learning_rate": 4.9727825808517455e-05, "loss": 2.3704, "step": 9500 }, { "epoch": 0.01, "learning_rate": 4.9724623759205894e-05, "loss": 2.3553, "step": 9600 }, { "epoch": 0.01, "learning_rate": 4.9721421709894334e-05, "loss": 2.3421, "step": 9700 }, { "epoch": 0.01, "learning_rate": 4.9718219660582774e-05, "loss": 2.3405, "step": 9800 }, { "epoch": 0.01, "learning_rate": 4.9715017611271214e-05, "loss": 2.3337, "step": 9900 }, { "epoch": 0.01, "learning_rate": 4.971181556195966e-05, "loss": 2.3341, "step": 10000 }, { "epoch": 0.01, "eval_loss": 1.9892410039901733, "eval_runtime": 103.046, "eval_samples_per_second": 97.044, "eval_steps_per_second": 6.065, "step": 10000 }, { "epoch": 0.01, "learning_rate": 4.970861351264809e-05, "loss": 2.3465, "step": 10100 }, { "epoch": 0.01, "learning_rate": 4.970541146333654e-05, "loss": 2.3463, "step": 10200 }, { "epoch": 0.01, "learning_rate": 4.970220941402498e-05, "loss": 2.3376, "step": 10300 }, { "epoch": 0.01, "learning_rate": 4.969900736471342e-05, "loss": 2.3321, "step": 10400 }, { "epoch": 0.01, "learning_rate": 4.969580531540186e-05, "loss": 2.3337, "step": 10500 }, { "epoch": 0.01, "learning_rate": 4.96926032660903e-05, "loss": 2.312, "step": 10600 }, { "epoch": 0.01, "learning_rate": 4.968940121677874e-05, "loss": 2.3308, "step": 10700 }, { "epoch": 0.01, "learning_rate": 4.968619916746718e-05, "loss": 2.3227, "step": 10800 }, { "epoch": 0.01, "learning_rate": 4.9682997118155625e-05, "loss": 2.315, "step": 10900 }, { "epoch": 0.01, "learning_rate": 4.9679795068844064e-05, "loss": 2.3184, "step": 11000 }, { "epoch": 0.01, "eval_loss": 1.980800747871399, "eval_runtime": 106.7198, "eval_samples_per_second": 93.703, "eval_steps_per_second": 5.856, "step": 11000 }, { "epoch": 0.01, "learning_rate": 4.9676593019532504e-05, "loss": 2.3127, "step": 11100 }, { "epoch": 0.01, "learning_rate": 4.9673390970220944e-05, "loss": 2.3413, "step": 11200 }, { "epoch": 0.01, "learning_rate": 4.9670188920909384e-05, "loss": 2.2835, "step": 11300 }, { "epoch": 0.01, "learning_rate": 4.966698687159782e-05, "loss": 2.3045, "step": 11400 }, { "epoch": 0.01, "learning_rate": 4.966378482228626e-05, "loss": 2.324, "step": 11500 }, { "epoch": 0.01, "learning_rate": 4.966058277297471e-05, "loss": 2.3263, "step": 11600 }, { "epoch": 0.01, "learning_rate": 4.965738072366315e-05, "loss": 2.3009, "step": 11700 }, { "epoch": 0.01, "learning_rate": 4.965417867435159e-05, "loss": 2.3036, "step": 11800 }, { "epoch": 0.01, "learning_rate": 4.965097662504003e-05, "loss": 2.2912, "step": 11900 }, { "epoch": 0.01, "learning_rate": 4.964777457572847e-05, "loss": 2.3192, "step": 12000 }, { "epoch": 0.01, "eval_loss": 1.9739996194839478, "eval_runtime": 98.5238, "eval_samples_per_second": 101.498, "eval_steps_per_second": 6.344, "step": 12000 }, { "epoch": 0.01, "learning_rate": 4.964457252641691e-05, "loss": 2.2943, "step": 12100 }, { "epoch": 0.01, "learning_rate": 4.964137047710535e-05, "loss": 2.294, "step": 12200 }, { "epoch": 0.01, "learning_rate": 4.963816842779379e-05, "loss": 2.2956, "step": 12300 }, { "epoch": 0.01, "learning_rate": 4.963496637848223e-05, "loss": 2.3001, "step": 12400 }, { "epoch": 0.01, "learning_rate": 4.9631764329170674e-05, "loss": 2.2732, "step": 12500 }, { "epoch": 0.01, "learning_rate": 4.9628562279859114e-05, "loss": 2.28, "step": 12600 }, { "epoch": 0.01, "learning_rate": 4.9625360230547553e-05, "loss": 2.3042, "step": 12700 }, { "epoch": 0.01, "learning_rate": 4.962215818123599e-05, "loss": 2.2872, "step": 12800 }, { "epoch": 0.01, "learning_rate": 4.961895613192443e-05, "loss": 2.2695, "step": 12900 }, { "epoch": 0.01, "learning_rate": 4.961575408261287e-05, "loss": 2.2921, "step": 13000 }, { "epoch": 0.01, "eval_loss": 1.9611095190048218, "eval_runtime": 93.9086, "eval_samples_per_second": 106.487, "eval_steps_per_second": 6.655, "step": 13000 }, { "epoch": 0.01, "learning_rate": 4.961255203330131e-05, "loss": 2.2842, "step": 13100 }, { "epoch": 0.01, "learning_rate": 4.960934998398976e-05, "loss": 2.2989, "step": 13200 }, { "epoch": 0.01, "learning_rate": 4.96061479346782e-05, "loss": 2.2998, "step": 13300 }, { "epoch": 0.01, "learning_rate": 4.960294588536664e-05, "loss": 2.2697, "step": 13400 }, { "epoch": 0.01, "learning_rate": 4.959974383605508e-05, "loss": 2.2843, "step": 13500 }, { "epoch": 0.01, "learning_rate": 4.959654178674352e-05, "loss": 2.2725, "step": 13600 }, { "epoch": 0.01, "learning_rate": 4.959333973743196e-05, "loss": 2.3003, "step": 13700 }, { "epoch": 0.01, "learning_rate": 4.95901376881204e-05, "loss": 2.271, "step": 13800 }, { "epoch": 0.01, "learning_rate": 4.958693563880884e-05, "loss": 2.2842, "step": 13900 }, { "epoch": 0.01, "learning_rate": 4.9583733589497284e-05, "loss": 2.2459, "step": 14000 }, { "epoch": 0.01, "eval_loss": 1.9538640975952148, "eval_runtime": 94.0635, "eval_samples_per_second": 106.311, "eval_steps_per_second": 6.644, "step": 14000 }, { "epoch": 0.01, "learning_rate": 4.958053154018572e-05, "loss": 2.2996, "step": 14100 }, { "epoch": 0.01, "learning_rate": 4.957732949087416e-05, "loss": 2.294, "step": 14200 }, { "epoch": 0.01, "learning_rate": 4.95741274415626e-05, "loss": 2.2627, "step": 14300 }, { "epoch": 0.01, "learning_rate": 4.957092539225104e-05, "loss": 2.2703, "step": 14400 }, { "epoch": 0.01, "learning_rate": 4.956772334293948e-05, "loss": 2.2956, "step": 14500 }, { "epoch": 0.01, "learning_rate": 4.956452129362792e-05, "loss": 2.2706, "step": 14600 }, { "epoch": 0.01, "learning_rate": 4.956131924431636e-05, "loss": 2.2549, "step": 14700 }, { "epoch": 0.01, "learning_rate": 4.955811719500481e-05, "loss": 2.2685, "step": 14800 }, { "epoch": 0.01, "learning_rate": 4.955491514569325e-05, "loss": 2.2879, "step": 14900 }, { "epoch": 0.01, "learning_rate": 4.955171309638169e-05, "loss": 2.289, "step": 15000 }, { "epoch": 0.01, "eval_loss": 1.9509644508361816, "eval_runtime": 93.9059, "eval_samples_per_second": 106.49, "eval_steps_per_second": 6.656, "step": 15000 }, { "epoch": 0.01, "learning_rate": 4.954851104707013e-05, "loss": 2.2526, "step": 15100 }, { "epoch": 0.01, "learning_rate": 4.954530899775857e-05, "loss": 2.2344, "step": 15200 }, { "epoch": 0.01, "learning_rate": 4.954210694844701e-05, "loss": 2.2507, "step": 15300 }, { "epoch": 0.01, "learning_rate": 4.953890489913545e-05, "loss": 2.2477, "step": 15400 }, { "epoch": 0.01, "learning_rate": 4.9535702849823886e-05, "loss": 2.2564, "step": 15500 }, { "epoch": 0.01, "learning_rate": 4.953250080051233e-05, "loss": 2.2415, "step": 15600 }, { "epoch": 0.01, "learning_rate": 4.952929875120077e-05, "loss": 2.2768, "step": 15700 }, { "epoch": 0.01, "learning_rate": 4.952609670188921e-05, "loss": 2.2689, "step": 15800 }, { "epoch": 0.01, "learning_rate": 4.952289465257765e-05, "loss": 2.252, "step": 15900 }, { "epoch": 0.01, "learning_rate": 4.951969260326609e-05, "loss": 2.2496, "step": 16000 }, { "epoch": 0.01, "eval_loss": 1.9454293251037598, "eval_runtime": 95.8696, "eval_samples_per_second": 104.308, "eval_steps_per_second": 6.519, "step": 16000 }, { "epoch": 0.01, "learning_rate": 4.951649055395453e-05, "loss": 2.2811, "step": 16100 }, { "epoch": 0.01, "learning_rate": 4.951328850464297e-05, "loss": 2.2652, "step": 16200 }, { "epoch": 0.01, "learning_rate": 4.951008645533142e-05, "loss": 2.2328, "step": 16300 }, { "epoch": 0.01, "learning_rate": 4.950688440601986e-05, "loss": 2.2545, "step": 16400 }, { "epoch": 0.01, "learning_rate": 4.95036823567083e-05, "loss": 2.2392, "step": 16500 }, { "epoch": 0.01, "learning_rate": 4.950048030739674e-05, "loss": 2.2512, "step": 16600 }, { "epoch": 0.01, "learning_rate": 4.949727825808518e-05, "loss": 2.2348, "step": 16700 }, { "epoch": 0.01, "learning_rate": 4.949407620877362e-05, "loss": 2.256, "step": 16800 }, { "epoch": 0.01, "learning_rate": 4.9490874159462056e-05, "loss": 2.2641, "step": 16900 }, { "epoch": 0.01, "learning_rate": 4.94876721101505e-05, "loss": 2.2386, "step": 17000 }, { "epoch": 0.01, "eval_loss": 1.9414763450622559, "eval_runtime": 94.5137, "eval_samples_per_second": 105.805, "eval_steps_per_second": 6.613, "step": 17000 }, { "epoch": 0.01, "learning_rate": 4.9484470060838936e-05, "loss": 2.2579, "step": 17100 }, { "epoch": 0.01, "learning_rate": 4.948126801152738e-05, "loss": 2.2305, "step": 17200 }, { "epoch": 0.01, "learning_rate": 4.947806596221582e-05, "loss": 2.2651, "step": 17300 }, { "epoch": 0.01, "learning_rate": 4.947486391290426e-05, "loss": 2.26, "step": 17400 }, { "epoch": 0.01, "learning_rate": 4.94716618635927e-05, "loss": 2.2073, "step": 17500 }, { "epoch": 0.01, "learning_rate": 4.946845981428114e-05, "loss": 2.232, "step": 17600 }, { "epoch": 0.01, "learning_rate": 4.946525776496958e-05, "loss": 2.2545, "step": 17700 }, { "epoch": 0.01, "learning_rate": 4.946205571565802e-05, "loss": 2.2617, "step": 17800 }, { "epoch": 0.01, "learning_rate": 4.945885366634647e-05, "loss": 2.2416, "step": 17900 }, { "epoch": 0.01, "learning_rate": 4.945565161703491e-05, "loss": 2.2241, "step": 18000 }, { "epoch": 0.01, "eval_loss": 1.9346272945404053, "eval_runtime": 97.9573, "eval_samples_per_second": 102.085, "eval_steps_per_second": 6.38, "step": 18000 }, { "epoch": 0.01, "learning_rate": 4.945244956772335e-05, "loss": 2.2567, "step": 18100 }, { "epoch": 0.01, "learning_rate": 4.9449247518411787e-05, "loss": 2.2434, "step": 18200 }, { "epoch": 0.01, "learning_rate": 4.9446045469100226e-05, "loss": 2.2426, "step": 18300 }, { "epoch": 0.01, "learning_rate": 4.9442843419788666e-05, "loss": 2.2055, "step": 18400 }, { "epoch": 0.01, "learning_rate": 4.9439641370477106e-05, "loss": 2.2429, "step": 18500 }, { "epoch": 0.01, "learning_rate": 4.943643932116555e-05, "loss": 2.2063, "step": 18600 }, { "epoch": 0.01, "learning_rate": 4.9433237271853985e-05, "loss": 2.2033, "step": 18700 }, { "epoch": 0.01, "learning_rate": 4.943003522254243e-05, "loss": 2.2291, "step": 18800 }, { "epoch": 0.01, "learning_rate": 4.942683317323087e-05, "loss": 2.233, "step": 18900 }, { "epoch": 0.01, "learning_rate": 4.942363112391931e-05, "loss": 2.2262, "step": 19000 }, { "epoch": 0.01, "eval_loss": 1.9363772869110107, "eval_runtime": 102.767, "eval_samples_per_second": 97.307, "eval_steps_per_second": 6.082, "step": 19000 }, { "epoch": 0.01, "learning_rate": 4.942042907460775e-05, "loss": 2.2238, "step": 19100 }, { "epoch": 0.01, "learning_rate": 4.941722702529619e-05, "loss": 2.2397, "step": 19200 }, { "epoch": 0.01, "learning_rate": 4.941402497598464e-05, "loss": 2.22, "step": 19300 }, { "epoch": 0.01, "learning_rate": 4.941082292667307e-05, "loss": 2.2246, "step": 19400 }, { "epoch": 0.01, "learning_rate": 4.940762087736152e-05, "loss": 2.2381, "step": 19500 }, { "epoch": 0.01, "learning_rate": 4.9404418828049956e-05, "loss": 2.2312, "step": 19600 }, { "epoch": 0.01, "learning_rate": 4.9401216778738396e-05, "loss": 2.2036, "step": 19700 }, { "epoch": 0.01, "learning_rate": 4.9398014729426836e-05, "loss": 2.2405, "step": 19800 }, { "epoch": 0.01, "learning_rate": 4.9394812680115276e-05, "loss": 2.2137, "step": 19900 }, { "epoch": 0.01, "learning_rate": 4.9391610630803715e-05, "loss": 2.214, "step": 20000 }, { "epoch": 0.01, "eval_loss": 1.9283695220947266, "eval_runtime": 97.55, "eval_samples_per_second": 102.512, "eval_steps_per_second": 6.407, "step": 20000 }, { "epoch": 0.01, "learning_rate": 4.9388408581492155e-05, "loss": 2.2319, "step": 20100 }, { "epoch": 0.01, "learning_rate": 4.93852065321806e-05, "loss": 2.1994, "step": 20200 }, { "epoch": 0.01, "learning_rate": 4.9382004482869035e-05, "loss": 2.2355, "step": 20300 }, { "epoch": 0.01, "learning_rate": 4.937880243355748e-05, "loss": 2.2035, "step": 20400 }, { "epoch": 0.01, "learning_rate": 4.937560038424592e-05, "loss": 2.2042, "step": 20500 }, { "epoch": 0.01, "learning_rate": 4.937239833493436e-05, "loss": 2.2087, "step": 20600 }, { "epoch": 0.01, "learning_rate": 4.93691962856228e-05, "loss": 2.2062, "step": 20700 }, { "epoch": 0.01, "learning_rate": 4.936599423631124e-05, "loss": 2.2015, "step": 20800 }, { "epoch": 0.01, "learning_rate": 4.9362792186999687e-05, "loss": 2.2287, "step": 20900 }, { "epoch": 0.01, "learning_rate": 4.935959013768812e-05, "loss": 2.213, "step": 21000 }, { "epoch": 0.01, "eval_loss": 1.9262499809265137, "eval_runtime": 94.8809, "eval_samples_per_second": 105.395, "eval_steps_per_second": 6.587, "step": 21000 }, { "epoch": 0.01, "learning_rate": 4.9356388088376566e-05, "loss": 2.2075, "step": 21100 }, { "epoch": 0.01, "learning_rate": 4.9353186039065006e-05, "loss": 2.2084, "step": 21200 }, { "epoch": 0.01, "learning_rate": 4.9349983989753446e-05, "loss": 2.2094, "step": 21300 }, { "epoch": 0.01, "learning_rate": 4.9346781940441885e-05, "loss": 2.2056, "step": 21400 }, { "epoch": 0.01, "learning_rate": 4.9343579891130325e-05, "loss": 2.2213, "step": 21500 }, { "epoch": 0.01, "learning_rate": 4.934037784181877e-05, "loss": 2.1896, "step": 21600 }, { "epoch": 0.01, "learning_rate": 4.9337175792507204e-05, "loss": 2.1973, "step": 21700 }, { "epoch": 0.01, "learning_rate": 4.933397374319565e-05, "loss": 2.2015, "step": 21800 }, { "epoch": 0.01, "learning_rate": 4.9330771693884084e-05, "loss": 2.2119, "step": 21900 }, { "epoch": 0.01, "learning_rate": 4.932756964457253e-05, "loss": 2.18, "step": 22000 }, { "epoch": 0.01, "eval_loss": 1.923356294631958, "eval_runtime": 93.6024, "eval_samples_per_second": 106.835, "eval_steps_per_second": 6.677, "step": 22000 }, { "epoch": 0.01, "learning_rate": 4.932436759526097e-05, "loss": 2.2053, "step": 22100 }, { "epoch": 0.01, "learning_rate": 4.932116554594941e-05, "loss": 2.2045, "step": 22200 }, { "epoch": 0.01, "learning_rate": 4.931796349663785e-05, "loss": 2.217, "step": 22300 }, { "epoch": 0.01, "learning_rate": 4.931476144732629e-05, "loss": 2.1951, "step": 22400 }, { "epoch": 0.01, "learning_rate": 4.9311559398014736e-05, "loss": 2.2074, "step": 22500 }, { "epoch": 0.01, "learning_rate": 4.930835734870317e-05, "loss": 2.1891, "step": 22600 }, { "epoch": 0.01, "learning_rate": 4.9305155299391615e-05, "loss": 2.1913, "step": 22700 }, { "epoch": 0.01, "learning_rate": 4.9301953250080055e-05, "loss": 2.1954, "step": 22800 }, { "epoch": 0.01, "learning_rate": 4.9298751200768495e-05, "loss": 2.1805, "step": 22900 }, { "epoch": 0.01, "learning_rate": 4.9295549151456935e-05, "loss": 2.1812, "step": 23000 }, { "epoch": 0.01, "eval_loss": 1.9187208414077759, "eval_runtime": 94.8254, "eval_samples_per_second": 105.457, "eval_steps_per_second": 6.591, "step": 23000 }, { "epoch": 0.01, "learning_rate": 4.9292347102145374e-05, "loss": 2.1875, "step": 23100 }, { "epoch": 0.01, "learning_rate": 4.928914505283382e-05, "loss": 2.202, "step": 23200 }, { "epoch": 0.01, "learning_rate": 4.9285943003522254e-05, "loss": 2.1911, "step": 23300 }, { "epoch": 0.01, "learning_rate": 4.92827409542107e-05, "loss": 2.1989, "step": 23400 }, { "epoch": 0.02, "learning_rate": 4.927953890489913e-05, "loss": 2.1926, "step": 23500 }, { "epoch": 0.02, "learning_rate": 4.927633685558758e-05, "loss": 2.1912, "step": 23600 }, { "epoch": 0.02, "learning_rate": 4.927313480627602e-05, "loss": 2.2012, "step": 23700 }, { "epoch": 0.02, "learning_rate": 4.926993275696446e-05, "loss": 2.1962, "step": 23800 }, { "epoch": 0.02, "learning_rate": 4.9266730707652906e-05, "loss": 2.1827, "step": 23900 }, { "epoch": 0.02, "learning_rate": 4.926352865834134e-05, "loss": 2.1918, "step": 24000 }, { "epoch": 0.02, "eval_loss": 1.9133304357528687, "eval_runtime": 95.7705, "eval_samples_per_second": 104.416, "eval_steps_per_second": 6.526, "step": 24000 }, { "epoch": 0.02, "learning_rate": 4.9260326609029785e-05, "loss": 2.1894, "step": 24100 }, { "epoch": 0.02, "learning_rate": 4.925712455971822e-05, "loss": 2.1884, "step": 24200 }, { "epoch": 0.02, "learning_rate": 4.9253922510406665e-05, "loss": 2.1678, "step": 24300 }, { "epoch": 0.02, "learning_rate": 4.9250720461095105e-05, "loss": 2.1861, "step": 24400 }, { "epoch": 0.02, "learning_rate": 4.9247518411783544e-05, "loss": 2.2062, "step": 24500 }, { "epoch": 0.02, "learning_rate": 4.9244316362471984e-05, "loss": 2.1757, "step": 24600 }, { "epoch": 0.02, "learning_rate": 4.9241114313160424e-05, "loss": 2.2047, "step": 24700 }, { "epoch": 0.02, "learning_rate": 4.923791226384887e-05, "loss": 2.1928, "step": 24800 }, { "epoch": 0.02, "learning_rate": 4.92347102145373e-05, "loss": 2.182, "step": 24900 }, { "epoch": 0.02, "learning_rate": 4.923150816522575e-05, "loss": 2.1619, "step": 25000 }, { "epoch": 0.02, "eval_loss": 1.9081039428710938, "eval_runtime": 96.4994, "eval_samples_per_second": 103.628, "eval_steps_per_second": 6.477, "step": 25000 }, { "epoch": 0.02, "learning_rate": 4.922830611591418e-05, "loss": 2.1699, "step": 25100 }, { "epoch": 0.02, "learning_rate": 4.922510406660263e-05, "loss": 2.1909, "step": 25200 }, { "epoch": 0.02, "learning_rate": 4.922190201729107e-05, "loss": 2.2124, "step": 25300 }, { "epoch": 0.02, "learning_rate": 4.921869996797951e-05, "loss": 2.1656, "step": 25400 }, { "epoch": 0.02, "learning_rate": 4.9215497918667955e-05, "loss": 2.2039, "step": 25500 }, { "epoch": 0.02, "learning_rate": 4.921229586935639e-05, "loss": 2.188, "step": 25600 }, { "epoch": 0.02, "learning_rate": 4.9209093820044835e-05, "loss": 2.1772, "step": 25700 }, { "epoch": 0.02, "learning_rate": 4.920589177073327e-05, "loss": 2.176, "step": 25800 }, { "epoch": 0.02, "learning_rate": 4.9202689721421714e-05, "loss": 2.159, "step": 25900 }, { "epoch": 0.02, "learning_rate": 4.9199487672110154e-05, "loss": 2.1702, "step": 26000 }, { "epoch": 0.02, "eval_loss": 1.9062594175338745, "eval_runtime": 93.3413, "eval_samples_per_second": 107.134, "eval_steps_per_second": 6.696, "step": 26000 }, { "epoch": 0.02, "learning_rate": 4.9196285622798594e-05, "loss": 2.1836, "step": 26100 }, { "epoch": 0.02, "learning_rate": 4.919308357348703e-05, "loss": 2.1743, "step": 26200 }, { "epoch": 0.02, "learning_rate": 4.918988152417547e-05, "loss": 2.1595, "step": 26300 }, { "epoch": 0.02, "learning_rate": 4.918667947486392e-05, "loss": 2.1608, "step": 26400 }, { "epoch": 0.02, "learning_rate": 4.918347742555235e-05, "loss": 2.1483, "step": 26500 }, { "epoch": 0.02, "learning_rate": 4.91802753762408e-05, "loss": 2.1817, "step": 26600 }, { "epoch": 0.02, "learning_rate": 4.917707332692923e-05, "loss": 2.1754, "step": 26700 }, { "epoch": 0.02, "learning_rate": 4.917387127761768e-05, "loss": 2.2126, "step": 26800 }, { "epoch": 0.02, "learning_rate": 4.917066922830612e-05, "loss": 2.1628, "step": 26900 }, { "epoch": 0.02, "learning_rate": 4.916746717899456e-05, "loss": 2.1694, "step": 27000 }, { "epoch": 0.02, "eval_loss": 1.9066354036331177, "eval_runtime": 95.6786, "eval_samples_per_second": 104.517, "eval_steps_per_second": 6.532, "step": 27000 }, { "epoch": 0.02, "learning_rate": 4.9164265129683005e-05, "loss": 2.1694, "step": 27100 }, { "epoch": 0.02, "learning_rate": 4.916106308037144e-05, "loss": 2.1908, "step": 27200 }, { "epoch": 0.02, "learning_rate": 4.9157861031059884e-05, "loss": 2.1697, "step": 27300 }, { "epoch": 0.02, "learning_rate": 4.915465898174832e-05, "loss": 2.1899, "step": 27400 }, { "epoch": 0.02, "learning_rate": 4.9151456932436764e-05, "loss": 2.1551, "step": 27500 }, { "epoch": 0.02, "learning_rate": 4.9148254883125197e-05, "loss": 2.1518, "step": 27600 }, { "epoch": 0.02, "learning_rate": 4.914505283381364e-05, "loss": 2.1537, "step": 27700 }, { "epoch": 0.02, "learning_rate": 4.914185078450208e-05, "loss": 2.1658, "step": 27800 }, { "epoch": 0.02, "learning_rate": 4.913864873519052e-05, "loss": 2.1676, "step": 27900 }, { "epoch": 0.02, "learning_rate": 4.913544668587897e-05, "loss": 2.1734, "step": 28000 }, { "epoch": 0.02, "eval_loss": 1.9035944938659668, "eval_runtime": 98.1969, "eval_samples_per_second": 101.836, "eval_steps_per_second": 6.365, "step": 28000 }, { "epoch": 0.02, "learning_rate": 4.91322446365674e-05, "loss": 2.1654, "step": 28100 }, { "epoch": 0.02, "learning_rate": 4.912904258725585e-05, "loss": 2.1812, "step": 28200 }, { "epoch": 0.02, "learning_rate": 4.912584053794428e-05, "loss": 2.1562, "step": 28300 }, { "epoch": 0.02, "learning_rate": 4.912263848863273e-05, "loss": 2.1349, "step": 28400 }, { "epoch": 0.02, "learning_rate": 4.911943643932117e-05, "loss": 2.1678, "step": 28500 }, { "epoch": 0.02, "learning_rate": 4.911623439000961e-05, "loss": 2.1837, "step": 28600 }, { "epoch": 0.02, "learning_rate": 4.9113032340698054e-05, "loss": 2.1761, "step": 28700 }, { "epoch": 0.02, "learning_rate": 4.910983029138649e-05, "loss": 2.1773, "step": 28800 }, { "epoch": 0.02, "learning_rate": 4.9106628242074933e-05, "loss": 2.1596, "step": 28900 }, { "epoch": 0.02, "learning_rate": 4.9103426192763366e-05, "loss": 2.1617, "step": 29000 }, { "epoch": 0.02, "eval_loss": 1.89713716506958, "eval_runtime": 97.5802, "eval_samples_per_second": 102.48, "eval_steps_per_second": 6.405, "step": 29000 }, { "epoch": 0.02, "learning_rate": 4.910022414345181e-05, "loss": 2.1528, "step": 29100 }, { "epoch": 0.02, "learning_rate": 4.909702209414025e-05, "loss": 2.1772, "step": 29200 }, { "epoch": 0.02, "learning_rate": 4.909382004482869e-05, "loss": 2.174, "step": 29300 }, { "epoch": 0.02, "learning_rate": 4.909061799551713e-05, "loss": 2.1512, "step": 29400 }, { "epoch": 0.02, "learning_rate": 4.908741594620557e-05, "loss": 2.138, "step": 29500 }, { "epoch": 0.02, "learning_rate": 4.908421389689402e-05, "loss": 2.1521, "step": 29600 }, { "epoch": 0.02, "learning_rate": 4.908101184758245e-05, "loss": 2.1588, "step": 29700 }, { "epoch": 0.02, "learning_rate": 4.90778097982709e-05, "loss": 2.145, "step": 29800 }, { "epoch": 0.02, "learning_rate": 4.907460774895933e-05, "loss": 2.1606, "step": 29900 }, { "epoch": 0.02, "learning_rate": 4.907140569964778e-05, "loss": 2.1518, "step": 30000 }, { "epoch": 0.02, "eval_loss": 1.898345947265625, "eval_runtime": 96.4291, "eval_samples_per_second": 103.703, "eval_steps_per_second": 6.481, "step": 30000 }, { "epoch": 0.02, "learning_rate": 4.906820365033622e-05, "loss": 2.1758, "step": 30100 }, { "epoch": 0.02, "learning_rate": 4.906500160102466e-05, "loss": 2.1573, "step": 30200 }, { "epoch": 0.02, "learning_rate": 4.90617995517131e-05, "loss": 2.1624, "step": 30300 }, { "epoch": 0.02, "learning_rate": 4.9058597502401536e-05, "loss": 2.1212, "step": 30400 }, { "epoch": 0.02, "learning_rate": 4.905539545308998e-05, "loss": 2.1691, "step": 30500 }, { "epoch": 0.02, "learning_rate": 4.9052193403778416e-05, "loss": 2.1344, "step": 30600 }, { "epoch": 0.02, "learning_rate": 4.904899135446686e-05, "loss": 2.1544, "step": 30700 }, { "epoch": 0.02, "learning_rate": 4.90457893051553e-05, "loss": 2.1504, "step": 30800 }, { "epoch": 0.02, "learning_rate": 4.904258725584374e-05, "loss": 2.1416, "step": 30900 }, { "epoch": 0.02, "learning_rate": 4.903938520653218e-05, "loss": 2.1381, "step": 31000 }, { "epoch": 0.02, "eval_loss": 1.898423671722412, "eval_runtime": 95.9426, "eval_samples_per_second": 104.229, "eval_steps_per_second": 6.514, "step": 31000 }, { "epoch": 0.02, "learning_rate": 4.903618315722062e-05, "loss": 2.1716, "step": 31100 }, { "epoch": 0.02, "learning_rate": 4.903298110790907e-05, "loss": 2.1424, "step": 31200 }, { "epoch": 0.02, "learning_rate": 4.90297790585975e-05, "loss": 2.1346, "step": 31300 }, { "epoch": 0.02, "learning_rate": 4.902657700928595e-05, "loss": 2.127, "step": 31400 }, { "epoch": 0.02, "learning_rate": 4.902337495997439e-05, "loss": 2.1533, "step": 31500 }, { "epoch": 0.02, "learning_rate": 4.902017291066283e-05, "loss": 2.1399, "step": 31600 }, { "epoch": 0.02, "learning_rate": 4.9016970861351266e-05, "loss": 2.1501, "step": 31700 }, { "epoch": 0.02, "learning_rate": 4.9013768812039706e-05, "loss": 2.1315, "step": 31800 }, { "epoch": 0.02, "learning_rate": 4.901056676272815e-05, "loss": 2.1378, "step": 31900 }, { "epoch": 0.02, "learning_rate": 4.9007364713416586e-05, "loss": 2.1637, "step": 32000 }, { "epoch": 0.02, "eval_loss": 1.8942517042160034, "eval_runtime": 92.4539, "eval_samples_per_second": 108.162, "eval_steps_per_second": 6.76, "step": 32000 }, { "epoch": 0.02, "learning_rate": 4.900416266410503e-05, "loss": 2.1593, "step": 32100 }, { "epoch": 0.02, "learning_rate": 4.9000960614793465e-05, "loss": 2.141, "step": 32200 }, { "epoch": 0.02, "learning_rate": 4.899775856548191e-05, "loss": 2.1259, "step": 32300 }, { "epoch": 0.02, "learning_rate": 4.899455651617035e-05, "loss": 2.1524, "step": 32400 }, { "epoch": 0.02, "learning_rate": 4.899135446685879e-05, "loss": 2.1481, "step": 32500 }, { "epoch": 0.02, "learning_rate": 4.898815241754723e-05, "loss": 2.1464, "step": 32600 }, { "epoch": 0.02, "learning_rate": 4.898495036823567e-05, "loss": 2.1601, "step": 32700 }, { "epoch": 0.02, "learning_rate": 4.898174831892412e-05, "loss": 2.1479, "step": 32800 }, { "epoch": 0.02, "learning_rate": 4.897854626961255e-05, "loss": 2.1542, "step": 32900 }, { "epoch": 0.02, "learning_rate": 4.8975344220300997e-05, "loss": 2.1509, "step": 33000 }, { "epoch": 0.02, "eval_loss": 1.8882228136062622, "eval_runtime": 93.5127, "eval_samples_per_second": 106.937, "eval_steps_per_second": 6.684, "step": 33000 }, { "epoch": 0.02, "learning_rate": 4.8972142170989436e-05, "loss": 2.1337, "step": 33100 }, { "epoch": 0.02, "learning_rate": 4.8968940121677876e-05, "loss": 2.113, "step": 33200 }, { "epoch": 0.02, "learning_rate": 4.8965738072366316e-05, "loss": 2.1294, "step": 33300 }, { "epoch": 0.02, "learning_rate": 4.8962536023054756e-05, "loss": 2.1529, "step": 33400 }, { "epoch": 0.02, "learning_rate": 4.89593339737432e-05, "loss": 2.161, "step": 33500 }, { "epoch": 0.02, "learning_rate": 4.8956131924431635e-05, "loss": 2.1361, "step": 33600 }, { "epoch": 0.02, "learning_rate": 4.895292987512008e-05, "loss": 2.1316, "step": 33700 }, { "epoch": 0.02, "learning_rate": 4.894972782580852e-05, "loss": 2.1321, "step": 33800 }, { "epoch": 0.02, "learning_rate": 4.894652577649696e-05, "loss": 2.1375, "step": 33900 }, { "epoch": 0.02, "learning_rate": 4.89433237271854e-05, "loss": 2.1269, "step": 34000 }, { "epoch": 0.02, "eval_loss": 1.8904426097869873, "eval_runtime": 95.593, "eval_samples_per_second": 104.61, "eval_steps_per_second": 6.538, "step": 34000 }, { "epoch": 0.02, "learning_rate": 4.894012167787384e-05, "loss": 2.1338, "step": 34100 }, { "epoch": 0.02, "learning_rate": 4.893691962856228e-05, "loss": 2.1157, "step": 34200 }, { "epoch": 0.02, "learning_rate": 4.893371757925072e-05, "loss": 2.1409, "step": 34300 }, { "epoch": 0.02, "learning_rate": 4.8930515529939166e-05, "loss": 2.1458, "step": 34400 }, { "epoch": 0.02, "learning_rate": 4.8927313480627606e-05, "loss": 2.1271, "step": 34500 }, { "epoch": 0.02, "learning_rate": 4.8924111431316046e-05, "loss": 2.134, "step": 34600 }, { "epoch": 0.02, "learning_rate": 4.8920909382004486e-05, "loss": 2.1148, "step": 34700 }, { "epoch": 0.02, "learning_rate": 4.8917707332692925e-05, "loss": 2.1313, "step": 34800 }, { "epoch": 0.02, "learning_rate": 4.8914505283381365e-05, "loss": 2.1209, "step": 34900 }, { "epoch": 0.02, "learning_rate": 4.8911303234069805e-05, "loss": 2.1226, "step": 35000 }, { "epoch": 0.02, "eval_loss": 1.8888025283813477, "eval_runtime": 96.2639, "eval_samples_per_second": 103.881, "eval_steps_per_second": 6.493, "step": 35000 }, { "epoch": 0.02, "learning_rate": 4.890810118475825e-05, "loss": 2.1234, "step": 35100 }, { "epoch": 0.02, "learning_rate": 4.8904899135446684e-05, "loss": 2.1414, "step": 35200 }, { "epoch": 0.02, "learning_rate": 4.890169708613513e-05, "loss": 2.1286, "step": 35300 }, { "epoch": 0.02, "learning_rate": 4.889849503682357e-05, "loss": 2.1233, "step": 35400 }, { "epoch": 0.02, "learning_rate": 4.889529298751201e-05, "loss": 2.1149, "step": 35500 }, { "epoch": 0.02, "learning_rate": 4.889209093820045e-05, "loss": 2.1344, "step": 35600 }, { "epoch": 0.02, "learning_rate": 4.888888888888889e-05, "loss": 2.1063, "step": 35700 }, { "epoch": 0.02, "learning_rate": 4.888568683957733e-05, "loss": 2.1396, "step": 35800 }, { "epoch": 0.02, "learning_rate": 4.888248479026577e-05, "loss": 2.1342, "step": 35900 }, { "epoch": 0.02, "learning_rate": 4.8879282740954216e-05, "loss": 2.1482, "step": 36000 }, { "epoch": 0.02, "eval_loss": 1.886938452720642, "eval_runtime": 95.788, "eval_samples_per_second": 104.397, "eval_steps_per_second": 6.525, "step": 36000 }, { "epoch": 0.02, "learning_rate": 4.8876080691642656e-05, "loss": 2.1252, "step": 36100 }, { "epoch": 0.02, "learning_rate": 4.8872878642331095e-05, "loss": 2.1118, "step": 36200 }, { "epoch": 0.02, "learning_rate": 4.8869676593019535e-05, "loss": 2.1056, "step": 36300 }, { "epoch": 0.02, "learning_rate": 4.8866474543707975e-05, "loss": 2.1367, "step": 36400 }, { "epoch": 0.02, "learning_rate": 4.8863272494396415e-05, "loss": 2.1362, "step": 36500 }, { "epoch": 0.02, "learning_rate": 4.8860070445084854e-05, "loss": 2.1112, "step": 36600 }, { "epoch": 0.02, "learning_rate": 4.88568683957733e-05, "loss": 2.1358, "step": 36700 }, { "epoch": 0.02, "learning_rate": 4.885366634646174e-05, "loss": 2.1186, "step": 36800 }, { "epoch": 0.02, "learning_rate": 4.885046429715018e-05, "loss": 2.1239, "step": 36900 }, { "epoch": 0.02, "learning_rate": 4.884726224783862e-05, "loss": 2.1119, "step": 37000 }, { "epoch": 0.02, "eval_loss": 1.8857529163360596, "eval_runtime": 94.8425, "eval_samples_per_second": 105.438, "eval_steps_per_second": 6.59, "step": 37000 }, { "epoch": 0.02, "learning_rate": 4.884406019852706e-05, "loss": 2.1067, "step": 37100 }, { "epoch": 0.02, "learning_rate": 4.88408581492155e-05, "loss": 2.1441, "step": 37200 }, { "epoch": 0.02, "learning_rate": 4.883765609990394e-05, "loss": 2.1262, "step": 37300 }, { "epoch": 0.02, "learning_rate": 4.883445405059238e-05, "loss": 2.1161, "step": 37400 }, { "epoch": 0.02, "learning_rate": 4.883125200128082e-05, "loss": 2.1083, "step": 37500 }, { "epoch": 0.02, "learning_rate": 4.8828049951969265e-05, "loss": 2.0965, "step": 37600 }, { "epoch": 0.02, "learning_rate": 4.8824847902657705e-05, "loss": 2.1177, "step": 37700 }, { "epoch": 0.02, "learning_rate": 4.8821645853346145e-05, "loss": 2.121, "step": 37800 }, { "epoch": 0.02, "learning_rate": 4.8818443804034584e-05, "loss": 2.1213, "step": 37900 }, { "epoch": 0.02, "learning_rate": 4.8815241754723024e-05, "loss": 2.1069, "step": 38000 }, { "epoch": 0.02, "eval_loss": 1.883195161819458, "eval_runtime": 97.9715, "eval_samples_per_second": 102.071, "eval_steps_per_second": 6.379, "step": 38000 }, { "epoch": 0.02, "learning_rate": 4.8812039705411464e-05, "loss": 2.1239, "step": 38100 }, { "epoch": 0.02, "learning_rate": 4.8808837656099904e-05, "loss": 2.109, "step": 38200 }, { "epoch": 0.02, "learning_rate": 4.880563560678835e-05, "loss": 2.1289, "step": 38300 }, { "epoch": 0.02, "learning_rate": 4.880243355747679e-05, "loss": 2.095, "step": 38400 }, { "epoch": 0.02, "learning_rate": 4.879923150816523e-05, "loss": 2.112, "step": 38500 }, { "epoch": 0.02, "learning_rate": 4.879602945885367e-05, "loss": 2.1238, "step": 38600 }, { "epoch": 0.02, "learning_rate": 4.879282740954211e-05, "loss": 2.1124, "step": 38700 }, { "epoch": 0.02, "learning_rate": 4.878962536023055e-05, "loss": 2.1065, "step": 38800 }, { "epoch": 0.02, "learning_rate": 4.878642331091899e-05, "loss": 2.1195, "step": 38900 }, { "epoch": 0.02, "learning_rate": 4.878322126160743e-05, "loss": 2.1221, "step": 39000 }, { "epoch": 0.02, "eval_loss": 1.8769609928131104, "eval_runtime": 98.5309, "eval_samples_per_second": 101.491, "eval_steps_per_second": 6.343, "step": 39000 }, { "epoch": 0.03, "learning_rate": 4.8780019212295875e-05, "loss": 2.1016, "step": 39100 }, { "epoch": 0.03, "learning_rate": 4.8776817162984315e-05, "loss": 2.1234, "step": 39200 }, { "epoch": 0.03, "learning_rate": 4.8773615113672754e-05, "loss": 2.1148, "step": 39300 }, { "epoch": 0.03, "learning_rate": 4.8770413064361194e-05, "loss": 2.1141, "step": 39400 }, { "epoch": 0.03, "learning_rate": 4.8767211015049634e-05, "loss": 2.1134, "step": 39500 }, { "epoch": 0.03, "learning_rate": 4.8764008965738074e-05, "loss": 2.0978, "step": 39600 }, { "epoch": 0.03, "learning_rate": 4.876080691642651e-05, "loss": 2.106, "step": 39700 }, { "epoch": 0.03, "learning_rate": 4.875760486711495e-05, "loss": 2.0841, "step": 39800 }, { "epoch": 0.03, "learning_rate": 4.87544028178034e-05, "loss": 2.1078, "step": 39900 }, { "epoch": 0.03, "learning_rate": 4.875120076849184e-05, "loss": 2.1182, "step": 40000 }, { "epoch": 0.03, "eval_loss": 1.8739625215530396, "eval_runtime": 94.3874, "eval_samples_per_second": 105.946, "eval_steps_per_second": 6.622, "step": 40000 }, { "epoch": 0.03, "learning_rate": 4.874799871918028e-05, "loss": 2.0974, "step": 40100 }, { "epoch": 0.03, "learning_rate": 4.874479666986872e-05, "loss": 2.1048, "step": 40200 }, { "epoch": 0.03, "learning_rate": 4.874159462055716e-05, "loss": 2.1108, "step": 40300 }, { "epoch": 0.03, "learning_rate": 4.87383925712456e-05, "loss": 2.1179, "step": 40400 }, { "epoch": 0.03, "learning_rate": 4.873519052193404e-05, "loss": 2.1145, "step": 40500 }, { "epoch": 0.03, "learning_rate": 4.873198847262248e-05, "loss": 2.1091, "step": 40600 }, { "epoch": 0.03, "learning_rate": 4.8728786423310924e-05, "loss": 2.0934, "step": 40700 }, { "epoch": 0.03, "learning_rate": 4.8725584373999364e-05, "loss": 2.1018, "step": 40800 }, { "epoch": 0.03, "learning_rate": 4.8722382324687804e-05, "loss": 2.1407, "step": 40900 }, { "epoch": 0.03, "learning_rate": 4.8719180275376243e-05, "loss": 2.1167, "step": 41000 }, { "epoch": 0.03, "eval_loss": 1.8744018077850342, "eval_runtime": 95.1869, "eval_samples_per_second": 105.056, "eval_steps_per_second": 6.566, "step": 41000 }, { "epoch": 0.03, "learning_rate": 4.871597822606468e-05, "loss": 2.1163, "step": 41100 }, { "epoch": 0.03, "learning_rate": 4.871277617675312e-05, "loss": 2.1124, "step": 41200 }, { "epoch": 0.03, "learning_rate": 4.870957412744156e-05, "loss": 2.0885, "step": 41300 }, { "epoch": 0.03, "learning_rate": 4.870637207813001e-05, "loss": 2.1009, "step": 41400 }, { "epoch": 0.03, "learning_rate": 4.870317002881845e-05, "loss": 2.1074, "step": 41500 }, { "epoch": 0.03, "learning_rate": 4.869996797950689e-05, "loss": 2.1131, "step": 41600 }, { "epoch": 0.03, "learning_rate": 4.869676593019533e-05, "loss": 2.1105, "step": 41700 }, { "epoch": 0.03, "learning_rate": 4.869356388088377e-05, "loss": 2.0962, "step": 41800 }, { "epoch": 0.03, "learning_rate": 4.869036183157221e-05, "loss": 2.0735, "step": 41900 }, { "epoch": 0.03, "learning_rate": 4.868715978226065e-05, "loss": 2.0915, "step": 42000 }, { "epoch": 0.03, "eval_loss": 1.8723208904266357, "eval_runtime": 93.3988, "eval_samples_per_second": 107.068, "eval_steps_per_second": 6.692, "step": 42000 }, { "epoch": 0.03, "learning_rate": 4.868395773294909e-05, "loss": 2.1231, "step": 42100 }, { "epoch": 0.03, "learning_rate": 4.868075568363753e-05, "loss": 2.1127, "step": 42200 }, { "epoch": 0.03, "learning_rate": 4.8677553634325974e-05, "loss": 2.1248, "step": 42300 }, { "epoch": 0.03, "learning_rate": 4.867435158501441e-05, "loss": 2.0897, "step": 42400 }, { "epoch": 0.03, "learning_rate": 4.867114953570285e-05, "loss": 2.0895, "step": 42500 }, { "epoch": 0.03, "learning_rate": 4.866794748639129e-05, "loss": 2.1022, "step": 42600 }, { "epoch": 0.03, "learning_rate": 4.866474543707973e-05, "loss": 2.1011, "step": 42700 }, { "epoch": 0.03, "learning_rate": 4.866154338776817e-05, "loss": 2.111, "step": 42800 }, { "epoch": 0.03, "learning_rate": 4.865834133845661e-05, "loss": 2.0994, "step": 42900 }, { "epoch": 0.03, "learning_rate": 4.865513928914506e-05, "loss": 2.0783, "step": 43000 }, { "epoch": 0.03, "eval_loss": 1.8745108842849731, "eval_runtime": 94.4655, "eval_samples_per_second": 105.859, "eval_steps_per_second": 6.616, "step": 43000 }, { "epoch": 0.03, "learning_rate": 4.86519372398335e-05, "loss": 2.1138, "step": 43100 }, { "epoch": 0.03, "learning_rate": 4.864873519052194e-05, "loss": 2.0864, "step": 43200 }, { "epoch": 0.03, "learning_rate": 4.864553314121038e-05, "loss": 2.1164, "step": 43300 }, { "epoch": 0.03, "learning_rate": 4.864233109189882e-05, "loss": 2.0773, "step": 43400 }, { "epoch": 0.03, "learning_rate": 4.863912904258726e-05, "loss": 2.0942, "step": 43500 }, { "epoch": 0.03, "learning_rate": 4.86359269932757e-05, "loss": 2.0771, "step": 43600 }, { "epoch": 0.03, "learning_rate": 4.8632724943964144e-05, "loss": 2.102, "step": 43700 }, { "epoch": 0.03, "learning_rate": 4.8629522894652576e-05, "loss": 2.0898, "step": 43800 }, { "epoch": 0.03, "learning_rate": 4.862632084534102e-05, "loss": 2.0868, "step": 43900 }, { "epoch": 0.03, "learning_rate": 4.862311879602946e-05, "loss": 2.1116, "step": 44000 }, { "epoch": 0.03, "eval_loss": 1.8690712451934814, "eval_runtime": 97.0191, "eval_samples_per_second": 103.073, "eval_steps_per_second": 6.442, "step": 44000 }, { "epoch": 0.03, "learning_rate": 4.86199167467179e-05, "loss": 2.0911, "step": 44100 }, { "epoch": 0.03, "learning_rate": 4.861671469740634e-05, "loss": 2.0988, "step": 44200 }, { "epoch": 0.03, "learning_rate": 4.861351264809478e-05, "loss": 2.1041, "step": 44300 }, { "epoch": 0.03, "learning_rate": 4.861031059878323e-05, "loss": 2.1103, "step": 44400 }, { "epoch": 0.03, "learning_rate": 4.860710854947166e-05, "loss": 2.0734, "step": 44500 }, { "epoch": 0.03, "learning_rate": 4.860390650016011e-05, "loss": 2.0927, "step": 44600 }, { "epoch": 0.03, "learning_rate": 4.860070445084855e-05, "loss": 2.0838, "step": 44700 }, { "epoch": 0.03, "learning_rate": 4.859750240153699e-05, "loss": 2.0923, "step": 44800 }, { "epoch": 0.03, "learning_rate": 4.859430035222543e-05, "loss": 2.0989, "step": 44900 }, { "epoch": 0.03, "learning_rate": 4.859109830291387e-05, "loss": 2.0941, "step": 45000 }, { "epoch": 0.03, "eval_loss": 1.8631078004837036, "eval_runtime": 96.0666, "eval_samples_per_second": 104.094, "eval_steps_per_second": 6.506, "step": 45000 }, { "epoch": 0.03, "learning_rate": 4.858789625360231e-05, "loss": 2.0932, "step": 45100 }, { "epoch": 0.03, "learning_rate": 4.8584694204290746e-05, "loss": 2.0825, "step": 45200 }, { "epoch": 0.03, "learning_rate": 4.858149215497919e-05, "loss": 2.08, "step": 45300 }, { "epoch": 0.03, "learning_rate": 4.8578290105667626e-05, "loss": 2.0803, "step": 45400 }, { "epoch": 0.03, "learning_rate": 4.857508805635607e-05, "loss": 2.096, "step": 45500 }, { "epoch": 0.03, "learning_rate": 4.857188600704451e-05, "loss": 2.0821, "step": 45600 }, { "epoch": 0.03, "learning_rate": 4.856868395773295e-05, "loss": 2.0856, "step": 45700 }, { "epoch": 0.03, "learning_rate": 4.856548190842139e-05, "loss": 2.1044, "step": 45800 }, { "epoch": 0.03, "learning_rate": 4.856227985910983e-05, "loss": 2.0483, "step": 45900 }, { "epoch": 0.03, "learning_rate": 4.855907780979828e-05, "loss": 2.0644, "step": 46000 }, { "epoch": 0.03, "eval_loss": 1.8629900217056274, "eval_runtime": 99.8095, "eval_samples_per_second": 100.191, "eval_steps_per_second": 6.262, "step": 46000 }, { "epoch": 0.03, "learning_rate": 4.855587576048671e-05, "loss": 2.0711, "step": 46100 }, { "epoch": 0.03, "learning_rate": 4.855267371117516e-05, "loss": 2.1029, "step": 46200 }, { "epoch": 0.03, "learning_rate": 4.854947166186359e-05, "loss": 2.1027, "step": 46300 }, { "epoch": 0.03, "learning_rate": 4.854626961255204e-05, "loss": 2.0988, "step": 46400 }, { "epoch": 0.03, "learning_rate": 4.8543067563240477e-05, "loss": 2.0739, "step": 46500 }, { "epoch": 0.03, "learning_rate": 4.8539865513928916e-05, "loss": 2.0918, "step": 46600 }, { "epoch": 0.03, "learning_rate": 4.853666346461736e-05, "loss": 2.0839, "step": 46700 }, { "epoch": 0.03, "learning_rate": 4.8533461415305796e-05, "loss": 2.0649, "step": 46800 }, { "epoch": 0.03, "learning_rate": 4.853025936599424e-05, "loss": 2.0927, "step": 46900 }, { "epoch": 0.03, "learning_rate": 4.8527057316682675e-05, "loss": 2.0869, "step": 47000 }, { "epoch": 0.03, "eval_loss": 1.8689996004104614, "eval_runtime": 96.7764, "eval_samples_per_second": 103.331, "eval_steps_per_second": 6.458, "step": 47000 }, { "epoch": 0.03, "learning_rate": 4.852385526737112e-05, "loss": 2.0824, "step": 47100 }, { "epoch": 0.03, "learning_rate": 4.852065321805956e-05, "loss": 2.0822, "step": 47200 }, { "epoch": 0.03, "learning_rate": 4.8517451168748e-05, "loss": 2.0935, "step": 47300 }, { "epoch": 0.03, "learning_rate": 4.851424911943644e-05, "loss": 2.0828, "step": 47400 }, { "epoch": 0.03, "learning_rate": 4.851104707012488e-05, "loss": 2.0705, "step": 47500 }, { "epoch": 0.03, "learning_rate": 4.850784502081333e-05, "loss": 2.078, "step": 47600 }, { "epoch": 0.03, "learning_rate": 4.850464297150176e-05, "loss": 2.071, "step": 47700 }, { "epoch": 0.03, "learning_rate": 4.850144092219021e-05, "loss": 2.079, "step": 47800 }, { "epoch": 0.03, "learning_rate": 4.849823887287864e-05, "loss": 2.0876, "step": 47900 }, { "epoch": 0.03, "learning_rate": 4.8495036823567086e-05, "loss": 2.0755, "step": 48000 }, { "epoch": 0.03, "eval_loss": 1.864153504371643, "eval_runtime": 96.5465, "eval_samples_per_second": 103.577, "eval_steps_per_second": 6.474, "step": 48000 }, { "epoch": 0.03, "learning_rate": 4.8491834774255526e-05, "loss": 2.0876, "step": 48100 }, { "epoch": 0.03, "learning_rate": 4.8488632724943966e-05, "loss": 2.0792, "step": 48200 }, { "epoch": 0.03, "learning_rate": 4.848543067563241e-05, "loss": 2.0835, "step": 48300 }, { "epoch": 0.03, "learning_rate": 4.8482228626320845e-05, "loss": 2.0625, "step": 48400 }, { "epoch": 0.03, "learning_rate": 4.847902657700929e-05, "loss": 2.0956, "step": 48500 }, { "epoch": 0.03, "learning_rate": 4.8475824527697725e-05, "loss": 2.0735, "step": 48600 }, { "epoch": 0.03, "learning_rate": 4.847262247838617e-05, "loss": 2.0663, "step": 48700 }, { "epoch": 0.03, "learning_rate": 4.846942042907461e-05, "loss": 2.0996, "step": 48800 }, { "epoch": 0.03, "learning_rate": 4.846621837976305e-05, "loss": 2.0943, "step": 48900 }, { "epoch": 0.03, "learning_rate": 4.84630163304515e-05, "loss": 2.0697, "step": 49000 }, { "epoch": 0.03, "eval_loss": 1.8585724830627441, "eval_runtime": 97.2507, "eval_samples_per_second": 102.827, "eval_steps_per_second": 6.427, "step": 49000 }, { "epoch": 0.03, "learning_rate": 4.845981428113993e-05, "loss": 2.0932, "step": 49100 }, { "epoch": 0.03, "learning_rate": 4.8456612231828377e-05, "loss": 2.0735, "step": 49200 }, { "epoch": 0.03, "learning_rate": 4.845341018251681e-05, "loss": 2.0938, "step": 49300 }, { "epoch": 0.03, "learning_rate": 4.8450208133205256e-05, "loss": 2.053, "step": 49400 }, { "epoch": 0.03, "learning_rate": 4.844700608389369e-05, "loss": 2.0453, "step": 49500 }, { "epoch": 0.03, "learning_rate": 4.8443804034582136e-05, "loss": 2.072, "step": 49600 }, { "epoch": 0.03, "learning_rate": 4.8440601985270575e-05, "loss": 2.0817, "step": 49700 }, { "epoch": 0.03, "learning_rate": 4.8437399935959015e-05, "loss": 2.0836, "step": 49800 }, { "epoch": 0.03, "learning_rate": 4.843419788664746e-05, "loss": 2.0643, "step": 49900 }, { "epoch": 0.03, "learning_rate": 4.8430995837335894e-05, "loss": 2.0567, "step": 50000 }, { "epoch": 0.03, "eval_loss": 1.8589906692504883, "eval_runtime": 96.3346, "eval_samples_per_second": 103.805, "eval_steps_per_second": 6.488, "step": 50000 }, { "epoch": 0.03, "learning_rate": 4.842779378802434e-05, "loss": 2.0647, "step": 50100 }, { "epoch": 0.03, "learning_rate": 4.8424591738712774e-05, "loss": 2.0645, "step": 50200 }, { "epoch": 0.03, "learning_rate": 4.842138968940122e-05, "loss": 2.0981, "step": 50300 }, { "epoch": 0.03, "learning_rate": 4.841818764008966e-05, "loss": 2.088, "step": 50400 }, { "epoch": 0.03, "learning_rate": 4.84149855907781e-05, "loss": 2.0693, "step": 50500 }, { "epoch": 0.03, "learning_rate": 4.8411783541466546e-05, "loss": 2.0972, "step": 50600 }, { "epoch": 0.03, "learning_rate": 4.840858149215498e-05, "loss": 2.0773, "step": 50700 }, { "epoch": 0.03, "learning_rate": 4.8405379442843426e-05, "loss": 2.0591, "step": 50800 }, { "epoch": 0.03, "learning_rate": 4.840217739353186e-05, "loss": 2.0494, "step": 50900 }, { "epoch": 0.03, "learning_rate": 4.8398975344220305e-05, "loss": 2.0568, "step": 51000 }, { "epoch": 0.03, "eval_loss": 1.8614695072174072, "eval_runtime": 96.3248, "eval_samples_per_second": 103.815, "eval_steps_per_second": 6.488, "step": 51000 }, { "epoch": 0.03, "learning_rate": 4.839577329490874e-05, "loss": 2.0701, "step": 51100 }, { "epoch": 0.03, "learning_rate": 4.8392571245597185e-05, "loss": 2.0799, "step": 51200 }, { "epoch": 0.03, "learning_rate": 4.8389369196285625e-05, "loss": 2.0303, "step": 51300 }, { "epoch": 0.03, "learning_rate": 4.8386167146974064e-05, "loss": 2.0617, "step": 51400 }, { "epoch": 0.03, "learning_rate": 4.838296509766251e-05, "loss": 2.0453, "step": 51500 }, { "epoch": 0.03, "learning_rate": 4.8379763048350944e-05, "loss": 2.077, "step": 51600 }, { "epoch": 0.03, "learning_rate": 4.837656099903939e-05, "loss": 2.0575, "step": 51700 }, { "epoch": 0.03, "learning_rate": 4.837335894972782e-05, "loss": 2.0759, "step": 51800 }, { "epoch": 0.03, "learning_rate": 4.837015690041627e-05, "loss": 2.0463, "step": 51900 }, { "epoch": 0.03, "learning_rate": 4.836695485110471e-05, "loss": 2.0498, "step": 52000 }, { "epoch": 0.03, "eval_loss": 1.857500433921814, "eval_runtime": 106.0923, "eval_samples_per_second": 94.258, "eval_steps_per_second": 5.891, "step": 52000 }, { "epoch": 0.03, "learning_rate": 4.836375280179315e-05, "loss": 2.07, "step": 52100 }, { "epoch": 0.03, "learning_rate": 4.8360550752481596e-05, "loss": 2.0438, "step": 52200 }, { "epoch": 0.03, "learning_rate": 4.835734870317003e-05, "loss": 2.0776, "step": 52300 }, { "epoch": 0.03, "learning_rate": 4.8354146653858475e-05, "loss": 2.0769, "step": 52400 }, { "epoch": 0.03, "learning_rate": 4.835094460454691e-05, "loss": 2.0651, "step": 52500 }, { "epoch": 0.03, "learning_rate": 4.8347742555235355e-05, "loss": 2.0667, "step": 52600 }, { "epoch": 0.03, "learning_rate": 4.834454050592379e-05, "loss": 2.0827, "step": 52700 }, { "epoch": 0.03, "learning_rate": 4.8341338456612234e-05, "loss": 2.0581, "step": 52800 }, { "epoch": 0.03, "learning_rate": 4.8338136407300674e-05, "loss": 2.0494, "step": 52900 }, { "epoch": 0.03, "learning_rate": 4.8334934357989114e-05, "loss": 2.0708, "step": 53000 }, { "epoch": 0.03, "eval_loss": 1.853657841682434, "eval_runtime": 108.5557, "eval_samples_per_second": 92.119, "eval_steps_per_second": 5.757, "step": 53000 }, { "epoch": 0.03, "learning_rate": 4.833173230867756e-05, "loss": 2.0634, "step": 53100 }, { "epoch": 0.03, "learning_rate": 4.832853025936599e-05, "loss": 2.0644, "step": 53200 }, { "epoch": 0.03, "learning_rate": 4.832532821005444e-05, "loss": 2.0623, "step": 53300 }, { "epoch": 0.03, "learning_rate": 4.832212616074287e-05, "loss": 2.052, "step": 53400 }, { "epoch": 0.03, "learning_rate": 4.831892411143132e-05, "loss": 2.0449, "step": 53500 }, { "epoch": 0.03, "learning_rate": 4.831572206211976e-05, "loss": 2.0606, "step": 53600 }, { "epoch": 0.03, "learning_rate": 4.83125200128082e-05, "loss": 2.0631, "step": 53700 }, { "epoch": 0.03, "learning_rate": 4.8309317963496645e-05, "loss": 2.0554, "step": 53800 }, { "epoch": 0.03, "learning_rate": 4.830611591418508e-05, "loss": 2.0437, "step": 53900 }, { "epoch": 0.03, "learning_rate": 4.8302913864873525e-05, "loss": 2.0787, "step": 54000 }, { "epoch": 0.03, "eval_loss": 1.8551527261734009, "eval_runtime": 107.6443, "eval_samples_per_second": 92.899, "eval_steps_per_second": 5.806, "step": 54000 }, { "epoch": 0.03, "learning_rate": 4.829971181556196e-05, "loss": 2.0626, "step": 54100 }, { "epoch": 0.03, "learning_rate": 4.8296509766250404e-05, "loss": 2.0605, "step": 54200 }, { "epoch": 0.03, "learning_rate": 4.8293307716938844e-05, "loss": 2.04, "step": 54300 }, { "epoch": 0.03, "learning_rate": 4.8290105667627284e-05, "loss": 2.0704, "step": 54400 }, { "epoch": 0.03, "learning_rate": 4.828690361831572e-05, "loss": 2.0665, "step": 54500 }, { "epoch": 0.03, "learning_rate": 4.828370156900416e-05, "loss": 2.0699, "step": 54600 }, { "epoch": 0.04, "learning_rate": 4.828049951969261e-05, "loss": 2.054, "step": 54700 }, { "epoch": 0.04, "learning_rate": 4.827729747038104e-05, "loss": 2.0437, "step": 54800 }, { "epoch": 0.04, "learning_rate": 4.827409542106949e-05, "loss": 2.0456, "step": 54900 }, { "epoch": 0.04, "learning_rate": 4.827089337175792e-05, "loss": 2.0584, "step": 55000 }, { "epoch": 0.04, "eval_loss": 1.8516242504119873, "eval_runtime": 102.7521, "eval_samples_per_second": 97.322, "eval_steps_per_second": 6.083, "step": 55000 }, { "epoch": 0.04, "learning_rate": 4.826769132244637e-05, "loss": 2.0605, "step": 55100 }, { "epoch": 0.04, "learning_rate": 4.826448927313481e-05, "loss": 2.0518, "step": 55200 }, { "epoch": 0.04, "learning_rate": 4.826128722382325e-05, "loss": 2.0645, "step": 55300 }, { "epoch": 0.04, "learning_rate": 4.8258085174511695e-05, "loss": 2.0689, "step": 55400 }, { "epoch": 0.04, "learning_rate": 4.825488312520013e-05, "loss": 2.0556, "step": 55500 }, { "epoch": 0.04, "learning_rate": 4.8251681075888574e-05, "loss": 2.0657, "step": 55600 }, { "epoch": 0.04, "learning_rate": 4.824847902657701e-05, "loss": 2.0646, "step": 55700 }, { "epoch": 0.04, "learning_rate": 4.8245276977265454e-05, "loss": 2.0718, "step": 55800 }, { "epoch": 0.04, "learning_rate": 4.824207492795389e-05, "loss": 2.0601, "step": 55900 }, { "epoch": 0.04, "learning_rate": 4.823887287864233e-05, "loss": 2.0719, "step": 56000 }, { "epoch": 0.04, "eval_loss": 1.8505208492279053, "eval_runtime": 99.2869, "eval_samples_per_second": 100.718, "eval_steps_per_second": 6.295, "step": 56000 }, { "epoch": 0.04, "learning_rate": 4.823567082933077e-05, "loss": 2.0463, "step": 56100 }, { "epoch": 0.04, "learning_rate": 4.823246878001921e-05, "loss": 2.0706, "step": 56200 }, { "epoch": 0.04, "learning_rate": 4.822926673070766e-05, "loss": 2.0587, "step": 56300 }, { "epoch": 0.04, "learning_rate": 4.822606468139609e-05, "loss": 2.0524, "step": 56400 }, { "epoch": 0.04, "learning_rate": 4.822286263208454e-05, "loss": 2.0348, "step": 56500 }, { "epoch": 0.04, "learning_rate": 4.821966058277298e-05, "loss": 2.0472, "step": 56600 }, { "epoch": 0.04, "learning_rate": 4.821645853346142e-05, "loss": 2.0575, "step": 56700 }, { "epoch": 0.04, "learning_rate": 4.821325648414986e-05, "loss": 2.0523, "step": 56800 }, { "epoch": 0.04, "learning_rate": 4.82100544348383e-05, "loss": 2.0697, "step": 56900 }, { "epoch": 0.04, "learning_rate": 4.8206852385526744e-05, "loss": 2.06, "step": 57000 }, { "epoch": 0.04, "eval_loss": 1.8525017499923706, "eval_runtime": 97.6884, "eval_samples_per_second": 102.366, "eval_steps_per_second": 6.398, "step": 57000 }, { "epoch": 0.04, "learning_rate": 4.820365033621518e-05, "loss": 2.067, "step": 57100 }, { "epoch": 0.04, "learning_rate": 4.8200448286903623e-05, "loss": 2.0683, "step": 57200 }, { "epoch": 0.04, "learning_rate": 4.8197246237592056e-05, "loss": 2.0465, "step": 57300 }, { "epoch": 0.04, "learning_rate": 4.81940441882805e-05, "loss": 2.0456, "step": 57400 }, { "epoch": 0.04, "learning_rate": 4.819084213896894e-05, "loss": 2.0386, "step": 57500 }, { "epoch": 0.04, "learning_rate": 4.818764008965738e-05, "loss": 2.0801, "step": 57600 }, { "epoch": 0.04, "learning_rate": 4.818443804034582e-05, "loss": 2.0583, "step": 57700 }, { "epoch": 0.04, "learning_rate": 4.818123599103426e-05, "loss": 2.0765, "step": 57800 }, { "epoch": 0.04, "learning_rate": 4.817803394172271e-05, "loss": 2.0623, "step": 57900 }, { "epoch": 0.04, "learning_rate": 4.817483189241114e-05, "loss": 2.0463, "step": 58000 }, { "epoch": 0.04, "eval_loss": 1.8475555181503296, "eval_runtime": 96.7491, "eval_samples_per_second": 103.36, "eval_steps_per_second": 6.46, "step": 58000 }, { "epoch": 0.04, "learning_rate": 4.817162984309959e-05, "loss": 2.0768, "step": 58100 }, { "epoch": 0.04, "learning_rate": 4.816842779378803e-05, "loss": 2.0765, "step": 58200 }, { "epoch": 0.04, "learning_rate": 4.816522574447647e-05, "loss": 2.0669, "step": 58300 }, { "epoch": 0.04, "learning_rate": 4.816202369516491e-05, "loss": 2.0459, "step": 58400 }, { "epoch": 0.04, "learning_rate": 4.815882164585335e-05, "loss": 2.0661, "step": 58500 }, { "epoch": 0.04, "learning_rate": 4.815561959654179e-05, "loss": 2.0587, "step": 58600 }, { "epoch": 0.04, "learning_rate": 4.8152417547230226e-05, "loss": 2.0409, "step": 58700 }, { "epoch": 0.04, "learning_rate": 4.814921549791867e-05, "loss": 2.0356, "step": 58800 }, { "epoch": 0.04, "learning_rate": 4.814601344860711e-05, "loss": 2.0542, "step": 58900 }, { "epoch": 0.04, "learning_rate": 4.814281139929555e-05, "loss": 2.0316, "step": 59000 }, { "epoch": 0.04, "eval_loss": 1.844751000404358, "eval_runtime": 97.7255, "eval_samples_per_second": 102.327, "eval_steps_per_second": 6.395, "step": 59000 }, { "epoch": 0.04, "learning_rate": 4.813960934998399e-05, "loss": 2.0636, "step": 59100 }, { "epoch": 0.04, "learning_rate": 4.813640730067243e-05, "loss": 2.0318, "step": 59200 }, { "epoch": 0.04, "learning_rate": 4.813320525136087e-05, "loss": 2.0255, "step": 59300 }, { "epoch": 0.04, "learning_rate": 4.813000320204931e-05, "loss": 2.0472, "step": 59400 }, { "epoch": 0.04, "learning_rate": 4.812680115273776e-05, "loss": 2.068, "step": 59500 }, { "epoch": 0.04, "learning_rate": 4.812359910342619e-05, "loss": 2.0462, "step": 59600 }, { "epoch": 0.04, "learning_rate": 4.812039705411464e-05, "loss": 2.0365, "step": 59700 }, { "epoch": 0.04, "learning_rate": 4.811719500480308e-05, "loss": 2.0592, "step": 59800 }, { "epoch": 0.04, "learning_rate": 4.811399295549152e-05, "loss": 2.0402, "step": 59900 }, { "epoch": 0.04, "learning_rate": 4.8110790906179956e-05, "loss": 2.0648, "step": 60000 }, { "epoch": 0.04, "eval_loss": 1.8426470756530762, "eval_runtime": 104.0421, "eval_samples_per_second": 96.115, "eval_steps_per_second": 6.007, "step": 60000 }, { "epoch": 0.04, "learning_rate": 4.8107588856868396e-05, "loss": 2.0526, "step": 60100 }, { "epoch": 0.04, "learning_rate": 4.810438680755684e-05, "loss": 2.0276, "step": 60200 }, { "epoch": 0.04, "learning_rate": 4.8101184758245276e-05, "loss": 2.0248, "step": 60300 }, { "epoch": 0.04, "learning_rate": 4.809798270893372e-05, "loss": 2.0635, "step": 60400 }, { "epoch": 0.04, "learning_rate": 4.809478065962216e-05, "loss": 2.0396, "step": 60500 }, { "epoch": 0.04, "learning_rate": 4.80915786103106e-05, "loss": 2.0346, "step": 60600 }, { "epoch": 0.04, "learning_rate": 4.808837656099904e-05, "loss": 2.0585, "step": 60700 }, { "epoch": 0.04, "learning_rate": 4.808517451168748e-05, "loss": 2.0302, "step": 60800 }, { "epoch": 0.04, "learning_rate": 4.808197246237592e-05, "loss": 2.0728, "step": 60900 }, { "epoch": 0.04, "learning_rate": 4.807877041306436e-05, "loss": 2.0468, "step": 61000 }, { "epoch": 0.04, "eval_loss": 1.8420785665512085, "eval_runtime": 96.7926, "eval_samples_per_second": 103.314, "eval_steps_per_second": 6.457, "step": 61000 }, { "epoch": 0.04, "learning_rate": 4.807556836375281e-05, "loss": 2.0413, "step": 61100 }, { "epoch": 0.04, "learning_rate": 4.807236631444125e-05, "loss": 2.035, "step": 61200 }, { "epoch": 0.04, "learning_rate": 4.8069164265129687e-05, "loss": 2.0525, "step": 61300 }, { "epoch": 0.04, "learning_rate": 4.8065962215818126e-05, "loss": 2.0242, "step": 61400 }, { "epoch": 0.04, "learning_rate": 4.8062760166506566e-05, "loss": 2.0444, "step": 61500 }, { "epoch": 0.04, "learning_rate": 4.8059558117195006e-05, "loss": 2.02, "step": 61600 }, { "epoch": 0.04, "learning_rate": 4.8056356067883446e-05, "loss": 2.0317, "step": 61700 }, { "epoch": 0.04, "learning_rate": 4.805315401857189e-05, "loss": 2.0239, "step": 61800 }, { "epoch": 0.04, "learning_rate": 4.8049951969260325e-05, "loss": 2.0397, "step": 61900 }, { "epoch": 0.04, "learning_rate": 4.804674991994877e-05, "loss": 2.0211, "step": 62000 }, { "epoch": 0.04, "eval_loss": 1.8439449071884155, "eval_runtime": 102.0749, "eval_samples_per_second": 97.967, "eval_steps_per_second": 6.123, "step": 62000 }, { "epoch": 0.04, "learning_rate": 4.804354787063721e-05, "loss": 2.0038, "step": 62100 }, { "epoch": 0.04, "learning_rate": 4.804034582132565e-05, "loss": 2.0033, "step": 62200 }, { "epoch": 0.04, "learning_rate": 4.803714377201409e-05, "loss": 2.0401, "step": 62300 }, { "epoch": 0.04, "learning_rate": 4.803394172270253e-05, "loss": 2.0236, "step": 62400 }, { "epoch": 0.04, "learning_rate": 4.803073967339097e-05, "loss": 2.0548, "step": 62500 }, { "epoch": 0.04, "learning_rate": 4.802753762407941e-05, "loss": 2.0465, "step": 62600 }, { "epoch": 0.04, "learning_rate": 4.8024335574767856e-05, "loss": 2.051, "step": 62700 }, { "epoch": 0.04, "learning_rate": 4.8021133525456296e-05, "loss": 2.0535, "step": 62800 }, { "epoch": 0.04, "learning_rate": 4.8017931476144736e-05, "loss": 2.0553, "step": 62900 }, { "epoch": 0.04, "learning_rate": 4.8014729426833176e-05, "loss": 2.0266, "step": 63000 }, { "epoch": 0.04, "eval_loss": 1.8428466320037842, "eval_runtime": 101.1535, "eval_samples_per_second": 98.86, "eval_steps_per_second": 6.179, "step": 63000 }, { "epoch": 0.04, "learning_rate": 4.8011527377521615e-05, "loss": 2.0407, "step": 63100 }, { "epoch": 0.04, "learning_rate": 4.8008325328210055e-05, "loss": 2.0167, "step": 63200 }, { "epoch": 0.04, "learning_rate": 4.8005123278898495e-05, "loss": 2.0508, "step": 63300 }, { "epoch": 0.04, "learning_rate": 4.8001921229586935e-05, "loss": 2.0312, "step": 63400 }, { "epoch": 0.04, "learning_rate": 4.799871918027538e-05, "loss": 2.0487, "step": 63500 }, { "epoch": 0.04, "learning_rate": 4.799551713096382e-05, "loss": 2.0376, "step": 63600 }, { "epoch": 0.04, "learning_rate": 4.799231508165226e-05, "loss": 2.059, "step": 63700 }, { "epoch": 0.04, "learning_rate": 4.79891130323407e-05, "loss": 2.0582, "step": 63800 }, { "epoch": 0.04, "learning_rate": 4.798591098302914e-05, "loss": 2.0421, "step": 63900 }, { "epoch": 0.04, "learning_rate": 4.798270893371758e-05, "loss": 2.0221, "step": 64000 }, { "epoch": 0.04, "eval_loss": 1.83964204788208, "eval_runtime": 100.8398, "eval_samples_per_second": 99.167, "eval_steps_per_second": 6.198, "step": 64000 }, { "epoch": 0.04, "learning_rate": 4.797950688440602e-05, "loss": 2.0414, "step": 64100 }, { "epoch": 0.04, "learning_rate": 4.7976304835094466e-05, "loss": 2.0228, "step": 64200 }, { "epoch": 0.04, "learning_rate": 4.7973102785782906e-05, "loss": 2.0325, "step": 64300 }, { "epoch": 0.04, "learning_rate": 4.7969900736471346e-05, "loss": 2.0379, "step": 64400 }, { "epoch": 0.04, "learning_rate": 4.7966698687159785e-05, "loss": 2.0577, "step": 64500 }, { "epoch": 0.04, "learning_rate": 4.7963496637848225e-05, "loss": 2.0537, "step": 64600 }, { "epoch": 0.04, "learning_rate": 4.7960294588536665e-05, "loss": 2.0312, "step": 64700 }, { "epoch": 0.04, "learning_rate": 4.7957092539225105e-05, "loss": 2.0297, "step": 64800 }, { "epoch": 0.04, "learning_rate": 4.7953890489913544e-05, "loss": 2.0506, "step": 64900 }, { "epoch": 0.04, "learning_rate": 4.7950688440601984e-05, "loss": 2.026, "step": 65000 }, { "epoch": 0.04, "eval_loss": 1.8451613187789917, "eval_runtime": 97.7453, "eval_samples_per_second": 102.307, "eval_steps_per_second": 6.394, "step": 65000 }, { "epoch": 0.04, "learning_rate": 4.794748639129043e-05, "loss": 2.0352, "step": 65100 }, { "epoch": 0.04, "learning_rate": 4.794428434197887e-05, "loss": 2.0222, "step": 65200 }, { "epoch": 0.04, "learning_rate": 4.794108229266731e-05, "loss": 2.0241, "step": 65300 }, { "epoch": 0.04, "learning_rate": 4.793788024335575e-05, "loss": 2.0294, "step": 65400 }, { "epoch": 0.04, "learning_rate": 4.793467819404419e-05, "loss": 2.0335, "step": 65500 }, { "epoch": 0.04, "learning_rate": 4.793147614473263e-05, "loss": 2.0397, "step": 65600 }, { "epoch": 0.04, "learning_rate": 4.792827409542107e-05, "loss": 2.0055, "step": 65700 }, { "epoch": 0.04, "learning_rate": 4.7925072046109515e-05, "loss": 2.027, "step": 65800 }, { "epoch": 0.04, "learning_rate": 4.7921869996797955e-05, "loss": 2.0261, "step": 65900 }, { "epoch": 0.04, "learning_rate": 4.7918667947486395e-05, "loss": 2.0253, "step": 66000 }, { "epoch": 0.04, "eval_loss": 1.8369171619415283, "eval_runtime": 97.2534, "eval_samples_per_second": 102.824, "eval_steps_per_second": 6.427, "step": 66000 }, { "epoch": 0.04, "learning_rate": 4.7915465898174835e-05, "loss": 2.0686, "step": 66100 }, { "epoch": 0.04, "learning_rate": 4.7912263848863274e-05, "loss": 2.0339, "step": 66200 }, { "epoch": 0.04, "learning_rate": 4.7909061799551714e-05, "loss": 2.0176, "step": 66300 }, { "epoch": 0.04, "learning_rate": 4.7905859750240154e-05, "loss": 2.0337, "step": 66400 }, { "epoch": 0.04, "learning_rate": 4.79026577009286e-05, "loss": 2.0334, "step": 66500 }, { "epoch": 0.04, "learning_rate": 4.7899455651617033e-05, "loss": 2.059, "step": 66600 }, { "epoch": 0.04, "learning_rate": 4.789625360230548e-05, "loss": 2.0478, "step": 66700 }, { "epoch": 0.04, "learning_rate": 4.789305155299392e-05, "loss": 2.0303, "step": 66800 }, { "epoch": 0.04, "learning_rate": 4.788984950368236e-05, "loss": 2.0487, "step": 66900 }, { "epoch": 0.04, "learning_rate": 4.78866474543708e-05, "loss": 2.0421, "step": 67000 }, { "epoch": 0.04, "eval_loss": 1.8353855609893799, "eval_runtime": 95.4591, "eval_samples_per_second": 104.757, "eval_steps_per_second": 6.547, "step": 67000 }, { "epoch": 0.04, "learning_rate": 4.788344540505924e-05, "loss": 2.0385, "step": 67100 }, { "epoch": 0.04, "learning_rate": 4.788024335574768e-05, "loss": 2.0461, "step": 67200 }, { "epoch": 0.04, "learning_rate": 4.787704130643612e-05, "loss": 2.0424, "step": 67300 }, { "epoch": 0.04, "learning_rate": 4.7873839257124565e-05, "loss": 2.0494, "step": 67400 }, { "epoch": 0.04, "learning_rate": 4.7870637207813005e-05, "loss": 2.0148, "step": 67500 }, { "epoch": 0.04, "learning_rate": 4.7867435158501444e-05, "loss": 2.0457, "step": 67600 }, { "epoch": 0.04, "learning_rate": 4.7864233109189884e-05, "loss": 2.0366, "step": 67700 }, { "epoch": 0.04, "learning_rate": 4.7861031059878324e-05, "loss": 2.039, "step": 67800 }, { "epoch": 0.04, "learning_rate": 4.7857829010566764e-05, "loss": 2.0209, "step": 67900 }, { "epoch": 0.04, "learning_rate": 4.78546269612552e-05, "loss": 2.0139, "step": 68000 }, { "epoch": 0.04, "eval_loss": 1.8351616859436035, "eval_runtime": 98.9862, "eval_samples_per_second": 101.024, "eval_steps_per_second": 6.314, "step": 68000 }, { "epoch": 0.04, "learning_rate": 4.785142491194365e-05, "loss": 2.0459, "step": 68100 }, { "epoch": 0.04, "learning_rate": 4.784822286263208e-05, "loss": 2.0121, "step": 68200 }, { "epoch": 0.04, "learning_rate": 4.784502081332053e-05, "loss": 2.0263, "step": 68300 }, { "epoch": 0.04, "learning_rate": 4.784181876400897e-05, "loss": 2.021, "step": 68400 }, { "epoch": 0.04, "learning_rate": 4.783861671469741e-05, "loss": 2.0067, "step": 68500 }, { "epoch": 0.04, "learning_rate": 4.783541466538585e-05, "loss": 2.0119, "step": 68600 }, { "epoch": 0.04, "learning_rate": 4.783221261607429e-05, "loss": 2.0464, "step": 68700 }, { "epoch": 0.04, "learning_rate": 4.7829010566762735e-05, "loss": 2.0331, "step": 68800 }, { "epoch": 0.04, "learning_rate": 4.782580851745117e-05, "loss": 1.9977, "step": 68900 }, { "epoch": 0.04, "learning_rate": 4.7822606468139614e-05, "loss": 2.0366, "step": 69000 }, { "epoch": 0.04, "eval_loss": 1.8367480039596558, "eval_runtime": 100.7618, "eval_samples_per_second": 99.244, "eval_steps_per_second": 6.203, "step": 69000 }, { "epoch": 0.04, "learning_rate": 4.7819404418828054e-05, "loss": 2.0097, "step": 69100 }, { "epoch": 0.04, "learning_rate": 4.7816202369516494e-05, "loss": 2.0053, "step": 69200 }, { "epoch": 0.04, "learning_rate": 4.7813000320204933e-05, "loss": 2.0189, "step": 69300 }, { "epoch": 0.04, "learning_rate": 4.780979827089337e-05, "loss": 2.0432, "step": 69400 }, { "epoch": 0.04, "learning_rate": 4.780659622158181e-05, "loss": 2.0199, "step": 69500 }, { "epoch": 0.04, "learning_rate": 4.780339417227025e-05, "loss": 2.0163, "step": 69600 }, { "epoch": 0.04, "learning_rate": 4.78001921229587e-05, "loss": 2.0367, "step": 69700 }, { "epoch": 0.04, "learning_rate": 4.779699007364713e-05, "loss": 2.0073, "step": 69800 }, { "epoch": 0.04, "learning_rate": 4.779378802433558e-05, "loss": 2.0031, "step": 69900 }, { "epoch": 0.04, "learning_rate": 4.779058597502402e-05, "loss": 2.0395, "step": 70000 }, { "epoch": 0.04, "eval_loss": 1.8332195281982422, "eval_runtime": 103.3286, "eval_samples_per_second": 96.779, "eval_steps_per_second": 6.049, "step": 70000 }, { "epoch": 0.04, "learning_rate": 4.778738392571246e-05, "loss": 2.0184, "step": 70100 }, { "epoch": 0.04, "learning_rate": 4.77841818764009e-05, "loss": 2.0356, "step": 70200 }, { "epoch": 0.04, "learning_rate": 4.778097982708934e-05, "loss": 2.0296, "step": 70300 }, { "epoch": 0.05, "learning_rate": 4.7777777777777784e-05, "loss": 2.0181, "step": 70400 }, { "epoch": 0.05, "learning_rate": 4.777457572846622e-05, "loss": 2.029, "step": 70500 }, { "epoch": 0.05, "learning_rate": 4.7771373679154664e-05, "loss": 2.0374, "step": 70600 }, { "epoch": 0.05, "learning_rate": 4.77681716298431e-05, "loss": 2.013, "step": 70700 }, { "epoch": 0.05, "learning_rate": 4.776496958053154e-05, "loss": 2.0185, "step": 70800 }, { "epoch": 0.05, "learning_rate": 4.776176753121998e-05, "loss": 2.0284, "step": 70900 }, { "epoch": 0.05, "learning_rate": 4.775856548190842e-05, "loss": 2.0239, "step": 71000 }, { "epoch": 0.05, "eval_loss": 1.836153507232666, "eval_runtime": 96.7425, "eval_samples_per_second": 103.367, "eval_steps_per_second": 6.46, "step": 71000 }, { "epoch": 0.05, "learning_rate": 4.775536343259687e-05, "loss": 2.0208, "step": 71100 }, { "epoch": 0.05, "learning_rate": 4.77521613832853e-05, "loss": 2.0228, "step": 71200 }, { "epoch": 0.05, "learning_rate": 4.774895933397375e-05, "loss": 2.0256, "step": 71300 }, { "epoch": 0.05, "learning_rate": 4.774575728466218e-05, "loss": 2.007, "step": 71400 }, { "epoch": 0.05, "learning_rate": 4.774255523535063e-05, "loss": 2.0162, "step": 71500 }, { "epoch": 0.05, "learning_rate": 4.773935318603907e-05, "loss": 2.0144, "step": 71600 }, { "epoch": 0.05, "learning_rate": 4.773615113672751e-05, "loss": 2.0105, "step": 71700 }, { "epoch": 0.05, "learning_rate": 4.773294908741595e-05, "loss": 2.034, "step": 71800 }, { "epoch": 0.05, "learning_rate": 4.772974703810439e-05, "loss": 2.0419, "step": 71900 }, { "epoch": 0.05, "learning_rate": 4.7726544988792834e-05, "loss": 2.013, "step": 72000 }, { "epoch": 0.05, "eval_loss": 1.832377314567566, "eval_runtime": 99.2222, "eval_samples_per_second": 100.784, "eval_steps_per_second": 6.299, "step": 72000 }, { "epoch": 0.05, "learning_rate": 4.7723342939481266e-05, "loss": 2.0237, "step": 72100 }, { "epoch": 0.05, "learning_rate": 4.772014089016971e-05, "loss": 2.0164, "step": 72200 }, { "epoch": 0.05, "learning_rate": 4.771693884085815e-05, "loss": 2.0237, "step": 72300 }, { "epoch": 0.05, "learning_rate": 4.771373679154659e-05, "loss": 2.0012, "step": 72400 }, { "epoch": 0.05, "learning_rate": 4.771053474223503e-05, "loss": 1.9963, "step": 72500 }, { "epoch": 0.05, "learning_rate": 4.770733269292347e-05, "loss": 2.0263, "step": 72600 }, { "epoch": 0.05, "learning_rate": 4.770413064361192e-05, "loss": 2.0062, "step": 72700 }, { "epoch": 0.05, "learning_rate": 4.770092859430035e-05, "loss": 1.9949, "step": 72800 }, { "epoch": 0.05, "learning_rate": 4.76977265449888e-05, "loss": 2.015, "step": 72900 }, { "epoch": 0.05, "learning_rate": 4.769452449567723e-05, "loss": 2.008, "step": 73000 }, { "epoch": 0.05, "eval_loss": 1.8327128887176514, "eval_runtime": 100.2695, "eval_samples_per_second": 99.731, "eval_steps_per_second": 6.233, "step": 73000 }, { "epoch": 0.05, "learning_rate": 4.769132244636568e-05, "loss": 2.0259, "step": 73100 }, { "epoch": 0.05, "learning_rate": 4.768812039705412e-05, "loss": 2.0051, "step": 73200 }, { "epoch": 0.05, "learning_rate": 4.768491834774256e-05, "loss": 2.013, "step": 73300 }, { "epoch": 0.05, "learning_rate": 4.7681716298431003e-05, "loss": 2.0244, "step": 73400 }, { "epoch": 0.05, "learning_rate": 4.7678514249119436e-05, "loss": 2.0058, "step": 73500 }, { "epoch": 0.05, "learning_rate": 4.767531219980788e-05, "loss": 1.9992, "step": 73600 }, { "epoch": 0.05, "learning_rate": 4.7672110150496316e-05, "loss": 2.0262, "step": 73700 }, { "epoch": 0.05, "learning_rate": 4.766890810118476e-05, "loss": 2.0142, "step": 73800 }, { "epoch": 0.05, "learning_rate": 4.76657060518732e-05, "loss": 1.9957, "step": 73900 }, { "epoch": 0.05, "learning_rate": 4.766250400256164e-05, "loss": 2.0104, "step": 74000 }, { "epoch": 0.05, "eval_loss": 1.8299044370651245, "eval_runtime": 98.1132, "eval_samples_per_second": 101.923, "eval_steps_per_second": 6.37, "step": 74000 }, { "epoch": 0.05, "learning_rate": 4.765930195325009e-05, "loss": 1.9826, "step": 74100 }, { "epoch": 0.05, "learning_rate": 4.765609990393852e-05, "loss": 2.0157, "step": 74200 }, { "epoch": 0.05, "learning_rate": 4.765289785462697e-05, "loss": 2.0151, "step": 74300 }, { "epoch": 0.05, "learning_rate": 4.76496958053154e-05, "loss": 2.0152, "step": 74400 }, { "epoch": 0.05, "learning_rate": 4.764649375600385e-05, "loss": 2.0033, "step": 74500 }, { "epoch": 0.05, "learning_rate": 4.764329170669228e-05, "loss": 2.0162, "step": 74600 }, { "epoch": 0.05, "learning_rate": 4.764008965738073e-05, "loss": 2.0152, "step": 74700 }, { "epoch": 0.05, "learning_rate": 4.7636887608069167e-05, "loss": 2.008, "step": 74800 }, { "epoch": 0.05, "learning_rate": 4.7633685558757606e-05, "loss": 2.0119, "step": 74900 }, { "epoch": 0.05, "learning_rate": 4.763048350944605e-05, "loss": 1.9959, "step": 75000 }, { "epoch": 0.05, "eval_loss": 1.8265933990478516, "eval_runtime": 95.7289, "eval_samples_per_second": 104.462, "eval_steps_per_second": 6.529, "step": 75000 }, { "epoch": 0.05, "learning_rate": 4.7627281460134486e-05, "loss": 2.0309, "step": 75100 }, { "epoch": 0.05, "learning_rate": 4.762407941082293e-05, "loss": 1.9883, "step": 75200 }, { "epoch": 0.05, "learning_rate": 4.7620877361511365e-05, "loss": 1.9987, "step": 75300 }, { "epoch": 0.05, "learning_rate": 4.761767531219981e-05, "loss": 2.0071, "step": 75400 }, { "epoch": 0.05, "learning_rate": 4.761447326288825e-05, "loss": 2.0143, "step": 75500 }, { "epoch": 0.05, "learning_rate": 4.761127121357669e-05, "loss": 2.0126, "step": 75600 }, { "epoch": 0.05, "learning_rate": 4.760806916426514e-05, "loss": 1.9888, "step": 75700 }, { "epoch": 0.05, "learning_rate": 4.760486711495357e-05, "loss": 2.0059, "step": 75800 }, { "epoch": 0.05, "learning_rate": 4.760166506564202e-05, "loss": 2.0044, "step": 75900 }, { "epoch": 0.05, "learning_rate": 4.759846301633045e-05, "loss": 2.0008, "step": 76000 }, { "epoch": 0.05, "eval_loss": 1.8267353773117065, "eval_runtime": 95.618, "eval_samples_per_second": 104.583, "eval_steps_per_second": 6.536, "step": 76000 }, { "epoch": 0.05, "learning_rate": 4.75952609670189e-05, "loss": 2.0072, "step": 76100 }, { "epoch": 0.05, "learning_rate": 4.759205891770733e-05, "loss": 2.0035, "step": 76200 }, { "epoch": 0.05, "learning_rate": 4.7588856868395776e-05, "loss": 2.0147, "step": 76300 }, { "epoch": 0.05, "learning_rate": 4.7585654819084216e-05, "loss": 2.0142, "step": 76400 }, { "epoch": 0.05, "learning_rate": 4.7582452769772656e-05, "loss": 2.0178, "step": 76500 }, { "epoch": 0.05, "learning_rate": 4.75792507204611e-05, "loss": 1.981, "step": 76600 }, { "epoch": 0.05, "learning_rate": 4.7576048671149535e-05, "loss": 2.0076, "step": 76700 }, { "epoch": 0.05, "learning_rate": 4.757284662183798e-05, "loss": 1.9995, "step": 76800 }, { "epoch": 0.05, "learning_rate": 4.7569644572526415e-05, "loss": 1.9998, "step": 76900 }, { "epoch": 0.05, "learning_rate": 4.756644252321486e-05, "loss": 2.0077, "step": 77000 }, { "epoch": 0.05, "eval_loss": 1.824723482131958, "eval_runtime": 109.3403, "eval_samples_per_second": 91.458, "eval_steps_per_second": 5.716, "step": 77000 }, { "epoch": 0.05, "learning_rate": 4.75632404739033e-05, "loss": 2.0105, "step": 77100 }, { "epoch": 0.05, "learning_rate": 4.756003842459174e-05, "loss": 2.0229, "step": 77200 }, { "epoch": 0.05, "learning_rate": 4.755683637528019e-05, "loss": 1.9905, "step": 77300 }, { "epoch": 0.05, "learning_rate": 4.755363432596862e-05, "loss": 2.015, "step": 77400 }, { "epoch": 0.05, "learning_rate": 4.7550432276657067e-05, "loss": 2.0205, "step": 77500 }, { "epoch": 0.05, "learning_rate": 4.75472302273455e-05, "loss": 2.0024, "step": 77600 }, { "epoch": 0.05, "learning_rate": 4.7544028178033946e-05, "loss": 2.0342, "step": 77700 }, { "epoch": 0.05, "learning_rate": 4.754082612872238e-05, "loss": 2.011, "step": 77800 }, { "epoch": 0.05, "learning_rate": 4.7537624079410826e-05, "loss": 2.0019, "step": 77900 }, { "epoch": 0.05, "learning_rate": 4.7534422030099265e-05, "loss": 1.9807, "step": 78000 }, { "epoch": 0.05, "eval_loss": 1.831045150756836, "eval_runtime": 104.0686, "eval_samples_per_second": 96.09, "eval_steps_per_second": 6.006, "step": 78000 }, { "epoch": 0.05, "learning_rate": 4.7531219980787705e-05, "loss": 2.0054, "step": 78100 }, { "epoch": 0.05, "learning_rate": 4.752801793147615e-05, "loss": 2.0062, "step": 78200 }, { "epoch": 0.05, "learning_rate": 4.7524815882164584e-05, "loss": 2.0297, "step": 78300 }, { "epoch": 0.05, "learning_rate": 4.752161383285303e-05, "loss": 2.0192, "step": 78400 }, { "epoch": 0.05, "learning_rate": 4.7518411783541464e-05, "loss": 2.0152, "step": 78500 }, { "epoch": 0.05, "learning_rate": 4.751520973422991e-05, "loss": 2.0094, "step": 78600 }, { "epoch": 0.05, "learning_rate": 4.751200768491835e-05, "loss": 2.0057, "step": 78700 }, { "epoch": 0.05, "learning_rate": 4.750880563560679e-05, "loss": 2.0004, "step": 78800 }, { "epoch": 0.05, "learning_rate": 4.7505603586295236e-05, "loss": 2.0, "step": 78900 }, { "epoch": 0.05, "learning_rate": 4.750240153698367e-05, "loss": 2.0136, "step": 79000 }, { "epoch": 0.05, "eval_loss": 1.828290581703186, "eval_runtime": 103.7187, "eval_samples_per_second": 96.415, "eval_steps_per_second": 6.026, "step": 79000 }, { "epoch": 0.05, "learning_rate": 4.7499199487672116e-05, "loss": 1.9904, "step": 79100 }, { "epoch": 0.05, "learning_rate": 4.749599743836055e-05, "loss": 1.9881, "step": 79200 }, { "epoch": 0.05, "learning_rate": 4.7492795389048995e-05, "loss": 2.0007, "step": 79300 }, { "epoch": 0.05, "learning_rate": 4.748959333973743e-05, "loss": 2.0115, "step": 79400 }, { "epoch": 0.05, "learning_rate": 4.7486391290425875e-05, "loss": 2.0286, "step": 79500 }, { "epoch": 0.05, "learning_rate": 4.7483189241114315e-05, "loss": 2.03, "step": 79600 }, { "epoch": 0.05, "learning_rate": 4.7479987191802754e-05, "loss": 1.9968, "step": 79700 }, { "epoch": 0.05, "learning_rate": 4.74767851424912e-05, "loss": 1.9972, "step": 79800 }, { "epoch": 0.05, "learning_rate": 4.7473583093179634e-05, "loss": 1.9978, "step": 79900 }, { "epoch": 0.05, "learning_rate": 4.747038104386808e-05, "loss": 1.9786, "step": 80000 }, { "epoch": 0.05, "eval_loss": 1.8277857303619385, "eval_runtime": 106.9551, "eval_samples_per_second": 93.497, "eval_steps_per_second": 5.844, "step": 80000 }, { "epoch": 0.05, "learning_rate": 4.746717899455651e-05, "loss": 2.0056, "step": 80100 }, { "epoch": 0.05, "learning_rate": 4.746397694524496e-05, "loss": 1.9901, "step": 80200 }, { "epoch": 0.05, "learning_rate": 4.74607748959334e-05, "loss": 1.9956, "step": 80300 }, { "epoch": 0.05, "learning_rate": 4.745757284662184e-05, "loss": 2.0133, "step": 80400 }, { "epoch": 0.05, "learning_rate": 4.7454370797310286e-05, "loss": 2.0107, "step": 80500 }, { "epoch": 0.05, "learning_rate": 4.745116874799872e-05, "loss": 2.0201, "step": 80600 }, { "epoch": 0.05, "learning_rate": 4.7447966698687165e-05, "loss": 1.9829, "step": 80700 }, { "epoch": 0.05, "learning_rate": 4.74447646493756e-05, "loss": 1.9978, "step": 80800 }, { "epoch": 0.05, "learning_rate": 4.7441562600064045e-05, "loss": 1.9806, "step": 80900 }, { "epoch": 0.05, "learning_rate": 4.7438360550752485e-05, "loss": 2.009, "step": 81000 }, { "epoch": 0.05, "eval_loss": 1.8248462677001953, "eval_runtime": 97.1832, "eval_samples_per_second": 102.898, "eval_steps_per_second": 6.431, "step": 81000 }, { "epoch": 0.05, "learning_rate": 4.7435158501440924e-05, "loss": 2.0095, "step": 81100 }, { "epoch": 0.05, "learning_rate": 4.7431956452129364e-05, "loss": 2.003, "step": 81200 }, { "epoch": 0.05, "learning_rate": 4.7428754402817804e-05, "loss": 2.0118, "step": 81300 }, { "epoch": 0.05, "learning_rate": 4.742555235350625e-05, "loss": 1.9928, "step": 81400 }, { "epoch": 0.05, "learning_rate": 4.742235030419468e-05, "loss": 2.0022, "step": 81500 }, { "epoch": 0.05, "learning_rate": 4.741914825488313e-05, "loss": 2.0008, "step": 81600 }, { "epoch": 0.05, "learning_rate": 4.741594620557156e-05, "loss": 1.9968, "step": 81700 }, { "epoch": 0.05, "learning_rate": 4.741274415626001e-05, "loss": 2.0178, "step": 81800 }, { "epoch": 0.05, "learning_rate": 4.740954210694845e-05, "loss": 1.9941, "step": 81900 }, { "epoch": 0.05, "learning_rate": 4.740634005763689e-05, "loss": 1.9899, "step": 82000 }, { "epoch": 0.05, "eval_loss": 1.8250046968460083, "eval_runtime": 98.5125, "eval_samples_per_second": 101.51, "eval_steps_per_second": 6.344, "step": 82000 }, { "epoch": 0.05, "learning_rate": 4.740313800832533e-05, "loss": 2.0003, "step": 82100 }, { "epoch": 0.05, "learning_rate": 4.739993595901377e-05, "loss": 1.9932, "step": 82200 }, { "epoch": 0.05, "learning_rate": 4.7396733909702215e-05, "loss": 2.0191, "step": 82300 }, { "epoch": 0.05, "learning_rate": 4.739353186039065e-05, "loss": 2.0006, "step": 82400 }, { "epoch": 0.05, "learning_rate": 4.7390329811079094e-05, "loss": 1.9884, "step": 82500 }, { "epoch": 0.05, "learning_rate": 4.7387127761767534e-05, "loss": 2.0011, "step": 82600 }, { "epoch": 0.05, "learning_rate": 4.7383925712455974e-05, "loss": 1.9744, "step": 82700 }, { "epoch": 0.05, "learning_rate": 4.738072366314441e-05, "loss": 2.0134, "step": 82800 }, { "epoch": 0.05, "learning_rate": 4.737752161383285e-05, "loss": 1.9963, "step": 82900 }, { "epoch": 0.05, "learning_rate": 4.73743195645213e-05, "loss": 1.9891, "step": 83000 }, { "epoch": 0.05, "eval_loss": 1.823404312133789, "eval_runtime": 102.8474, "eval_samples_per_second": 97.231, "eval_steps_per_second": 6.077, "step": 83000 }, { "epoch": 0.05, "learning_rate": 4.737111751520973e-05, "loss": 1.9995, "step": 83100 }, { "epoch": 0.05, "learning_rate": 4.736791546589818e-05, "loss": 2.008, "step": 83200 }, { "epoch": 0.05, "learning_rate": 4.736471341658662e-05, "loss": 1.9872, "step": 83300 }, { "epoch": 0.05, "learning_rate": 4.736151136727506e-05, "loss": 2.0044, "step": 83400 }, { "epoch": 0.05, "learning_rate": 4.73583093179635e-05, "loss": 2.0059, "step": 83500 }, { "epoch": 0.05, "learning_rate": 4.735510726865194e-05, "loss": 2.0114, "step": 83600 }, { "epoch": 0.05, "learning_rate": 4.735190521934038e-05, "loss": 2.0166, "step": 83700 }, { "epoch": 0.05, "learning_rate": 4.734870317002882e-05, "loss": 1.9786, "step": 83800 }, { "epoch": 0.05, "learning_rate": 4.7345501120717264e-05, "loss": 2.0042, "step": 83900 }, { "epoch": 0.05, "learning_rate": 4.7342299071405704e-05, "loss": 2.0052, "step": 84000 }, { "epoch": 0.05, "eval_loss": 1.8235909938812256, "eval_runtime": 100.5113, "eval_samples_per_second": 99.491, "eval_steps_per_second": 6.218, "step": 84000 }, { "epoch": 0.05, "learning_rate": 4.7339097022094144e-05, "loss": 1.9984, "step": 84100 }, { "epoch": 0.05, "learning_rate": 4.733589497278258e-05, "loss": 1.9829, "step": 84200 }, { "epoch": 0.05, "learning_rate": 4.733269292347102e-05, "loss": 1.9833, "step": 84300 }, { "epoch": 0.05, "learning_rate": 4.732949087415946e-05, "loss": 2.0085, "step": 84400 }, { "epoch": 0.05, "learning_rate": 4.73262888248479e-05, "loss": 1.9939, "step": 84500 }, { "epoch": 0.05, "learning_rate": 4.732308677553635e-05, "loss": 1.988, "step": 84600 }, { "epoch": 0.05, "learning_rate": 4.731988472622478e-05, "loss": 1.9987, "step": 84700 }, { "epoch": 0.05, "learning_rate": 4.731668267691323e-05, "loss": 2.0046, "step": 84800 }, { "epoch": 0.05, "learning_rate": 4.731348062760167e-05, "loss": 1.9943, "step": 84900 }, { "epoch": 0.05, "learning_rate": 4.731027857829011e-05, "loss": 2.0016, "step": 85000 }, { "epoch": 0.05, "eval_loss": 1.8186933994293213, "eval_runtime": 99.8661, "eval_samples_per_second": 100.134, "eval_steps_per_second": 6.258, "step": 85000 }, { "epoch": 0.05, "learning_rate": 4.730707652897855e-05, "loss": 1.9664, "step": 85100 }, { "epoch": 0.05, "learning_rate": 4.730387447966699e-05, "loss": 1.9992, "step": 85200 }, { "epoch": 0.05, "learning_rate": 4.730067243035543e-05, "loss": 2.0079, "step": 85300 }, { "epoch": 0.05, "learning_rate": 4.729747038104387e-05, "loss": 1.989, "step": 85400 }, { "epoch": 0.05, "learning_rate": 4.7294268331732313e-05, "loss": 1.9804, "step": 85500 }, { "epoch": 0.05, "learning_rate": 4.729106628242075e-05, "loss": 1.9873, "step": 85600 }, { "epoch": 0.05, "learning_rate": 4.728786423310919e-05, "loss": 1.9861, "step": 85700 }, { "epoch": 0.05, "learning_rate": 4.728466218379763e-05, "loss": 1.9743, "step": 85800 }, { "epoch": 0.05, "learning_rate": 4.728146013448607e-05, "loss": 1.9821, "step": 85900 }, { "epoch": 0.06, "learning_rate": 4.727825808517451e-05, "loss": 2.0008, "step": 86000 }, { "epoch": 0.06, "eval_loss": 1.818795919418335, "eval_runtime": 96.3307, "eval_samples_per_second": 103.809, "eval_steps_per_second": 6.488, "step": 86000 }, { "epoch": 0.06, "learning_rate": 4.727505603586295e-05, "loss": 1.9805, "step": 86100 }, { "epoch": 0.06, "learning_rate": 4.72718539865514e-05, "loss": 1.969, "step": 86200 }, { "epoch": 0.06, "learning_rate": 4.726865193723984e-05, "loss": 2.0116, "step": 86300 }, { "epoch": 0.06, "learning_rate": 4.726544988792828e-05, "loss": 1.983, "step": 86400 }, { "epoch": 0.06, "learning_rate": 4.726224783861672e-05, "loss": 1.9873, "step": 86500 }, { "epoch": 0.06, "learning_rate": 4.725904578930516e-05, "loss": 1.9998, "step": 86600 }, { "epoch": 0.06, "learning_rate": 4.72558437399936e-05, "loss": 1.9804, "step": 86700 }, { "epoch": 0.06, "learning_rate": 4.725264169068204e-05, "loss": 2.0031, "step": 86800 }, { "epoch": 0.06, "learning_rate": 4.7249439641370477e-05, "loss": 1.9936, "step": 86900 }, { "epoch": 0.06, "learning_rate": 4.7246237592058916e-05, "loss": 1.9745, "step": 87000 }, { "epoch": 0.06, "eval_loss": 1.819578766822815, "eval_runtime": 104.433, "eval_samples_per_second": 95.755, "eval_steps_per_second": 5.985, "step": 87000 }, { "epoch": 0.06, "learning_rate": 4.724303554274736e-05, "loss": 1.9864, "step": 87100 }, { "epoch": 0.06, "learning_rate": 4.72398334934358e-05, "loss": 1.9789, "step": 87200 }, { "epoch": 0.06, "learning_rate": 4.723663144412424e-05, "loss": 1.9859, "step": 87300 }, { "epoch": 0.06, "learning_rate": 4.723342939481268e-05, "loss": 2.0044, "step": 87400 }, { "epoch": 0.06, "learning_rate": 4.723022734550112e-05, "loss": 2.0001, "step": 87500 }, { "epoch": 0.06, "learning_rate": 4.722702529618956e-05, "loss": 2.0, "step": 87600 }, { "epoch": 0.06, "learning_rate": 4.7223823246878e-05, "loss": 1.9618, "step": 87700 }, { "epoch": 0.06, "learning_rate": 4.722062119756645e-05, "loss": 1.9949, "step": 87800 }, { "epoch": 0.06, "learning_rate": 4.721741914825489e-05, "loss": 1.981, "step": 87900 }, { "epoch": 0.06, "learning_rate": 4.721421709894333e-05, "loss": 2.0041, "step": 88000 }, { "epoch": 0.06, "eval_loss": 1.8221529722213745, "eval_runtime": 108.0262, "eval_samples_per_second": 92.57, "eval_steps_per_second": 5.786, "step": 88000 }, { "epoch": 0.06, "learning_rate": 4.721101504963177e-05, "loss": 2.0162, "step": 88100 }, { "epoch": 0.06, "learning_rate": 4.720781300032021e-05, "loss": 2.0013, "step": 88200 }, { "epoch": 0.06, "learning_rate": 4.7204610951008646e-05, "loss": 1.9868, "step": 88300 }, { "epoch": 0.06, "learning_rate": 4.7201408901697086e-05, "loss": 1.9645, "step": 88400 }, { "epoch": 0.06, "learning_rate": 4.7198206852385526e-05, "loss": 1.9746, "step": 88500 }, { "epoch": 0.06, "learning_rate": 4.719500480307397e-05, "loss": 1.9739, "step": 88600 }, { "epoch": 0.06, "learning_rate": 4.719180275376241e-05, "loss": 1.9794, "step": 88700 }, { "epoch": 0.06, "learning_rate": 4.718860070445085e-05, "loss": 1.9946, "step": 88800 }, { "epoch": 0.06, "learning_rate": 4.718539865513929e-05, "loss": 2.004, "step": 88900 }, { "epoch": 0.06, "learning_rate": 4.718219660582773e-05, "loss": 1.9796, "step": 89000 }, { "epoch": 0.06, "eval_loss": 1.8163881301879883, "eval_runtime": 100.1969, "eval_samples_per_second": 99.804, "eval_steps_per_second": 6.238, "step": 89000 }, { "epoch": 0.06, "learning_rate": 4.717899455651617e-05, "loss": 1.9747, "step": 89100 }, { "epoch": 0.06, "learning_rate": 4.717579250720461e-05, "loss": 1.9869, "step": 89200 }, { "epoch": 0.06, "learning_rate": 4.717259045789305e-05, "loss": 1.9811, "step": 89300 }, { "epoch": 0.06, "learning_rate": 4.71693884085815e-05, "loss": 1.9753, "step": 89400 }, { "epoch": 0.06, "learning_rate": 4.716618635926994e-05, "loss": 1.9703, "step": 89500 }, { "epoch": 0.06, "learning_rate": 4.7162984309958377e-05, "loss": 1.986, "step": 89600 }, { "epoch": 0.06, "learning_rate": 4.7159782260646816e-05, "loss": 1.9793, "step": 89700 }, { "epoch": 0.06, "learning_rate": 4.7156580211335256e-05, "loss": 1.9764, "step": 89800 }, { "epoch": 0.06, "learning_rate": 4.7153378162023696e-05, "loss": 1.9874, "step": 89900 }, { "epoch": 0.06, "learning_rate": 4.7150176112712136e-05, "loss": 1.9738, "step": 90000 }, { "epoch": 0.06, "eval_loss": 1.8204419612884521, "eval_runtime": 98.964, "eval_samples_per_second": 101.047, "eval_steps_per_second": 6.315, "step": 90000 }, { "epoch": 0.06, "learning_rate": 4.7146974063400575e-05, "loss": 1.9734, "step": 90100 }, { "epoch": 0.06, "learning_rate": 4.714377201408902e-05, "loss": 1.9748, "step": 90200 }, { "epoch": 0.06, "learning_rate": 4.714056996477746e-05, "loss": 1.9642, "step": 90300 }, { "epoch": 0.06, "learning_rate": 4.71373679154659e-05, "loss": 1.9778, "step": 90400 }, { "epoch": 0.06, "learning_rate": 4.713416586615434e-05, "loss": 1.9734, "step": 90500 }, { "epoch": 0.06, "learning_rate": 4.713096381684278e-05, "loss": 1.9694, "step": 90600 }, { "epoch": 0.06, "learning_rate": 4.712776176753122e-05, "loss": 1.9826, "step": 90700 }, { "epoch": 0.06, "learning_rate": 4.712455971821966e-05, "loss": 1.9728, "step": 90800 }, { "epoch": 0.06, "learning_rate": 4.712135766890811e-05, "loss": 2.0023, "step": 90900 }, { "epoch": 0.06, "learning_rate": 4.7118155619596546e-05, "loss": 1.9854, "step": 91000 }, { "epoch": 0.06, "eval_loss": 1.815573811531067, "eval_runtime": 98.1433, "eval_samples_per_second": 101.892, "eval_steps_per_second": 6.368, "step": 91000 }, { "epoch": 0.06, "learning_rate": 4.7114953570284986e-05, "loss": 1.9936, "step": 91100 }, { "epoch": 0.06, "learning_rate": 4.7111751520973426e-05, "loss": 2.0072, "step": 91200 }, { "epoch": 0.06, "learning_rate": 4.7108549471661866e-05, "loss": 1.9605, "step": 91300 }, { "epoch": 0.06, "learning_rate": 4.7105347422350305e-05, "loss": 1.9903, "step": 91400 }, { "epoch": 0.06, "learning_rate": 4.7102145373038745e-05, "loss": 1.9907, "step": 91500 }, { "epoch": 0.06, "learning_rate": 4.709894332372719e-05, "loss": 1.9744, "step": 91600 }, { "epoch": 0.06, "learning_rate": 4.7095741274415625e-05, "loss": 1.9824, "step": 91700 }, { "epoch": 0.06, "learning_rate": 4.709253922510407e-05, "loss": 1.9901, "step": 91800 }, { "epoch": 0.06, "learning_rate": 4.708933717579251e-05, "loss": 1.962, "step": 91900 }, { "epoch": 0.06, "learning_rate": 4.708613512648095e-05, "loss": 1.9679, "step": 92000 }, { "epoch": 0.06, "eval_loss": 1.8143820762634277, "eval_runtime": 95.7589, "eval_samples_per_second": 104.429, "eval_steps_per_second": 6.527, "step": 92000 }, { "epoch": 0.06, "learning_rate": 4.708293307716939e-05, "loss": 1.957, "step": 92100 }, { "epoch": 0.06, "learning_rate": 4.707973102785783e-05, "loss": 1.9769, "step": 92200 }, { "epoch": 0.06, "learning_rate": 4.707652897854627e-05, "loss": 1.9772, "step": 92300 }, { "epoch": 0.06, "learning_rate": 4.707332692923471e-05, "loss": 1.9736, "step": 92400 }, { "epoch": 0.06, "learning_rate": 4.7070124879923156e-05, "loss": 1.9706, "step": 92500 }, { "epoch": 0.06, "learning_rate": 4.7066922830611596e-05, "loss": 1.9801, "step": 92600 }, { "epoch": 0.06, "learning_rate": 4.7063720781300036e-05, "loss": 1.9859, "step": 92700 }, { "epoch": 0.06, "learning_rate": 4.7060518731988475e-05, "loss": 1.9701, "step": 92800 }, { "epoch": 0.06, "learning_rate": 4.7057316682676915e-05, "loss": 1.9879, "step": 92900 }, { "epoch": 0.06, "learning_rate": 4.7054114633365355e-05, "loss": 1.9879, "step": 93000 }, { "epoch": 0.06, "eval_loss": 1.8164808750152588, "eval_runtime": 97.027, "eval_samples_per_second": 103.064, "eval_steps_per_second": 6.442, "step": 93000 }, { "epoch": 0.06, "learning_rate": 4.7050912584053795e-05, "loss": 1.9867, "step": 93100 }, { "epoch": 0.06, "learning_rate": 4.704771053474224e-05, "loss": 1.9789, "step": 93200 }, { "epoch": 0.06, "learning_rate": 4.7044508485430674e-05, "loss": 1.9814, "step": 93300 }, { "epoch": 0.06, "learning_rate": 4.704130643611912e-05, "loss": 1.9529, "step": 93400 }, { "epoch": 0.06, "learning_rate": 4.703810438680756e-05, "loss": 1.9607, "step": 93500 }, { "epoch": 0.06, "learning_rate": 4.7034902337496e-05, "loss": 1.9978, "step": 93600 }, { "epoch": 0.06, "learning_rate": 4.703170028818444e-05, "loss": 1.967, "step": 93700 }, { "epoch": 0.06, "learning_rate": 4.702849823887288e-05, "loss": 1.9541, "step": 93800 }, { "epoch": 0.06, "learning_rate": 4.7025296189561326e-05, "loss": 1.9844, "step": 93900 }, { "epoch": 0.06, "learning_rate": 4.702209414024976e-05, "loss": 1.9829, "step": 94000 }, { "epoch": 0.06, "eval_loss": 1.8141056299209595, "eval_runtime": 97.6534, "eval_samples_per_second": 102.403, "eval_steps_per_second": 6.4, "step": 94000 }, { "epoch": 0.06, "learning_rate": 4.7018892090938205e-05, "loss": 1.9832, "step": 94100 }, { "epoch": 0.06, "learning_rate": 4.7015690041626645e-05, "loss": 1.9682, "step": 94200 }, { "epoch": 0.06, "learning_rate": 4.7012487992315085e-05, "loss": 1.9732, "step": 94300 }, { "epoch": 0.06, "learning_rate": 4.7009285943003525e-05, "loss": 1.9857, "step": 94400 }, { "epoch": 0.06, "learning_rate": 4.7006083893691964e-05, "loss": 1.9645, "step": 94500 }, { "epoch": 0.06, "learning_rate": 4.7002881844380404e-05, "loss": 1.9706, "step": 94600 }, { "epoch": 0.06, "learning_rate": 4.6999679795068844e-05, "loss": 1.9872, "step": 94700 }, { "epoch": 0.06, "learning_rate": 4.699647774575729e-05, "loss": 1.9728, "step": 94800 }, { "epoch": 0.06, "learning_rate": 4.6993275696445723e-05, "loss": 1.9865, "step": 94900 }, { "epoch": 0.06, "learning_rate": 4.699007364713417e-05, "loss": 1.9703, "step": 95000 }, { "epoch": 0.06, "eval_loss": 1.816051721572876, "eval_runtime": 101.0107, "eval_samples_per_second": 98.999, "eval_steps_per_second": 6.187, "step": 95000 }, { "epoch": 0.06, "learning_rate": 4.698687159782261e-05, "loss": 1.976, "step": 95100 }, { "epoch": 0.06, "learning_rate": 4.698366954851105e-05, "loss": 1.9807, "step": 95200 }, { "epoch": 0.06, "learning_rate": 4.698046749919949e-05, "loss": 1.9635, "step": 95300 }, { "epoch": 0.06, "learning_rate": 4.697726544988793e-05, "loss": 1.9818, "step": 95400 }, { "epoch": 0.06, "learning_rate": 4.6974063400576375e-05, "loss": 1.9865, "step": 95500 }, { "epoch": 0.06, "learning_rate": 4.697086135126481e-05, "loss": 1.9781, "step": 95600 }, { "epoch": 0.06, "learning_rate": 4.6967659301953255e-05, "loss": 1.9526, "step": 95700 }, { "epoch": 0.06, "learning_rate": 4.6964457252641695e-05, "loss": 1.9883, "step": 95800 }, { "epoch": 0.06, "learning_rate": 4.6961255203330134e-05, "loss": 1.9667, "step": 95900 }, { "epoch": 0.06, "learning_rate": 4.6958053154018574e-05, "loss": 1.9977, "step": 96000 }, { "epoch": 0.06, "eval_loss": 1.8094230890274048, "eval_runtime": 99.0088, "eval_samples_per_second": 101.001, "eval_steps_per_second": 6.313, "step": 96000 }, { "epoch": 0.06, "learning_rate": 4.6954851104707014e-05, "loss": 1.9528, "step": 96100 }, { "epoch": 0.06, "learning_rate": 4.695164905539546e-05, "loss": 1.9692, "step": 96200 }, { "epoch": 0.06, "learning_rate": 4.694844700608389e-05, "loss": 1.9874, "step": 96300 }, { "epoch": 0.06, "learning_rate": 4.694524495677234e-05, "loss": 1.9779, "step": 96400 }, { "epoch": 0.06, "learning_rate": 4.694204290746077e-05, "loss": 1.9785, "step": 96500 }, { "epoch": 0.06, "learning_rate": 4.693884085814922e-05, "loss": 1.9807, "step": 96600 }, { "epoch": 0.06, "learning_rate": 4.693563880883766e-05, "loss": 1.9897, "step": 96700 }, { "epoch": 0.06, "learning_rate": 4.69324367595261e-05, "loss": 1.9758, "step": 96800 }, { "epoch": 0.06, "learning_rate": 4.692923471021454e-05, "loss": 1.9734, "step": 96900 }, { "epoch": 0.06, "learning_rate": 4.692603266090298e-05, "loss": 1.9643, "step": 97000 }, { "epoch": 0.06, "eval_loss": 1.8144809007644653, "eval_runtime": 99.5406, "eval_samples_per_second": 100.462, "eval_steps_per_second": 6.279, "step": 97000 }, { "epoch": 0.06, "learning_rate": 4.6922830611591425e-05, "loss": 1.9585, "step": 97100 }, { "epoch": 0.06, "learning_rate": 4.691962856227986e-05, "loss": 1.9844, "step": 97200 }, { "epoch": 0.06, "learning_rate": 4.6916426512968304e-05, "loss": 1.9781, "step": 97300 }, { "epoch": 0.06, "learning_rate": 4.6913224463656744e-05, "loss": 1.9621, "step": 97400 }, { "epoch": 0.06, "learning_rate": 4.6910022414345184e-05, "loss": 1.9519, "step": 97500 }, { "epoch": 0.06, "learning_rate": 4.6906820365033623e-05, "loss": 1.9435, "step": 97600 }, { "epoch": 0.06, "learning_rate": 4.690361831572206e-05, "loss": 1.9556, "step": 97700 }, { "epoch": 0.06, "learning_rate": 4.690041626641051e-05, "loss": 1.9599, "step": 97800 }, { "epoch": 0.06, "learning_rate": 4.689721421709894e-05, "loss": 1.967, "step": 97900 }, { "epoch": 0.06, "learning_rate": 4.689401216778739e-05, "loss": 1.9527, "step": 98000 }, { "epoch": 0.06, "eval_loss": 1.8154406547546387, "eval_runtime": 96.9973, "eval_samples_per_second": 103.096, "eval_steps_per_second": 6.443, "step": 98000 }, { "epoch": 0.06, "learning_rate": 4.689081011847582e-05, "loss": 1.97, "step": 98100 }, { "epoch": 0.06, "learning_rate": 4.688760806916427e-05, "loss": 1.9785, "step": 98200 }, { "epoch": 0.06, "learning_rate": 4.688440601985271e-05, "loss": 1.9737, "step": 98300 }, { "epoch": 0.06, "learning_rate": 4.688120397054115e-05, "loss": 1.9584, "step": 98400 }, { "epoch": 0.06, "learning_rate": 4.6878001921229595e-05, "loss": 1.966, "step": 98500 }, { "epoch": 0.06, "learning_rate": 4.687479987191803e-05, "loss": 1.961, "step": 98600 }, { "epoch": 0.06, "learning_rate": 4.6871597822606474e-05, "loss": 1.9616, "step": 98700 }, { "epoch": 0.06, "learning_rate": 4.686839577329491e-05, "loss": 1.9509, "step": 98800 }, { "epoch": 0.06, "learning_rate": 4.6865193723983354e-05, "loss": 1.9604, "step": 98900 }, { "epoch": 0.06, "learning_rate": 4.686199167467179e-05, "loss": 1.9559, "step": 99000 }, { "epoch": 0.06, "eval_loss": 1.809850811958313, "eval_runtime": 94.7868, "eval_samples_per_second": 105.5, "eval_steps_per_second": 6.594, "step": 99000 }, { "epoch": 0.06, "learning_rate": 4.685878962536023e-05, "loss": 1.9935, "step": 99100 }, { "epoch": 0.06, "learning_rate": 4.685558757604867e-05, "loss": 1.9492, "step": 99200 }, { "epoch": 0.06, "learning_rate": 4.685238552673711e-05, "loss": 1.9837, "step": 99300 }, { "epoch": 0.06, "learning_rate": 4.684918347742556e-05, "loss": 1.9537, "step": 99400 }, { "epoch": 0.06, "learning_rate": 4.684598142811399e-05, "loss": 1.97, "step": 99500 }, { "epoch": 0.06, "learning_rate": 4.684277937880244e-05, "loss": 1.9468, "step": 99600 }, { "epoch": 0.06, "learning_rate": 4.683957732949087e-05, "loss": 1.9389, "step": 99700 }, { "epoch": 0.06, "learning_rate": 4.683637528017932e-05, "loss": 1.9787, "step": 99800 }, { "epoch": 0.06, "learning_rate": 4.683317323086776e-05, "loss": 1.9814, "step": 99900 }, { "epoch": 0.06, "learning_rate": 4.68299711815562e-05, "loss": 1.9562, "step": 100000 }, { "epoch": 0.06, "eval_loss": 1.809249997138977, "eval_runtime": 95.4999, "eval_samples_per_second": 104.712, "eval_steps_per_second": 6.545, "step": 100000 }, { "epoch": 0.06, "learning_rate": 4.6826769132244644e-05, "loss": 1.966, "step": 100100 }, { "epoch": 0.06, "learning_rate": 4.682356708293308e-05, "loss": 1.9655, "step": 100200 }, { "epoch": 0.06, "learning_rate": 4.6820365033621523e-05, "loss": 1.9498, "step": 100300 }, { "epoch": 0.06, "learning_rate": 4.6817162984309956e-05, "loss": 1.9548, "step": 100400 }, { "epoch": 0.06, "learning_rate": 4.68139609349984e-05, "loss": 1.9691, "step": 100500 }, { "epoch": 0.06, "learning_rate": 4.681075888568684e-05, "loss": 1.9629, "step": 100600 }, { "epoch": 0.06, "learning_rate": 4.680755683637528e-05, "loss": 1.9684, "step": 100700 }, { "epoch": 0.06, "learning_rate": 4.680435478706372e-05, "loss": 1.9741, "step": 100800 }, { "epoch": 0.06, "learning_rate": 4.680115273775216e-05, "loss": 1.9612, "step": 100900 }, { "epoch": 0.06, "learning_rate": 4.679795068844061e-05, "loss": 1.9692, "step": 101000 }, { "epoch": 0.06, "eval_loss": 1.8079750537872314, "eval_runtime": 97.771, "eval_samples_per_second": 102.28, "eval_steps_per_second": 6.392, "step": 101000 }, { "epoch": 0.06, "learning_rate": 4.679474863912904e-05, "loss": 1.9639, "step": 101100 }, { "epoch": 0.06, "learning_rate": 4.679154658981749e-05, "loss": 1.9702, "step": 101200 }, { "epoch": 0.06, "learning_rate": 4.678834454050592e-05, "loss": 1.9798, "step": 101300 }, { "epoch": 0.06, "learning_rate": 4.678514249119437e-05, "loss": 1.9616, "step": 101400 }, { "epoch": 0.06, "learning_rate": 4.678194044188281e-05, "loss": 1.9577, "step": 101500 }, { "epoch": 0.07, "learning_rate": 4.677873839257125e-05, "loss": 1.9452, "step": 101600 }, { "epoch": 0.07, "learning_rate": 4.6775536343259693e-05, "loss": 1.9693, "step": 101700 }, { "epoch": 0.07, "learning_rate": 4.6772334293948126e-05, "loss": 1.9628, "step": 101800 }, { "epoch": 0.07, "learning_rate": 4.676913224463657e-05, "loss": 1.958, "step": 101900 }, { "epoch": 0.07, "learning_rate": 4.6765930195325006e-05, "loss": 1.9537, "step": 102000 }, { "epoch": 0.07, "eval_loss": 1.808587908744812, "eval_runtime": 98.7882, "eval_samples_per_second": 101.227, "eval_steps_per_second": 6.327, "step": 102000 }, { "epoch": 0.07, "learning_rate": 4.676272814601345e-05, "loss": 1.9689, "step": 102100 }, { "epoch": 0.07, "learning_rate": 4.675952609670189e-05, "loss": 1.9652, "step": 102200 }, { "epoch": 0.07, "learning_rate": 4.675632404739033e-05, "loss": 1.9524, "step": 102300 }, { "epoch": 0.07, "learning_rate": 4.675312199807877e-05, "loss": 1.9746, "step": 102400 }, { "epoch": 0.07, "learning_rate": 4.674991994876721e-05, "loss": 1.98, "step": 102500 }, { "epoch": 0.07, "learning_rate": 4.674671789945566e-05, "loss": 1.9678, "step": 102600 }, { "epoch": 0.07, "learning_rate": 4.674351585014409e-05, "loss": 1.9384, "step": 102700 }, { "epoch": 0.07, "learning_rate": 4.674031380083254e-05, "loss": 1.9707, "step": 102800 }, { "epoch": 0.07, "learning_rate": 4.673711175152097e-05, "loss": 1.9691, "step": 102900 }, { "epoch": 0.07, "learning_rate": 4.673390970220942e-05, "loss": 1.9556, "step": 103000 }, { "epoch": 0.07, "eval_loss": 1.807947039604187, "eval_runtime": 101.6608, "eval_samples_per_second": 98.366, "eval_steps_per_second": 6.148, "step": 103000 }, { "epoch": 0.07, "learning_rate": 4.6730707652897857e-05, "loss": 1.9837, "step": 103100 }, { "epoch": 0.07, "learning_rate": 4.6727505603586296e-05, "loss": 1.9695, "step": 103200 }, { "epoch": 0.07, "learning_rate": 4.672430355427474e-05, "loss": 1.9514, "step": 103300 }, { "epoch": 0.07, "learning_rate": 4.6721101504963176e-05, "loss": 1.974, "step": 103400 }, { "epoch": 0.07, "learning_rate": 4.671789945565162e-05, "loss": 1.9659, "step": 103500 }, { "epoch": 0.07, "learning_rate": 4.6714697406340055e-05, "loss": 1.9672, "step": 103600 }, { "epoch": 0.07, "learning_rate": 4.67114953570285e-05, "loss": 1.9487, "step": 103700 }, { "epoch": 0.07, "learning_rate": 4.670829330771694e-05, "loss": 1.9391, "step": 103800 }, { "epoch": 0.07, "learning_rate": 4.670509125840538e-05, "loss": 1.9688, "step": 103900 }, { "epoch": 0.07, "learning_rate": 4.670188920909382e-05, "loss": 1.9457, "step": 104000 }, { "epoch": 0.07, "eval_loss": 1.807678461074829, "eval_runtime": 100.8784, "eval_samples_per_second": 99.129, "eval_steps_per_second": 6.196, "step": 104000 }, { "epoch": 0.07, "learning_rate": 4.669868715978226e-05, "loss": 1.9798, "step": 104100 }, { "epoch": 0.07, "learning_rate": 4.669548511047071e-05, "loss": 1.953, "step": 104200 }, { "epoch": 0.07, "learning_rate": 4.669228306115914e-05, "loss": 1.9507, "step": 104300 }, { "epoch": 0.07, "learning_rate": 4.668908101184759e-05, "loss": 1.9543, "step": 104400 }, { "epoch": 0.07, "learning_rate": 4.668587896253602e-05, "loss": 1.9665, "step": 104500 }, { "epoch": 0.07, "learning_rate": 4.6682676913224466e-05, "loss": 1.9674, "step": 104600 }, { "epoch": 0.07, "learning_rate": 4.6679474863912906e-05, "loss": 1.9589, "step": 104700 }, { "epoch": 0.07, "learning_rate": 4.6676272814601346e-05, "loss": 1.9645, "step": 104800 }, { "epoch": 0.07, "learning_rate": 4.667307076528979e-05, "loss": 1.9331, "step": 104900 }, { "epoch": 0.07, "learning_rate": 4.6669868715978225e-05, "loss": 1.9459, "step": 105000 }, { "epoch": 0.07, "eval_loss": 1.8105168342590332, "eval_runtime": 99.1576, "eval_samples_per_second": 100.85, "eval_steps_per_second": 6.303, "step": 105000 }, { "epoch": 0.07, "learning_rate": 4.666666666666667e-05, "loss": 1.9436, "step": 105100 }, { "epoch": 0.07, "learning_rate": 4.6663464617355105e-05, "loss": 1.943, "step": 105200 }, { "epoch": 0.07, "learning_rate": 4.666026256804355e-05, "loss": 1.9426, "step": 105300 }, { "epoch": 0.07, "learning_rate": 4.665706051873199e-05, "loss": 1.9635, "step": 105400 }, { "epoch": 0.07, "learning_rate": 4.665385846942043e-05, "loss": 1.9472, "step": 105500 }, { "epoch": 0.07, "learning_rate": 4.665065642010887e-05, "loss": 1.954, "step": 105600 }, { "epoch": 0.07, "learning_rate": 4.664745437079731e-05, "loss": 1.9531, "step": 105700 }, { "epoch": 0.07, "learning_rate": 4.6644252321485757e-05, "loss": 1.9432, "step": 105800 }, { "epoch": 0.07, "learning_rate": 4.664105027217419e-05, "loss": 1.9418, "step": 105900 }, { "epoch": 0.07, "learning_rate": 4.6637848222862636e-05, "loss": 1.9603, "step": 106000 }, { "epoch": 0.07, "eval_loss": 1.8081880807876587, "eval_runtime": 96.6002, "eval_samples_per_second": 103.519, "eval_steps_per_second": 6.47, "step": 106000 }, { "epoch": 0.07, "learning_rate": 4.6634646173551076e-05, "loss": 1.9442, "step": 106100 }, { "epoch": 0.07, "learning_rate": 4.6631444124239516e-05, "loss": 1.9546, "step": 106200 }, { "epoch": 0.07, "learning_rate": 4.6628242074927955e-05, "loss": 1.9537, "step": 106300 }, { "epoch": 0.07, "learning_rate": 4.6625040025616395e-05, "loss": 1.9453, "step": 106400 }, { "epoch": 0.07, "learning_rate": 4.662183797630484e-05, "loss": 1.946, "step": 106500 }, { "epoch": 0.07, "learning_rate": 4.6618635926993274e-05, "loss": 1.9632, "step": 106600 }, { "epoch": 0.07, "learning_rate": 4.661543387768172e-05, "loss": 1.9734, "step": 106700 }, { "epoch": 0.07, "learning_rate": 4.6612231828370154e-05, "loss": 1.9442, "step": 106800 }, { "epoch": 0.07, "learning_rate": 4.66090297790586e-05, "loss": 1.9459, "step": 106900 }, { "epoch": 0.07, "learning_rate": 4.660582772974704e-05, "loss": 1.9647, "step": 107000 }, { "epoch": 0.07, "eval_loss": 1.8084884881973267, "eval_runtime": 95.8026, "eval_samples_per_second": 104.381, "eval_steps_per_second": 6.524, "step": 107000 }, { "epoch": 0.07, "learning_rate": 4.660262568043548e-05, "loss": 1.9498, "step": 107100 }, { "epoch": 0.07, "learning_rate": 4.659942363112392e-05, "loss": 1.9426, "step": 107200 }, { "epoch": 0.07, "learning_rate": 4.659622158181236e-05, "loss": 1.9448, "step": 107300 }, { "epoch": 0.07, "learning_rate": 4.6593019532500806e-05, "loss": 1.9771, "step": 107400 }, { "epoch": 0.07, "learning_rate": 4.658981748318924e-05, "loss": 1.9468, "step": 107500 }, { "epoch": 0.07, "learning_rate": 4.6586615433877685e-05, "loss": 1.9377, "step": 107600 }, { "epoch": 0.07, "learning_rate": 4.6583413384566125e-05, "loss": 1.9321, "step": 107700 }, { "epoch": 0.07, "learning_rate": 4.6580211335254565e-05, "loss": 1.9625, "step": 107800 }, { "epoch": 0.07, "learning_rate": 4.6577009285943005e-05, "loss": 1.9476, "step": 107900 }, { "epoch": 0.07, "learning_rate": 4.6573807236631444e-05, "loss": 1.9469, "step": 108000 }, { "epoch": 0.07, "eval_loss": 1.8085095882415771, "eval_runtime": 97.8897, "eval_samples_per_second": 102.156, "eval_steps_per_second": 6.385, "step": 108000 }, { "epoch": 0.07, "learning_rate": 4.657060518731989e-05, "loss": 1.9417, "step": 108100 }, { "epoch": 0.07, "learning_rate": 4.6567403138008324e-05, "loss": 1.9316, "step": 108200 }, { "epoch": 0.07, "learning_rate": 4.656420108869677e-05, "loss": 1.9404, "step": 108300 }, { "epoch": 0.07, "learning_rate": 4.656099903938521e-05, "loss": 1.9577, "step": 108400 }, { "epoch": 0.07, "learning_rate": 4.655779699007365e-05, "loss": 1.9692, "step": 108500 }, { "epoch": 0.07, "learning_rate": 4.655459494076209e-05, "loss": 1.9515, "step": 108600 }, { "epoch": 0.07, "learning_rate": 4.655139289145053e-05, "loss": 1.9627, "step": 108700 }, { "epoch": 0.07, "learning_rate": 4.654819084213897e-05, "loss": 1.9393, "step": 108800 }, { "epoch": 0.07, "learning_rate": 4.654498879282741e-05, "loss": 1.9439, "step": 108900 }, { "epoch": 0.07, "learning_rate": 4.6541786743515855e-05, "loss": 1.9665, "step": 109000 }, { "epoch": 0.07, "eval_loss": 1.8042912483215332, "eval_runtime": 100.2071, "eval_samples_per_second": 99.793, "eval_steps_per_second": 6.237, "step": 109000 }, { "epoch": 0.07, "learning_rate": 4.653858469420429e-05, "loss": 1.9512, "step": 109100 }, { "epoch": 0.07, "learning_rate": 4.6535382644892735e-05, "loss": 1.9593, "step": 109200 }, { "epoch": 0.07, "learning_rate": 4.6532180595581175e-05, "loss": 1.9531, "step": 109300 }, { "epoch": 0.07, "learning_rate": 4.6528978546269614e-05, "loss": 1.9576, "step": 109400 }, { "epoch": 0.07, "learning_rate": 4.6525776496958054e-05, "loss": 1.9312, "step": 109500 }, { "epoch": 0.07, "learning_rate": 4.6522574447646494e-05, "loss": 1.9441, "step": 109600 }, { "epoch": 0.07, "learning_rate": 4.651937239833494e-05, "loss": 1.9575, "step": 109700 }, { "epoch": 0.07, "learning_rate": 4.651617034902337e-05, "loss": 1.9545, "step": 109800 }, { "epoch": 0.07, "learning_rate": 4.651296829971182e-05, "loss": 1.9606, "step": 109900 }, { "epoch": 0.07, "learning_rate": 4.650976625040026e-05, "loss": 1.9654, "step": 110000 }, { "epoch": 0.07, "eval_loss": 1.8042863607406616, "eval_runtime": 96.4684, "eval_samples_per_second": 103.661, "eval_steps_per_second": 6.479, "step": 110000 }, { "epoch": 0.07, "learning_rate": 4.65065642010887e-05, "loss": 1.952, "step": 110100 }, { "epoch": 0.07, "learning_rate": 4.650336215177714e-05, "loss": 1.9403, "step": 110200 }, { "epoch": 0.07, "learning_rate": 4.650016010246558e-05, "loss": 1.944, "step": 110300 }, { "epoch": 0.07, "learning_rate": 4.649695805315402e-05, "loss": 1.9419, "step": 110400 }, { "epoch": 0.07, "learning_rate": 4.649375600384246e-05, "loss": 1.9383, "step": 110500 }, { "epoch": 0.07, "learning_rate": 4.6490553954530905e-05, "loss": 1.9493, "step": 110600 }, { "epoch": 0.07, "learning_rate": 4.6487351905219344e-05, "loss": 1.9391, "step": 110700 }, { "epoch": 0.07, "learning_rate": 4.6484149855907784e-05, "loss": 1.9499, "step": 110800 }, { "epoch": 0.07, "learning_rate": 4.6480947806596224e-05, "loss": 1.9311, "step": 110900 }, { "epoch": 0.07, "learning_rate": 4.6477745757284664e-05, "loss": 1.9467, "step": 111000 }, { "epoch": 0.07, "eval_loss": 1.8050283193588257, "eval_runtime": 99.8993, "eval_samples_per_second": 100.101, "eval_steps_per_second": 6.256, "step": 111000 }, { "epoch": 0.07, "learning_rate": 4.64745437079731e-05, "loss": 1.9564, "step": 111100 }, { "epoch": 0.07, "learning_rate": 4.647134165866154e-05, "loss": 1.9384, "step": 111200 }, { "epoch": 0.07, "learning_rate": 4.646813960934999e-05, "loss": 1.955, "step": 111300 }, { "epoch": 0.07, "learning_rate": 4.646493756003843e-05, "loss": 1.9427, "step": 111400 }, { "epoch": 0.07, "learning_rate": 4.646173551072687e-05, "loss": 1.9505, "step": 111500 }, { "epoch": 0.07, "learning_rate": 4.645853346141531e-05, "loss": 1.9499, "step": 111600 }, { "epoch": 0.07, "learning_rate": 4.645533141210375e-05, "loss": 1.9634, "step": 111700 }, { "epoch": 0.07, "learning_rate": 4.645212936279219e-05, "loss": 1.9593, "step": 111800 }, { "epoch": 0.07, "learning_rate": 4.644892731348063e-05, "loss": 1.9587, "step": 111900 }, { "epoch": 0.07, "learning_rate": 4.644572526416907e-05, "loss": 1.9449, "step": 112000 }, { "epoch": 0.07, "eval_loss": 1.8044167757034302, "eval_runtime": 98.6832, "eval_samples_per_second": 101.334, "eval_steps_per_second": 6.333, "step": 112000 }, { "epoch": 0.07, "learning_rate": 4.644252321485751e-05, "loss": 1.9443, "step": 112100 }, { "epoch": 0.07, "learning_rate": 4.6439321165545954e-05, "loss": 1.955, "step": 112200 }, { "epoch": 0.07, "learning_rate": 4.6436119116234394e-05, "loss": 1.9431, "step": 112300 }, { "epoch": 0.07, "learning_rate": 4.6432917066922834e-05, "loss": 1.9232, "step": 112400 }, { "epoch": 0.07, "learning_rate": 4.642971501761127e-05, "loss": 1.9553, "step": 112500 }, { "epoch": 0.07, "learning_rate": 4.642651296829971e-05, "loss": 1.9675, "step": 112600 }, { "epoch": 0.07, "learning_rate": 4.642331091898815e-05, "loss": 1.9446, "step": 112700 }, { "epoch": 0.07, "learning_rate": 4.642010886967659e-05, "loss": 1.9356, "step": 112800 }, { "epoch": 0.07, "learning_rate": 4.641690682036504e-05, "loss": 1.9494, "step": 112900 }, { "epoch": 0.07, "learning_rate": 4.641370477105348e-05, "loss": 1.9596, "step": 113000 }, { "epoch": 0.07, "eval_loss": 1.804111361503601, "eval_runtime": 104.1532, "eval_samples_per_second": 96.012, "eval_steps_per_second": 6.001, "step": 113000 }, { "epoch": 0.07, "learning_rate": 4.641050272174192e-05, "loss": 1.9349, "step": 113100 }, { "epoch": 0.07, "learning_rate": 4.640730067243036e-05, "loss": 1.9661, "step": 113200 }, { "epoch": 0.07, "learning_rate": 4.64040986231188e-05, "loss": 1.956, "step": 113300 }, { "epoch": 0.07, "learning_rate": 4.640089657380724e-05, "loss": 1.9492, "step": 113400 }, { "epoch": 0.07, "learning_rate": 4.639769452449568e-05, "loss": 1.9573, "step": 113500 }, { "epoch": 0.07, "learning_rate": 4.639449247518412e-05, "loss": 1.9451, "step": 113600 }, { "epoch": 0.07, "learning_rate": 4.6391290425872564e-05, "loss": 1.9562, "step": 113700 }, { "epoch": 0.07, "learning_rate": 4.6388088376561003e-05, "loss": 1.9627, "step": 113800 }, { "epoch": 0.07, "learning_rate": 4.638488632724944e-05, "loss": 1.965, "step": 113900 }, { "epoch": 0.07, "learning_rate": 4.638168427793788e-05, "loss": 1.9599, "step": 114000 }, { "epoch": 0.07, "eval_loss": 1.8033814430236816, "eval_runtime": 96.53, "eval_samples_per_second": 103.595, "eval_steps_per_second": 6.475, "step": 114000 }, { "epoch": 0.07, "learning_rate": 4.637848222862632e-05, "loss": 1.956, "step": 114100 }, { "epoch": 0.07, "learning_rate": 4.637528017931476e-05, "loss": 1.9496, "step": 114200 }, { "epoch": 0.07, "learning_rate": 4.63720781300032e-05, "loss": 1.9513, "step": 114300 }, { "epoch": 0.07, "learning_rate": 4.636887608069164e-05, "loss": 1.9497, "step": 114400 }, { "epoch": 0.07, "learning_rate": 4.636567403138009e-05, "loss": 1.9618, "step": 114500 }, { "epoch": 0.07, "learning_rate": 4.636247198206853e-05, "loss": 1.9436, "step": 114600 }, { "epoch": 0.07, "learning_rate": 4.635926993275697e-05, "loss": 1.9528, "step": 114700 }, { "epoch": 0.07, "learning_rate": 4.635606788344541e-05, "loss": 1.9404, "step": 114800 }, { "epoch": 0.07, "learning_rate": 4.635286583413385e-05, "loss": 1.9365, "step": 114900 }, { "epoch": 0.07, "learning_rate": 4.634966378482229e-05, "loss": 1.9368, "step": 115000 }, { "epoch": 0.07, "eval_loss": 1.802424430847168, "eval_runtime": 100.506, "eval_samples_per_second": 99.497, "eval_steps_per_second": 6.219, "step": 115000 }, { "epoch": 0.07, "learning_rate": 4.634646173551073e-05, "loss": 1.9303, "step": 115100 }, { "epoch": 0.07, "learning_rate": 4.6343259686199167e-05, "loss": 1.9404, "step": 115200 }, { "epoch": 0.07, "learning_rate": 4.634005763688761e-05, "loss": 1.9351, "step": 115300 }, { "epoch": 0.07, "learning_rate": 4.633685558757605e-05, "loss": 1.9653, "step": 115400 }, { "epoch": 0.07, "learning_rate": 4.633365353826449e-05, "loss": 1.9609, "step": 115500 }, { "epoch": 0.07, "learning_rate": 4.633045148895293e-05, "loss": 1.9671, "step": 115600 }, { "epoch": 0.07, "learning_rate": 4.632724943964137e-05, "loss": 1.9486, "step": 115700 }, { "epoch": 0.07, "learning_rate": 4.632404739032981e-05, "loss": 1.9493, "step": 115800 }, { "epoch": 0.07, "learning_rate": 4.632084534101825e-05, "loss": 1.9477, "step": 115900 }, { "epoch": 0.07, "learning_rate": 4.63176432917067e-05, "loss": 1.955, "step": 116000 }, { "epoch": 0.07, "eval_loss": 1.8018429279327393, "eval_runtime": 95.6338, "eval_samples_per_second": 104.566, "eval_steps_per_second": 6.535, "step": 116000 }, { "epoch": 0.07, "learning_rate": 4.631444124239514e-05, "loss": 1.9574, "step": 116100 }, { "epoch": 0.07, "learning_rate": 4.631123919308358e-05, "loss": 1.9535, "step": 116200 }, { "epoch": 0.07, "learning_rate": 4.630803714377202e-05, "loss": 1.9403, "step": 116300 }, { "epoch": 0.07, "learning_rate": 4.630483509446046e-05, "loss": 1.9503, "step": 116400 }, { "epoch": 0.07, "learning_rate": 4.63016330451489e-05, "loss": 1.9494, "step": 116500 }, { "epoch": 0.07, "learning_rate": 4.6298430995837336e-05, "loss": 1.9371, "step": 116600 }, { "epoch": 0.07, "learning_rate": 4.6295228946525776e-05, "loss": 1.9619, "step": 116700 }, { "epoch": 0.07, "learning_rate": 4.6292026897214216e-05, "loss": 1.9452, "step": 116800 }, { "epoch": 0.07, "learning_rate": 4.628882484790266e-05, "loss": 1.9467, "step": 116900 }, { "epoch": 0.07, "learning_rate": 4.62856227985911e-05, "loss": 1.9431, "step": 117000 }, { "epoch": 0.07, "eval_loss": 1.7981681823730469, "eval_runtime": 97.4478, "eval_samples_per_second": 102.619, "eval_steps_per_second": 6.414, "step": 117000 }, { "epoch": 0.07, "learning_rate": 4.628242074927954e-05, "loss": 1.9395, "step": 117100 }, { "epoch": 0.08, "learning_rate": 4.627921869996798e-05, "loss": 1.9343, "step": 117200 }, { "epoch": 0.08, "learning_rate": 4.627601665065642e-05, "loss": 1.9486, "step": 117300 }, { "epoch": 0.08, "learning_rate": 4.627281460134486e-05, "loss": 1.9516, "step": 117400 }, { "epoch": 0.08, "learning_rate": 4.62696125520333e-05, "loss": 1.9586, "step": 117500 }, { "epoch": 0.08, "learning_rate": 4.626641050272175e-05, "loss": 1.9271, "step": 117600 }, { "epoch": 0.08, "learning_rate": 4.626320845341019e-05, "loss": 1.932, "step": 117700 }, { "epoch": 0.08, "learning_rate": 4.626000640409863e-05, "loss": 1.9388, "step": 117800 }, { "epoch": 0.08, "learning_rate": 4.6256804354787067e-05, "loss": 1.9357, "step": 117900 }, { "epoch": 0.08, "learning_rate": 4.6253602305475506e-05, "loss": 1.9468, "step": 118000 }, { "epoch": 0.08, "eval_loss": 1.7984724044799805, "eval_runtime": 97.012, "eval_samples_per_second": 103.08, "eval_steps_per_second": 6.443, "step": 118000 }, { "epoch": 0.08, "learning_rate": 4.6250400256163946e-05, "loss": 1.9462, "step": 118100 }, { "epoch": 0.08, "learning_rate": 4.6247198206852386e-05, "loss": 1.9407, "step": 118200 }, { "epoch": 0.08, "learning_rate": 4.624399615754083e-05, "loss": 1.941, "step": 118300 }, { "epoch": 0.08, "learning_rate": 4.6240794108229265e-05, "loss": 1.9189, "step": 118400 }, { "epoch": 0.08, "learning_rate": 4.623759205891771e-05, "loss": 1.9613, "step": 118500 }, { "epoch": 0.08, "learning_rate": 4.623439000960615e-05, "loss": 1.9349, "step": 118600 }, { "epoch": 0.08, "learning_rate": 4.623118796029459e-05, "loss": 1.9292, "step": 118700 }, { "epoch": 0.08, "learning_rate": 4.622798591098303e-05, "loss": 1.948, "step": 118800 }, { "epoch": 0.08, "learning_rate": 4.622478386167147e-05, "loss": 1.9217, "step": 118900 }, { "epoch": 0.08, "learning_rate": 4.622158181235991e-05, "loss": 1.9331, "step": 119000 }, { "epoch": 0.08, "eval_loss": 1.8019925355911255, "eval_runtime": 98.2254, "eval_samples_per_second": 101.807, "eval_steps_per_second": 6.363, "step": 119000 }, { "epoch": 0.08, "learning_rate": 4.621837976304835e-05, "loss": 1.9405, "step": 119100 }, { "epoch": 0.08, "learning_rate": 4.62151777137368e-05, "loss": 1.9606, "step": 119200 }, { "epoch": 0.08, "learning_rate": 4.6211975664425236e-05, "loss": 1.9414, "step": 119300 }, { "epoch": 0.08, "learning_rate": 4.6208773615113676e-05, "loss": 1.9311, "step": 119400 }, { "epoch": 0.08, "learning_rate": 4.6205571565802116e-05, "loss": 1.9407, "step": 119500 }, { "epoch": 0.08, "learning_rate": 4.6202369516490556e-05, "loss": 1.9261, "step": 119600 }, { "epoch": 0.08, "learning_rate": 4.6199167467178995e-05, "loss": 1.9465, "step": 119700 }, { "epoch": 0.08, "learning_rate": 4.6195965417867435e-05, "loss": 1.9481, "step": 119800 }, { "epoch": 0.08, "learning_rate": 4.619276336855588e-05, "loss": 1.918, "step": 119900 }, { "epoch": 0.08, "learning_rate": 4.6189561319244315e-05, "loss": 1.9639, "step": 120000 }, { "epoch": 0.08, "eval_loss": 1.7974175214767456, "eval_runtime": 99.9798, "eval_samples_per_second": 100.02, "eval_steps_per_second": 6.251, "step": 120000 }, { "epoch": 0.08, "learning_rate": 4.618635926993276e-05, "loss": 1.9133, "step": 120100 }, { "epoch": 0.08, "learning_rate": 4.61831572206212e-05, "loss": 1.9458, "step": 120200 }, { "epoch": 0.08, "learning_rate": 4.617995517130964e-05, "loss": 1.938, "step": 120300 }, { "epoch": 0.08, "learning_rate": 4.617675312199808e-05, "loss": 1.9522, "step": 120400 }, { "epoch": 0.08, "learning_rate": 4.617355107268652e-05, "loss": 1.9299, "step": 120500 }, { "epoch": 0.08, "learning_rate": 4.617034902337497e-05, "loss": 1.9306, "step": 120600 }, { "epoch": 0.08, "learning_rate": 4.61671469740634e-05, "loss": 1.923, "step": 120700 }, { "epoch": 0.08, "learning_rate": 4.6163944924751846e-05, "loss": 1.9327, "step": 120800 }, { "epoch": 0.08, "learning_rate": 4.6160742875440286e-05, "loss": 1.9261, "step": 120900 }, { "epoch": 0.08, "learning_rate": 4.6157540826128726e-05, "loss": 1.9312, "step": 121000 }, { "epoch": 0.08, "eval_loss": 1.7995890378952026, "eval_runtime": 101.6755, "eval_samples_per_second": 98.352, "eval_steps_per_second": 6.147, "step": 121000 }, { "epoch": 0.08, "learning_rate": 4.6154338776817165e-05, "loss": 1.9479, "step": 121100 }, { "epoch": 0.08, "learning_rate": 4.6151136727505605e-05, "loss": 1.9204, "step": 121200 }, { "epoch": 0.08, "learning_rate": 4.614793467819405e-05, "loss": 1.9212, "step": 121300 }, { "epoch": 0.08, "learning_rate": 4.6144732628882485e-05, "loss": 1.9453, "step": 121400 }, { "epoch": 0.08, "learning_rate": 4.614153057957093e-05, "loss": 1.9549, "step": 121500 }, { "epoch": 0.08, "learning_rate": 4.6138328530259364e-05, "loss": 1.9577, "step": 121600 }, { "epoch": 0.08, "learning_rate": 4.613512648094781e-05, "loss": 1.9332, "step": 121700 }, { "epoch": 0.08, "learning_rate": 4.613192443163625e-05, "loss": 1.9285, "step": 121800 }, { "epoch": 0.08, "learning_rate": 4.612872238232469e-05, "loss": 1.9248, "step": 121900 }, { "epoch": 0.08, "learning_rate": 4.612552033301313e-05, "loss": 1.9394, "step": 122000 }, { "epoch": 0.08, "eval_loss": 1.7989450693130493, "eval_runtime": 103.313, "eval_samples_per_second": 96.793, "eval_steps_per_second": 6.05, "step": 122000 }, { "epoch": 0.08, "learning_rate": 4.612231828370157e-05, "loss": 1.9346, "step": 122100 }, { "epoch": 0.08, "learning_rate": 4.6119116234390016e-05, "loss": 1.9513, "step": 122200 }, { "epoch": 0.08, "learning_rate": 4.611591418507845e-05, "loss": 1.9315, "step": 122300 }, { "epoch": 0.08, "learning_rate": 4.6112712135766895e-05, "loss": 1.9502, "step": 122400 }, { "epoch": 0.08, "learning_rate": 4.6109510086455335e-05, "loss": 1.924, "step": 122500 }, { "epoch": 0.08, "learning_rate": 4.6106308037143775e-05, "loss": 1.9403, "step": 122600 }, { "epoch": 0.08, "learning_rate": 4.6103105987832215e-05, "loss": 1.9469, "step": 122700 }, { "epoch": 0.08, "learning_rate": 4.6099903938520654e-05, "loss": 1.9489, "step": 122800 }, { "epoch": 0.08, "learning_rate": 4.60967018892091e-05, "loss": 1.9616, "step": 122900 }, { "epoch": 0.08, "learning_rate": 4.6093499839897534e-05, "loss": 1.9822, "step": 123000 }, { "epoch": 0.08, "eval_loss": 1.7943445444107056, "eval_runtime": 98.1232, "eval_samples_per_second": 101.913, "eval_steps_per_second": 6.37, "step": 123000 }, { "epoch": 0.08, "learning_rate": 4.609029779058598e-05, "loss": 1.925, "step": 123100 }, { "epoch": 0.08, "learning_rate": 4.6087095741274413e-05, "loss": 1.9401, "step": 123200 }, { "epoch": 0.08, "learning_rate": 4.608389369196286e-05, "loss": 1.9203, "step": 123300 }, { "epoch": 0.08, "learning_rate": 4.60806916426513e-05, "loss": 1.9447, "step": 123400 }, { "epoch": 0.08, "learning_rate": 4.607748959333974e-05, "loss": 1.938, "step": 123500 }, { "epoch": 0.08, "learning_rate": 4.6074287544028186e-05, "loss": 1.9524, "step": 123600 }, { "epoch": 0.08, "learning_rate": 4.607108549471662e-05, "loss": 1.9282, "step": 123700 }, { "epoch": 0.08, "learning_rate": 4.6067883445405065e-05, "loss": 1.9174, "step": 123800 }, { "epoch": 0.08, "learning_rate": 4.60646813960935e-05, "loss": 1.9309, "step": 123900 }, { "epoch": 0.08, "learning_rate": 4.6061479346781945e-05, "loss": 1.9436, "step": 124000 }, { "epoch": 0.08, "eval_loss": 1.797054648399353, "eval_runtime": 95.8553, "eval_samples_per_second": 104.324, "eval_steps_per_second": 6.52, "step": 124000 }, { "epoch": 0.08, "learning_rate": 4.6058277297470385e-05, "loss": 1.9076, "step": 124100 }, { "epoch": 0.08, "learning_rate": 4.6055075248158824e-05, "loss": 1.9325, "step": 124200 }, { "epoch": 0.08, "learning_rate": 4.6051873198847264e-05, "loss": 1.9199, "step": 124300 }, { "epoch": 0.08, "learning_rate": 4.6048671149535704e-05, "loss": 1.9295, "step": 124400 }, { "epoch": 0.08, "learning_rate": 4.604546910022415e-05, "loss": 1.9088, "step": 124500 }, { "epoch": 0.08, "learning_rate": 4.604226705091258e-05, "loss": 1.9253, "step": 124600 }, { "epoch": 0.08, "learning_rate": 4.603906500160103e-05, "loss": 1.9401, "step": 124700 }, { "epoch": 0.08, "learning_rate": 4.603586295228946e-05, "loss": 1.9219, "step": 124800 }, { "epoch": 0.08, "learning_rate": 4.603266090297791e-05, "loss": 1.9375, "step": 124900 }, { "epoch": 0.08, "learning_rate": 4.602945885366635e-05, "loss": 1.9327, "step": 125000 }, { "epoch": 0.08, "eval_loss": 1.7967554330825806, "eval_runtime": 98.1387, "eval_samples_per_second": 101.897, "eval_steps_per_second": 6.369, "step": 125000 }, { "epoch": 0.08, "learning_rate": 4.602625680435479e-05, "loss": 1.9278, "step": 125100 }, { "epoch": 0.08, "learning_rate": 4.6023054755043235e-05, "loss": 1.9298, "step": 125200 }, { "epoch": 0.08, "learning_rate": 4.601985270573167e-05, "loss": 1.9104, "step": 125300 }, { "epoch": 0.08, "learning_rate": 4.6016650656420115e-05, "loss": 1.9581, "step": 125400 }, { "epoch": 0.08, "learning_rate": 4.601344860710855e-05, "loss": 1.9226, "step": 125500 }, { "epoch": 0.08, "learning_rate": 4.6010246557796994e-05, "loss": 1.9373, "step": 125600 }, { "epoch": 0.08, "learning_rate": 4.6007044508485434e-05, "loss": 1.9295, "step": 125700 }, { "epoch": 0.08, "learning_rate": 4.6003842459173874e-05, "loss": 1.9475, "step": 125800 }, { "epoch": 0.08, "learning_rate": 4.6000640409862313e-05, "loss": 1.9348, "step": 125900 }, { "epoch": 0.08, "learning_rate": 4.599743836055075e-05, "loss": 1.937, "step": 126000 }, { "epoch": 0.08, "eval_loss": 1.7935060262680054, "eval_runtime": 96.8927, "eval_samples_per_second": 103.207, "eval_steps_per_second": 6.45, "step": 126000 }, { "epoch": 0.08, "learning_rate": 4.59942363112392e-05, "loss": 1.9256, "step": 126100 }, { "epoch": 0.08, "learning_rate": 4.599103426192763e-05, "loss": 1.9172, "step": 126200 }, { "epoch": 0.08, "learning_rate": 4.598783221261608e-05, "loss": 1.9274, "step": 126300 }, { "epoch": 0.08, "learning_rate": 4.598463016330451e-05, "loss": 1.9401, "step": 126400 }, { "epoch": 0.08, "learning_rate": 4.598142811399296e-05, "loss": 1.9325, "step": 126500 }, { "epoch": 0.08, "learning_rate": 4.59782260646814e-05, "loss": 1.9396, "step": 126600 }, { "epoch": 0.08, "learning_rate": 4.597502401536984e-05, "loss": 1.9273, "step": 126700 }, { "epoch": 0.08, "learning_rate": 4.5971821966058285e-05, "loss": 1.9375, "step": 126800 }, { "epoch": 0.08, "learning_rate": 4.596861991674672e-05, "loss": 1.9261, "step": 126900 }, { "epoch": 0.08, "learning_rate": 4.5965417867435164e-05, "loss": 1.9453, "step": 127000 }, { "epoch": 0.08, "eval_loss": 1.7986115217208862, "eval_runtime": 99.296, "eval_samples_per_second": 100.709, "eval_steps_per_second": 6.294, "step": 127000 }, { "epoch": 0.08, "learning_rate": 4.59622158181236e-05, "loss": 1.9231, "step": 127100 }, { "epoch": 0.08, "learning_rate": 4.5959013768812044e-05, "loss": 1.9407, "step": 127200 }, { "epoch": 0.08, "learning_rate": 4.595581171950048e-05, "loss": 1.9228, "step": 127300 }, { "epoch": 0.08, "learning_rate": 4.595260967018892e-05, "loss": 1.9341, "step": 127400 }, { "epoch": 0.08, "learning_rate": 4.594940762087736e-05, "loss": 1.9503, "step": 127500 }, { "epoch": 0.08, "learning_rate": 4.59462055715658e-05, "loss": 1.9381, "step": 127600 }, { "epoch": 0.08, "learning_rate": 4.594300352225425e-05, "loss": 1.9208, "step": 127700 }, { "epoch": 0.08, "learning_rate": 4.593980147294268e-05, "loss": 1.917, "step": 127800 }, { "epoch": 0.08, "learning_rate": 4.593659942363113e-05, "loss": 1.9197, "step": 127900 }, { "epoch": 0.08, "learning_rate": 4.593339737431956e-05, "loss": 1.9152, "step": 128000 }, { "epoch": 0.08, "eval_loss": 1.7988296747207642, "eval_runtime": 100.0152, "eval_samples_per_second": 99.985, "eval_steps_per_second": 6.249, "step": 128000 }, { "epoch": 0.08, "learning_rate": 4.593019532500801e-05, "loss": 1.9131, "step": 128100 }, { "epoch": 0.08, "learning_rate": 4.592699327569645e-05, "loss": 1.9163, "step": 128200 }, { "epoch": 0.08, "learning_rate": 4.592379122638489e-05, "loss": 1.9479, "step": 128300 }, { "epoch": 0.08, "learning_rate": 4.5920589177073334e-05, "loss": 1.923, "step": 128400 }, { "epoch": 0.08, "learning_rate": 4.591738712776177e-05, "loss": 1.9196, "step": 128500 }, { "epoch": 0.08, "learning_rate": 4.5914185078450213e-05, "loss": 1.9289, "step": 128600 }, { "epoch": 0.08, "learning_rate": 4.5910983029138646e-05, "loss": 1.9095, "step": 128700 }, { "epoch": 0.08, "learning_rate": 4.590778097982709e-05, "loss": 1.9402, "step": 128800 }, { "epoch": 0.08, "learning_rate": 4.590457893051553e-05, "loss": 1.9273, "step": 128900 }, { "epoch": 0.08, "learning_rate": 4.590137688120397e-05, "loss": 1.9195, "step": 129000 }, { "epoch": 0.08, "eval_loss": 1.7958400249481201, "eval_runtime": 99.8675, "eval_samples_per_second": 100.133, "eval_steps_per_second": 6.258, "step": 129000 }, { "epoch": 0.08, "learning_rate": 4.589817483189241e-05, "loss": 1.9039, "step": 129100 }, { "epoch": 0.08, "learning_rate": 4.589497278258085e-05, "loss": 1.9269, "step": 129200 }, { "epoch": 0.08, "learning_rate": 4.58917707332693e-05, "loss": 1.9003, "step": 129300 }, { "epoch": 0.08, "learning_rate": 4.588856868395773e-05, "loss": 1.9161, "step": 129400 }, { "epoch": 0.08, "learning_rate": 4.588536663464618e-05, "loss": 1.931, "step": 129500 }, { "epoch": 0.08, "learning_rate": 4.588216458533461e-05, "loss": 1.9244, "step": 129600 }, { "epoch": 0.08, "learning_rate": 4.587896253602306e-05, "loss": 1.9164, "step": 129700 }, { "epoch": 0.08, "learning_rate": 4.58757604867115e-05, "loss": 1.9414, "step": 129800 }, { "epoch": 0.08, "learning_rate": 4.587255843739994e-05, "loss": 1.9277, "step": 129900 }, { "epoch": 0.08, "learning_rate": 4.586935638808838e-05, "loss": 1.9228, "step": 130000 }, { "epoch": 0.08, "eval_loss": 1.7947413921356201, "eval_runtime": 98.8535, "eval_samples_per_second": 101.16, "eval_steps_per_second": 6.322, "step": 130000 }, { "epoch": 0.08, "learning_rate": 4.5866154338776816e-05, "loss": 1.93, "step": 130100 }, { "epoch": 0.08, "learning_rate": 4.586295228946526e-05, "loss": 1.9164, "step": 130200 }, { "epoch": 0.08, "learning_rate": 4.5859750240153696e-05, "loss": 1.9347, "step": 130300 }, { "epoch": 0.08, "learning_rate": 4.585654819084214e-05, "loss": 1.9258, "step": 130400 }, { "epoch": 0.08, "learning_rate": 4.585334614153058e-05, "loss": 1.9021, "step": 130500 }, { "epoch": 0.08, "learning_rate": 4.585014409221902e-05, "loss": 1.9157, "step": 130600 }, { "epoch": 0.08, "learning_rate": 4.584694204290746e-05, "loss": 1.9207, "step": 130700 }, { "epoch": 0.08, "learning_rate": 4.58437399935959e-05, "loss": 1.9297, "step": 130800 }, { "epoch": 0.08, "learning_rate": 4.584053794428435e-05, "loss": 1.9065, "step": 130900 }, { "epoch": 0.08, "learning_rate": 4.583733589497278e-05, "loss": 1.9124, "step": 131000 }, { "epoch": 0.08, "eval_loss": 1.7891453504562378, "eval_runtime": 101.2955, "eval_samples_per_second": 98.721, "eval_steps_per_second": 6.17, "step": 131000 }, { "epoch": 0.08, "learning_rate": 4.583413384566123e-05, "loss": 1.9084, "step": 131100 }, { "epoch": 0.08, "learning_rate": 4.583093179634967e-05, "loss": 1.9361, "step": 131200 }, { "epoch": 0.08, "learning_rate": 4.582772974703811e-05, "loss": 1.907, "step": 131300 }, { "epoch": 0.08, "learning_rate": 4.5824527697726547e-05, "loss": 1.9083, "step": 131400 }, { "epoch": 0.08, "learning_rate": 4.5821325648414986e-05, "loss": 1.9235, "step": 131500 }, { "epoch": 0.08, "learning_rate": 4.581812359910343e-05, "loss": 1.9185, "step": 131600 }, { "epoch": 0.08, "learning_rate": 4.5814921549791866e-05, "loss": 1.9274, "step": 131700 }, { "epoch": 0.08, "learning_rate": 4.581171950048031e-05, "loss": 1.9107, "step": 131800 }, { "epoch": 0.08, "learning_rate": 4.5808517451168745e-05, "loss": 1.9337, "step": 131900 }, { "epoch": 0.08, "learning_rate": 4.580531540185719e-05, "loss": 1.9393, "step": 132000 }, { "epoch": 0.08, "eval_loss": 1.792814016342163, "eval_runtime": 104.4656, "eval_samples_per_second": 95.725, "eval_steps_per_second": 5.983, "step": 132000 }, { "epoch": 0.08, "learning_rate": 4.580211335254563e-05, "loss": 1.9394, "step": 132100 }, { "epoch": 0.08, "learning_rate": 4.579891130323407e-05, "loss": 1.8981, "step": 132200 }, { "epoch": 0.08, "learning_rate": 4.579570925392251e-05, "loss": 1.9104, "step": 132300 }, { "epoch": 0.08, "learning_rate": 4.579250720461095e-05, "loss": 1.9286, "step": 132400 }, { "epoch": 0.08, "learning_rate": 4.57893051552994e-05, "loss": 1.9232, "step": 132500 }, { "epoch": 0.08, "learning_rate": 4.578610310598783e-05, "loss": 1.9222, "step": 132600 }, { "epoch": 0.08, "learning_rate": 4.578290105667628e-05, "loss": 1.9286, "step": 132700 }, { "epoch": 0.08, "learning_rate": 4.5779699007364716e-05, "loss": 1.9234, "step": 132800 }, { "epoch": 0.09, "learning_rate": 4.5776496958053156e-05, "loss": 1.9373, "step": 132900 }, { "epoch": 0.09, "learning_rate": 4.5773294908741596e-05, "loss": 1.902, "step": 133000 }, { "epoch": 0.09, "eval_loss": 1.7906956672668457, "eval_runtime": 96.7052, "eval_samples_per_second": 103.407, "eval_steps_per_second": 6.463, "step": 133000 }, { "epoch": 0.09, "learning_rate": 4.5770092859430036e-05, "loss": 1.9178, "step": 133100 }, { "epoch": 0.09, "learning_rate": 4.576689081011848e-05, "loss": 1.9279, "step": 133200 }, { "epoch": 0.09, "learning_rate": 4.5763688760806915e-05, "loss": 1.9327, "step": 133300 }, { "epoch": 0.09, "learning_rate": 4.576048671149536e-05, "loss": 1.9329, "step": 133400 }, { "epoch": 0.09, "learning_rate": 4.57572846621838e-05, "loss": 1.9178, "step": 133500 }, { "epoch": 0.09, "learning_rate": 4.575408261287224e-05, "loss": 1.9041, "step": 133600 }, { "epoch": 0.09, "learning_rate": 4.575088056356068e-05, "loss": 1.901, "step": 133700 }, { "epoch": 0.09, "learning_rate": 4.574767851424912e-05, "loss": 1.9292, "step": 133800 }, { "epoch": 0.09, "learning_rate": 4.574447646493756e-05, "loss": 1.9084, "step": 133900 }, { "epoch": 0.09, "learning_rate": 4.5741274415626e-05, "loss": 1.9215, "step": 134000 }, { "epoch": 0.09, "eval_loss": 1.7979042530059814, "eval_runtime": 97.0117, "eval_samples_per_second": 103.08, "eval_steps_per_second": 6.443, "step": 134000 }, { "epoch": 0.09, "learning_rate": 4.5738072366314447e-05, "loss": 1.9157, "step": 134100 }, { "epoch": 0.09, "learning_rate": 4.573487031700288e-05, "loss": 1.9265, "step": 134200 }, { "epoch": 0.09, "learning_rate": 4.5731668267691326e-05, "loss": 1.9187, "step": 134300 }, { "epoch": 0.09, "learning_rate": 4.5728466218379766e-05, "loss": 1.9249, "step": 134400 }, { "epoch": 0.09, "learning_rate": 4.5725264169068206e-05, "loss": 1.9113, "step": 134500 }, { "epoch": 0.09, "learning_rate": 4.5722062119756645e-05, "loss": 1.9134, "step": 134600 }, { "epoch": 0.09, "learning_rate": 4.5718860070445085e-05, "loss": 1.9244, "step": 134700 }, { "epoch": 0.09, "learning_rate": 4.571565802113353e-05, "loss": 1.921, "step": 134800 }, { "epoch": 0.09, "learning_rate": 4.5712455971821964e-05, "loss": 1.9189, "step": 134900 }, { "epoch": 0.09, "learning_rate": 4.570925392251041e-05, "loss": 1.9396, "step": 135000 }, { "epoch": 0.09, "eval_loss": 1.792456865310669, "eval_runtime": 96.4087, "eval_samples_per_second": 103.725, "eval_steps_per_second": 6.483, "step": 135000 }, { "epoch": 0.09, "learning_rate": 4.570605187319885e-05, "loss": 1.9283, "step": 135100 }, { "epoch": 0.09, "learning_rate": 4.570284982388729e-05, "loss": 1.9214, "step": 135200 }, { "epoch": 0.09, "learning_rate": 4.569964777457573e-05, "loss": 1.9293, "step": 135300 }, { "epoch": 0.09, "learning_rate": 4.569644572526417e-05, "loss": 1.9177, "step": 135400 }, { "epoch": 0.09, "learning_rate": 4.569324367595261e-05, "loss": 1.9242, "step": 135500 }, { "epoch": 0.09, "learning_rate": 4.569004162664105e-05, "loss": 1.9212, "step": 135600 }, { "epoch": 0.09, "learning_rate": 4.5686839577329496e-05, "loss": 1.936, "step": 135700 }, { "epoch": 0.09, "learning_rate": 4.5683637528017936e-05, "loss": 1.9181, "step": 135800 }, { "epoch": 0.09, "learning_rate": 4.5680435478706375e-05, "loss": 1.9272, "step": 135900 }, { "epoch": 0.09, "learning_rate": 4.5677233429394815e-05, "loss": 1.9166, "step": 136000 }, { "epoch": 0.09, "eval_loss": 1.7942609786987305, "eval_runtime": 94.0526, "eval_samples_per_second": 106.323, "eval_steps_per_second": 6.645, "step": 136000 }, { "epoch": 0.09, "learning_rate": 4.5674031380083255e-05, "loss": 1.8882, "step": 136100 }, { "epoch": 0.09, "learning_rate": 4.5670829330771695e-05, "loss": 1.9245, "step": 136200 }, { "epoch": 0.09, "learning_rate": 4.5667627281460134e-05, "loss": 1.9037, "step": 136300 }, { "epoch": 0.09, "learning_rate": 4.566442523214858e-05, "loss": 1.8995, "step": 136400 }, { "epoch": 0.09, "learning_rate": 4.5661223182837014e-05, "loss": 1.9345, "step": 136500 }, { "epoch": 0.09, "learning_rate": 4.565802113352546e-05, "loss": 1.9215, "step": 136600 }, { "epoch": 0.09, "learning_rate": 4.56548190842139e-05, "loss": 1.92, "step": 136700 }, { "epoch": 0.09, "learning_rate": 4.565161703490234e-05, "loss": 1.9146, "step": 136800 }, { "epoch": 0.09, "learning_rate": 4.564841498559078e-05, "loss": 1.9071, "step": 136900 }, { "epoch": 0.09, "learning_rate": 4.564521293627922e-05, "loss": 1.8991, "step": 137000 }, { "epoch": 0.09, "eval_loss": 1.788527011871338, "eval_runtime": 94.7456, "eval_samples_per_second": 105.546, "eval_steps_per_second": 6.597, "step": 137000 }, { "epoch": 0.09, "learning_rate": 4.564201088696766e-05, "loss": 1.9231, "step": 137100 }, { "epoch": 0.09, "learning_rate": 4.56388088376561e-05, "loss": 1.9109, "step": 137200 }, { "epoch": 0.09, "learning_rate": 4.5635606788344545e-05, "loss": 1.9305, "step": 137300 }, { "epoch": 0.09, "learning_rate": 4.5632404739032985e-05, "loss": 1.9143, "step": 137400 }, { "epoch": 0.09, "learning_rate": 4.5629202689721425e-05, "loss": 1.9211, "step": 137500 }, { "epoch": 0.09, "learning_rate": 4.5626000640409865e-05, "loss": 1.9095, "step": 137600 }, { "epoch": 0.09, "learning_rate": 4.5622798591098304e-05, "loss": 1.9143, "step": 137700 }, { "epoch": 0.09, "learning_rate": 4.5619596541786744e-05, "loss": 1.9183, "step": 137800 }, { "epoch": 0.09, "learning_rate": 4.5616394492475184e-05, "loss": 1.9171, "step": 137900 }, { "epoch": 0.09, "learning_rate": 4.561319244316363e-05, "loss": 1.8948, "step": 138000 }, { "epoch": 0.09, "eval_loss": 1.7902027368545532, "eval_runtime": 100.248, "eval_samples_per_second": 99.753, "eval_steps_per_second": 6.235, "step": 138000 }, { "epoch": 0.09, "learning_rate": 4.560999039385207e-05, "loss": 1.8899, "step": 138100 }, { "epoch": 0.09, "learning_rate": 4.560678834454051e-05, "loss": 1.8906, "step": 138200 }, { "epoch": 0.09, "learning_rate": 4.560358629522895e-05, "loss": 1.9147, "step": 138300 }, { "epoch": 0.09, "learning_rate": 4.560038424591739e-05, "loss": 1.9283, "step": 138400 }, { "epoch": 0.09, "learning_rate": 4.559718219660583e-05, "loss": 1.9183, "step": 138500 }, { "epoch": 0.09, "learning_rate": 4.559398014729427e-05, "loss": 1.8995, "step": 138600 }, { "epoch": 0.09, "learning_rate": 4.559077809798271e-05, "loss": 1.9084, "step": 138700 }, { "epoch": 0.09, "learning_rate": 4.558757604867115e-05, "loss": 1.8888, "step": 138800 }, { "epoch": 0.09, "learning_rate": 4.5584373999359595e-05, "loss": 1.9294, "step": 138900 }, { "epoch": 0.09, "learning_rate": 4.5581171950048034e-05, "loss": 1.9084, "step": 139000 }, { "epoch": 0.09, "eval_loss": 1.7892931699752808, "eval_runtime": 105.48, "eval_samples_per_second": 94.805, "eval_steps_per_second": 5.925, "step": 139000 }, { "epoch": 0.09, "learning_rate": 4.5577969900736474e-05, "loss": 1.906, "step": 139100 }, { "epoch": 0.09, "learning_rate": 4.5574767851424914e-05, "loss": 1.9314, "step": 139200 }, { "epoch": 0.09, "learning_rate": 4.5571565802113354e-05, "loss": 1.905, "step": 139300 }, { "epoch": 0.09, "learning_rate": 4.556836375280179e-05, "loss": 1.9038, "step": 139400 }, { "epoch": 0.09, "learning_rate": 4.556516170349023e-05, "loss": 1.9357, "step": 139500 }, { "epoch": 0.09, "learning_rate": 4.556195965417868e-05, "loss": 1.9227, "step": 139600 }, { "epoch": 0.09, "learning_rate": 4.555875760486712e-05, "loss": 1.9055, "step": 139700 }, { "epoch": 0.09, "learning_rate": 4.555555555555556e-05, "loss": 1.8935, "step": 139800 }, { "epoch": 0.09, "learning_rate": 4.5552353506244e-05, "loss": 1.9191, "step": 139900 }, { "epoch": 0.09, "learning_rate": 4.554915145693244e-05, "loss": 1.9294, "step": 140000 }, { "epoch": 0.09, "eval_loss": 1.789223074913025, "eval_runtime": 99.726, "eval_samples_per_second": 100.275, "eval_steps_per_second": 6.267, "step": 140000 }, { "epoch": 0.09, "learning_rate": 4.554594940762088e-05, "loss": 1.9261, "step": 140100 }, { "epoch": 0.09, "learning_rate": 4.554274735830932e-05, "loss": 1.9176, "step": 140200 }, { "epoch": 0.09, "learning_rate": 4.553954530899776e-05, "loss": 1.9211, "step": 140300 }, { "epoch": 0.09, "learning_rate": 4.5536343259686204e-05, "loss": 1.919, "step": 140400 }, { "epoch": 0.09, "learning_rate": 4.5533141210374644e-05, "loss": 1.886, "step": 140500 }, { "epoch": 0.09, "learning_rate": 4.5529939161063084e-05, "loss": 1.895, "step": 140600 }, { "epoch": 0.09, "learning_rate": 4.5526737111751524e-05, "loss": 1.935, "step": 140700 }, { "epoch": 0.09, "learning_rate": 4.552353506243996e-05, "loss": 1.9082, "step": 140800 }, { "epoch": 0.09, "learning_rate": 4.55203330131284e-05, "loss": 1.9045, "step": 140900 }, { "epoch": 0.09, "learning_rate": 4.551713096381684e-05, "loss": 1.9079, "step": 141000 }, { "epoch": 0.09, "eval_loss": 1.790201187133789, "eval_runtime": 94.8533, "eval_samples_per_second": 105.426, "eval_steps_per_second": 6.589, "step": 141000 }, { "epoch": 0.09, "learning_rate": 4.551392891450529e-05, "loss": 1.9162, "step": 141100 }, { "epoch": 0.09, "learning_rate": 4.551072686519373e-05, "loss": 1.9204, "step": 141200 }, { "epoch": 0.09, "learning_rate": 4.550752481588217e-05, "loss": 1.8994, "step": 141300 }, { "epoch": 0.09, "learning_rate": 4.550432276657061e-05, "loss": 1.9254, "step": 141400 }, { "epoch": 0.09, "learning_rate": 4.550112071725905e-05, "loss": 1.8768, "step": 141500 }, { "epoch": 0.09, "learning_rate": 4.549791866794749e-05, "loss": 1.9193, "step": 141600 }, { "epoch": 0.09, "learning_rate": 4.549471661863593e-05, "loss": 1.9075, "step": 141700 }, { "epoch": 0.09, "learning_rate": 4.549151456932437e-05, "loss": 1.9058, "step": 141800 }, { "epoch": 0.09, "learning_rate": 4.548831252001281e-05, "loss": 1.9179, "step": 141900 }, { "epoch": 0.09, "learning_rate": 4.5485110470701254e-05, "loss": 1.9134, "step": 142000 }, { "epoch": 0.09, "eval_loss": 1.7899253368377686, "eval_runtime": 94.4772, "eval_samples_per_second": 105.846, "eval_steps_per_second": 6.615, "step": 142000 }, { "epoch": 0.09, "learning_rate": 4.5481908421389693e-05, "loss": 1.9059, "step": 142100 }, { "epoch": 0.09, "learning_rate": 4.547870637207813e-05, "loss": 1.8921, "step": 142200 }, { "epoch": 0.09, "learning_rate": 4.547550432276657e-05, "loss": 1.8924, "step": 142300 }, { "epoch": 0.09, "learning_rate": 4.547230227345501e-05, "loss": 1.9097, "step": 142400 }, { "epoch": 0.09, "learning_rate": 4.546910022414345e-05, "loss": 1.8838, "step": 142500 }, { "epoch": 0.09, "learning_rate": 4.546589817483189e-05, "loss": 1.9433, "step": 142600 }, { "epoch": 0.09, "learning_rate": 4.546269612552034e-05, "loss": 1.911, "step": 142700 }, { "epoch": 0.09, "learning_rate": 4.545949407620878e-05, "loss": 1.9108, "step": 142800 }, { "epoch": 0.09, "learning_rate": 4.545629202689722e-05, "loss": 1.8957, "step": 142900 }, { "epoch": 0.09, "learning_rate": 4.545308997758566e-05, "loss": 1.898, "step": 143000 }, { "epoch": 0.09, "eval_loss": 1.7912750244140625, "eval_runtime": 92.2301, "eval_samples_per_second": 108.425, "eval_steps_per_second": 6.777, "step": 143000 }, { "epoch": 0.09, "learning_rate": 4.54498879282741e-05, "loss": 1.9059, "step": 143100 }, { "epoch": 0.09, "learning_rate": 4.544668587896254e-05, "loss": 1.9082, "step": 143200 }, { "epoch": 0.09, "learning_rate": 4.544348382965098e-05, "loss": 1.9001, "step": 143300 }, { "epoch": 0.09, "learning_rate": 4.5440281780339424e-05, "loss": 1.9091, "step": 143400 }, { "epoch": 0.09, "learning_rate": 4.5437079731027857e-05, "loss": 1.9089, "step": 143500 }, { "epoch": 0.09, "learning_rate": 4.54338776817163e-05, "loss": 1.891, "step": 143600 }, { "epoch": 0.09, "learning_rate": 4.543067563240474e-05, "loss": 1.8972, "step": 143700 }, { "epoch": 0.09, "learning_rate": 4.542747358309318e-05, "loss": 1.9117, "step": 143800 }, { "epoch": 0.09, "learning_rate": 4.542427153378162e-05, "loss": 1.9411, "step": 143900 }, { "epoch": 0.09, "learning_rate": 4.542106948447006e-05, "loss": 1.9007, "step": 144000 }, { "epoch": 0.09, "eval_loss": 1.7896379232406616, "eval_runtime": 93.7798, "eval_samples_per_second": 106.633, "eval_steps_per_second": 6.665, "step": 144000 }, { "epoch": 0.09, "learning_rate": 4.54178674351585e-05, "loss": 1.89, "step": 144100 }, { "epoch": 0.09, "learning_rate": 4.541466538584694e-05, "loss": 1.8936, "step": 144200 }, { "epoch": 0.09, "learning_rate": 4.541146333653539e-05, "loss": 1.9094, "step": 144300 }, { "epoch": 0.09, "learning_rate": 4.540826128722383e-05, "loss": 1.9168, "step": 144400 }, { "epoch": 0.09, "learning_rate": 4.540505923791227e-05, "loss": 1.9168, "step": 144500 }, { "epoch": 0.09, "learning_rate": 4.540185718860071e-05, "loss": 1.9001, "step": 144600 }, { "epoch": 0.09, "learning_rate": 4.539865513928915e-05, "loss": 1.9233, "step": 144700 }, { "epoch": 0.09, "learning_rate": 4.539545308997759e-05, "loss": 1.9094, "step": 144800 }, { "epoch": 0.09, "learning_rate": 4.5392251040666026e-05, "loss": 1.9051, "step": 144900 }, { "epoch": 0.09, "learning_rate": 4.538904899135447e-05, "loss": 1.9036, "step": 145000 }, { "epoch": 0.09, "eval_loss": 1.787335991859436, "eval_runtime": 99.8962, "eval_samples_per_second": 100.104, "eval_steps_per_second": 6.256, "step": 145000 }, { "epoch": 0.09, "learning_rate": 4.5385846942042906e-05, "loss": 1.9024, "step": 145100 }, { "epoch": 0.09, "learning_rate": 4.538264489273135e-05, "loss": 1.9059, "step": 145200 }, { "epoch": 0.09, "learning_rate": 4.537944284341979e-05, "loss": 1.9068, "step": 145300 }, { "epoch": 0.09, "learning_rate": 4.537624079410823e-05, "loss": 1.9268, "step": 145400 }, { "epoch": 0.09, "learning_rate": 4.537303874479667e-05, "loss": 1.8986, "step": 145500 }, { "epoch": 0.09, "learning_rate": 4.536983669548511e-05, "loss": 1.9166, "step": 145600 }, { "epoch": 0.09, "learning_rate": 4.536663464617356e-05, "loss": 1.9078, "step": 145700 }, { "epoch": 0.09, "learning_rate": 4.536343259686199e-05, "loss": 1.895, "step": 145800 }, { "epoch": 0.09, "learning_rate": 4.536023054755044e-05, "loss": 1.898, "step": 145900 }, { "epoch": 0.09, "learning_rate": 4.535702849823888e-05, "loss": 1.9453, "step": 146000 }, { "epoch": 0.09, "eval_loss": 1.7857972383499146, "eval_runtime": 103.8982, "eval_samples_per_second": 96.248, "eval_steps_per_second": 6.016, "step": 146000 }, { "epoch": 0.09, "learning_rate": 4.535382644892732e-05, "loss": 1.9071, "step": 146100 }, { "epoch": 0.09, "learning_rate": 4.5350624399615757e-05, "loss": 1.9039, "step": 146200 }, { "epoch": 0.09, "learning_rate": 4.5347422350304196e-05, "loss": 1.8989, "step": 146300 }, { "epoch": 0.09, "learning_rate": 4.5344220300992636e-05, "loss": 1.9289, "step": 146400 }, { "epoch": 0.09, "learning_rate": 4.5341018251681076e-05, "loss": 1.8907, "step": 146500 }, { "epoch": 0.09, "learning_rate": 4.533781620236952e-05, "loss": 1.8905, "step": 146600 }, { "epoch": 0.09, "learning_rate": 4.5334614153057955e-05, "loss": 1.8886, "step": 146700 }, { "epoch": 0.09, "learning_rate": 4.53314121037464e-05, "loss": 1.8878, "step": 146800 }, { "epoch": 0.09, "learning_rate": 4.532821005443484e-05, "loss": 1.9118, "step": 146900 }, { "epoch": 0.09, "learning_rate": 4.532500800512328e-05, "loss": 1.9019, "step": 147000 }, { "epoch": 0.09, "eval_loss": 1.7876064777374268, "eval_runtime": 95.1245, "eval_samples_per_second": 105.125, "eval_steps_per_second": 6.57, "step": 147000 }, { "epoch": 0.09, "learning_rate": 4.532180595581172e-05, "loss": 1.9243, "step": 147100 }, { "epoch": 0.09, "learning_rate": 4.531860390650016e-05, "loss": 1.8877, "step": 147200 }, { "epoch": 0.09, "learning_rate": 4.531540185718861e-05, "loss": 1.9062, "step": 147300 }, { "epoch": 0.09, "learning_rate": 4.531219980787704e-05, "loss": 1.9194, "step": 147400 }, { "epoch": 0.09, "learning_rate": 4.530899775856549e-05, "loss": 1.9086, "step": 147500 }, { "epoch": 0.09, "learning_rate": 4.5305795709253926e-05, "loss": 1.8889, "step": 147600 }, { "epoch": 0.09, "learning_rate": 4.5302593659942366e-05, "loss": 1.8946, "step": 147700 }, { "epoch": 0.09, "learning_rate": 4.5299391610630806e-05, "loss": 1.8932, "step": 147800 }, { "epoch": 0.09, "learning_rate": 4.5296189561319246e-05, "loss": 1.894, "step": 147900 }, { "epoch": 0.09, "learning_rate": 4.529298751200769e-05, "loss": 1.9064, "step": 148000 }, { "epoch": 0.09, "eval_loss": 1.7837945222854614, "eval_runtime": 96.3016, "eval_samples_per_second": 103.84, "eval_steps_per_second": 6.49, "step": 148000 }, { "epoch": 0.09, "learning_rate": 4.5289785462696125e-05, "loss": 1.895, "step": 148100 }, { "epoch": 0.09, "learning_rate": 4.528658341338457e-05, "loss": 1.8999, "step": 148200 }, { "epoch": 0.09, "learning_rate": 4.5283381364073005e-05, "loss": 1.8981, "step": 148300 }, { "epoch": 0.09, "learning_rate": 4.528017931476145e-05, "loss": 1.8939, "step": 148400 }, { "epoch": 0.1, "learning_rate": 4.527697726544989e-05, "loss": 1.9038, "step": 148500 }, { "epoch": 0.1, "learning_rate": 4.527377521613833e-05, "loss": 1.9119, "step": 148600 }, { "epoch": 0.1, "learning_rate": 4.527057316682678e-05, "loss": 1.914, "step": 148700 }, { "epoch": 0.1, "learning_rate": 4.526737111751521e-05, "loss": 1.9068, "step": 148800 }, { "epoch": 0.1, "learning_rate": 4.526416906820366e-05, "loss": 1.9063, "step": 148900 }, { "epoch": 0.1, "learning_rate": 4.526096701889209e-05, "loss": 1.908, "step": 149000 }, { "epoch": 0.1, "eval_loss": 1.7880679368972778, "eval_runtime": 92.1649, "eval_samples_per_second": 108.501, "eval_steps_per_second": 6.781, "step": 149000 }, { "epoch": 0.1, "learning_rate": 4.5257764969580536e-05, "loss": 1.9037, "step": 149100 }, { "epoch": 0.1, "learning_rate": 4.5254562920268976e-05, "loss": 1.8986, "step": 149200 }, { "epoch": 0.1, "learning_rate": 4.5251360870957416e-05, "loss": 1.9027, "step": 149300 }, { "epoch": 0.1, "learning_rate": 4.5248158821645855e-05, "loss": 1.8883, "step": 149400 }, { "epoch": 0.1, "learning_rate": 4.5244956772334295e-05, "loss": 1.9052, "step": 149500 }, { "epoch": 0.1, "learning_rate": 4.524175472302274e-05, "loss": 1.9135, "step": 149600 }, { "epoch": 0.1, "learning_rate": 4.5238552673711175e-05, "loss": 1.9067, "step": 149700 }, { "epoch": 0.1, "learning_rate": 4.523535062439962e-05, "loss": 1.9001, "step": 149800 }, { "epoch": 0.1, "learning_rate": 4.5232148575088054e-05, "loss": 1.8975, "step": 149900 }, { "epoch": 0.1, "learning_rate": 4.52289465257765e-05, "loss": 1.8935, "step": 150000 }, { "epoch": 0.1, "eval_loss": 1.785008192062378, "eval_runtime": 91.2416, "eval_samples_per_second": 109.599, "eval_steps_per_second": 6.85, "step": 150000 }, { "epoch": 0.1, "learning_rate": 4.522574447646494e-05, "loss": 1.9242, "step": 150100 }, { "epoch": 0.1, "learning_rate": 4.522254242715338e-05, "loss": 1.9185, "step": 150200 }, { "epoch": 0.1, "learning_rate": 4.5219340377841827e-05, "loss": 1.887, "step": 150300 }, { "epoch": 0.1, "learning_rate": 4.521613832853026e-05, "loss": 1.9168, "step": 150400 }, { "epoch": 0.1, "learning_rate": 4.5212936279218706e-05, "loss": 1.8879, "step": 150500 }, { "epoch": 0.1, "learning_rate": 4.520973422990714e-05, "loss": 1.8929, "step": 150600 }, { "epoch": 0.1, "learning_rate": 4.5206532180595585e-05, "loss": 1.9042, "step": 150700 }, { "epoch": 0.1, "learning_rate": 4.5203330131284025e-05, "loss": 1.8869, "step": 150800 }, { "epoch": 0.1, "learning_rate": 4.5200128081972465e-05, "loss": 1.9086, "step": 150900 }, { "epoch": 0.1, "learning_rate": 4.5196926032660905e-05, "loss": 1.8978, "step": 151000 }, { "epoch": 0.1, "eval_loss": 1.788016438484192, "eval_runtime": 91.1439, "eval_samples_per_second": 109.717, "eval_steps_per_second": 6.857, "step": 151000 }, { "epoch": 0.1, "learning_rate": 4.5193723983349344e-05, "loss": 1.8847, "step": 151100 }, { "epoch": 0.1, "learning_rate": 4.519052193403779e-05, "loss": 1.9041, "step": 151200 }, { "epoch": 0.1, "learning_rate": 4.5187319884726224e-05, "loss": 1.921, "step": 151300 }, { "epoch": 0.1, "learning_rate": 4.518411783541467e-05, "loss": 1.8871, "step": 151400 }, { "epoch": 0.1, "learning_rate": 4.51809157861031e-05, "loss": 1.898, "step": 151500 }, { "epoch": 0.1, "learning_rate": 4.517771373679155e-05, "loss": 1.9006, "step": 151600 }, { "epoch": 0.1, "learning_rate": 4.517451168747999e-05, "loss": 1.9007, "step": 151700 }, { "epoch": 0.1, "learning_rate": 4.517130963816843e-05, "loss": 1.9245, "step": 151800 }, { "epoch": 0.1, "learning_rate": 4.5168107588856876e-05, "loss": 1.9106, "step": 151900 }, { "epoch": 0.1, "learning_rate": 4.516490553954531e-05, "loss": 1.9059, "step": 152000 }, { "epoch": 0.1, "eval_loss": 1.783328890800476, "eval_runtime": 91.1633, "eval_samples_per_second": 109.693, "eval_steps_per_second": 6.856, "step": 152000 }, { "epoch": 0.1, "learning_rate": 4.5161703490233755e-05, "loss": 1.9111, "step": 152100 }, { "epoch": 0.1, "learning_rate": 4.515850144092219e-05, "loss": 1.8844, "step": 152200 }, { "epoch": 0.1, "learning_rate": 4.5155299391610635e-05, "loss": 1.9046, "step": 152300 }, { "epoch": 0.1, "learning_rate": 4.515209734229907e-05, "loss": 1.8979, "step": 152400 }, { "epoch": 0.1, "learning_rate": 4.5148895292987514e-05, "loss": 1.9215, "step": 152500 }, { "epoch": 0.1, "learning_rate": 4.5145693243675954e-05, "loss": 1.8831, "step": 152600 }, { "epoch": 0.1, "learning_rate": 4.5142491194364394e-05, "loss": 1.8897, "step": 152700 }, { "epoch": 0.1, "learning_rate": 4.513928914505284e-05, "loss": 1.8844, "step": 152800 }, { "epoch": 0.1, "learning_rate": 4.513608709574127e-05, "loss": 1.889, "step": 152900 }, { "epoch": 0.1, "learning_rate": 4.513288504642972e-05, "loss": 1.8861, "step": 153000 }, { "epoch": 0.1, "eval_loss": 1.7829804420471191, "eval_runtime": 92.8936, "eval_samples_per_second": 107.65, "eval_steps_per_second": 6.728, "step": 153000 }, { "epoch": 0.1, "learning_rate": 4.512968299711815e-05, "loss": 1.8986, "step": 153100 }, { "epoch": 0.1, "learning_rate": 4.51264809478066e-05, "loss": 1.8955, "step": 153200 }, { "epoch": 0.1, "learning_rate": 4.512327889849504e-05, "loss": 1.8721, "step": 153300 }, { "epoch": 0.1, "learning_rate": 4.512007684918348e-05, "loss": 1.9035, "step": 153400 }, { "epoch": 0.1, "learning_rate": 4.5116874799871925e-05, "loss": 1.8974, "step": 153500 }, { "epoch": 0.1, "learning_rate": 4.511367275056036e-05, "loss": 1.8901, "step": 153600 }, { "epoch": 0.1, "learning_rate": 4.5110470701248805e-05, "loss": 1.8922, "step": 153700 }, { "epoch": 0.1, "learning_rate": 4.510726865193724e-05, "loss": 1.8596, "step": 153800 }, { "epoch": 0.1, "learning_rate": 4.5104066602625684e-05, "loss": 1.8939, "step": 153900 }, { "epoch": 0.1, "learning_rate": 4.510086455331412e-05, "loss": 1.9064, "step": 154000 }, { "epoch": 0.1, "eval_loss": 1.7862111330032349, "eval_runtime": 98.7607, "eval_samples_per_second": 101.255, "eval_steps_per_second": 6.328, "step": 154000 }, { "epoch": 0.1, "learning_rate": 4.5097662504002564e-05, "loss": 1.8829, "step": 154100 }, { "epoch": 0.1, "learning_rate": 4.5094460454691003e-05, "loss": 1.8953, "step": 154200 }, { "epoch": 0.1, "learning_rate": 4.509125840537944e-05, "loss": 1.8819, "step": 154300 }, { "epoch": 0.1, "learning_rate": 4.508805635606789e-05, "loss": 1.8762, "step": 154400 }, { "epoch": 0.1, "learning_rate": 4.508485430675632e-05, "loss": 1.9052, "step": 154500 }, { "epoch": 0.1, "learning_rate": 4.508165225744477e-05, "loss": 1.912, "step": 154600 }, { "epoch": 0.1, "learning_rate": 4.50784502081332e-05, "loss": 1.8866, "step": 154700 }, { "epoch": 0.1, "learning_rate": 4.507524815882165e-05, "loss": 1.9121, "step": 154800 }, { "epoch": 0.1, "learning_rate": 4.507204610951009e-05, "loss": 1.9146, "step": 154900 }, { "epoch": 0.1, "learning_rate": 4.506884406019853e-05, "loss": 1.8907, "step": 155000 }, { "epoch": 0.1, "eval_loss": 1.7840514183044434, "eval_runtime": 98.6134, "eval_samples_per_second": 101.406, "eval_steps_per_second": 6.338, "step": 155000 }, { "epoch": 0.1, "learning_rate": 4.5065642010886975e-05, "loss": 1.8893, "step": 155100 }, { "epoch": 0.1, "learning_rate": 4.506243996157541e-05, "loss": 1.8877, "step": 155200 }, { "epoch": 0.1, "learning_rate": 4.5059237912263854e-05, "loss": 1.9055, "step": 155300 }, { "epoch": 0.1, "learning_rate": 4.505603586295229e-05, "loss": 1.8847, "step": 155400 }, { "epoch": 0.1, "learning_rate": 4.5052833813640734e-05, "loss": 1.8859, "step": 155500 }, { "epoch": 0.1, "learning_rate": 4.504963176432917e-05, "loss": 1.9078, "step": 155600 }, { "epoch": 0.1, "learning_rate": 4.504642971501761e-05, "loss": 1.8905, "step": 155700 }, { "epoch": 0.1, "learning_rate": 4.504322766570605e-05, "loss": 1.8982, "step": 155800 }, { "epoch": 0.1, "learning_rate": 4.504002561639449e-05, "loss": 1.9147, "step": 155900 }, { "epoch": 0.1, "learning_rate": 4.503682356708294e-05, "loss": 1.9058, "step": 156000 }, { "epoch": 0.1, "eval_loss": 1.7839281558990479, "eval_runtime": 96.8979, "eval_samples_per_second": 103.201, "eval_steps_per_second": 6.45, "step": 156000 }, { "epoch": 0.1, "learning_rate": 4.503362151777137e-05, "loss": 1.8905, "step": 156100 }, { "epoch": 0.1, "learning_rate": 4.503041946845982e-05, "loss": 1.903, "step": 156200 }, { "epoch": 0.1, "learning_rate": 4.502721741914825e-05, "loss": 1.8997, "step": 156300 }, { "epoch": 0.1, "learning_rate": 4.50240153698367e-05, "loss": 1.9141, "step": 156400 }, { "epoch": 0.1, "learning_rate": 4.502081332052514e-05, "loss": 1.8947, "step": 156500 }, { "epoch": 0.1, "learning_rate": 4.501761127121358e-05, "loss": 1.8863, "step": 156600 }, { "epoch": 0.1, "learning_rate": 4.5014409221902024e-05, "loss": 1.9086, "step": 156700 }, { "epoch": 0.1, "learning_rate": 4.501120717259046e-05, "loss": 1.8878, "step": 156800 }, { "epoch": 0.1, "learning_rate": 4.5008005123278903e-05, "loss": 1.8866, "step": 156900 }, { "epoch": 0.1, "learning_rate": 4.5004803073967336e-05, "loss": 1.917, "step": 157000 }, { "epoch": 0.1, "eval_loss": 1.7810734510421753, "eval_runtime": 93.4514, "eval_samples_per_second": 107.007, "eval_steps_per_second": 6.688, "step": 157000 }, { "epoch": 0.1, "learning_rate": 4.500160102465578e-05, "loss": 1.9164, "step": 157100 }, { "epoch": 0.1, "learning_rate": 4.499839897534422e-05, "loss": 1.8656, "step": 157200 }, { "epoch": 0.1, "learning_rate": 4.499519692603266e-05, "loss": 1.8903, "step": 157300 }, { "epoch": 0.1, "learning_rate": 4.49919948767211e-05, "loss": 1.9077, "step": 157400 }, { "epoch": 0.1, "learning_rate": 4.498879282740954e-05, "loss": 1.8875, "step": 157500 }, { "epoch": 0.1, "learning_rate": 4.498559077809799e-05, "loss": 1.8684, "step": 157600 }, { "epoch": 0.1, "learning_rate": 4.498238872878642e-05, "loss": 1.8874, "step": 157700 }, { "epoch": 0.1, "learning_rate": 4.497918667947487e-05, "loss": 1.8889, "step": 157800 }, { "epoch": 0.1, "learning_rate": 4.497598463016331e-05, "loss": 1.9023, "step": 157900 }, { "epoch": 0.1, "learning_rate": 4.497278258085175e-05, "loss": 1.8874, "step": 158000 }, { "epoch": 0.1, "eval_loss": 1.7826582193374634, "eval_runtime": 91.9858, "eval_samples_per_second": 108.712, "eval_steps_per_second": 6.795, "step": 158000 }, { "epoch": 0.1, "learning_rate": 4.496958053154019e-05, "loss": 1.8968, "step": 158100 }, { "epoch": 0.1, "learning_rate": 4.496637848222863e-05, "loss": 1.8985, "step": 158200 }, { "epoch": 0.1, "learning_rate": 4.496317643291707e-05, "loss": 1.8816, "step": 158300 }, { "epoch": 0.1, "learning_rate": 4.4959974383605506e-05, "loss": 1.8911, "step": 158400 }, { "epoch": 0.1, "learning_rate": 4.495677233429395e-05, "loss": 1.8891, "step": 158500 }, { "epoch": 0.1, "learning_rate": 4.495357028498239e-05, "loss": 1.8912, "step": 158600 }, { "epoch": 0.1, "learning_rate": 4.495036823567083e-05, "loss": 1.8917, "step": 158700 }, { "epoch": 0.1, "learning_rate": 4.494716618635927e-05, "loss": 1.8972, "step": 158800 }, { "epoch": 0.1, "learning_rate": 4.494396413704771e-05, "loss": 1.8951, "step": 158900 }, { "epoch": 0.1, "learning_rate": 4.494076208773615e-05, "loss": 1.9057, "step": 159000 }, { "epoch": 0.1, "eval_loss": 1.7811068296432495, "eval_runtime": 93.0076, "eval_samples_per_second": 107.518, "eval_steps_per_second": 6.72, "step": 159000 }, { "epoch": 0.1, "learning_rate": 4.493756003842459e-05, "loss": 1.8939, "step": 159100 }, { "epoch": 0.1, "learning_rate": 4.493435798911304e-05, "loss": 1.8979, "step": 159200 }, { "epoch": 0.1, "learning_rate": 4.493115593980147e-05, "loss": 1.8948, "step": 159300 }, { "epoch": 0.1, "learning_rate": 4.492795389048992e-05, "loss": 1.893, "step": 159400 }, { "epoch": 0.1, "learning_rate": 4.492475184117836e-05, "loss": 1.8832, "step": 159500 }, { "epoch": 0.1, "learning_rate": 4.49215497918668e-05, "loss": 1.8882, "step": 159600 }, { "epoch": 0.1, "learning_rate": 4.4918347742555236e-05, "loss": 1.9025, "step": 159700 }, { "epoch": 0.1, "learning_rate": 4.4915145693243676e-05, "loss": 1.8911, "step": 159800 }, { "epoch": 0.1, "learning_rate": 4.491194364393212e-05, "loss": 1.8982, "step": 159900 }, { "epoch": 0.1, "learning_rate": 4.4908741594620556e-05, "loss": 1.8903, "step": 160000 }, { "epoch": 0.1, "eval_loss": 1.7842870950698853, "eval_runtime": 93.9496, "eval_samples_per_second": 106.44, "eval_steps_per_second": 6.653, "step": 160000 }, { "epoch": 0.1, "learning_rate": 4.4905539545309e-05, "loss": 1.8937, "step": 160100 }, { "epoch": 0.1, "learning_rate": 4.490233749599744e-05, "loss": 1.8858, "step": 160200 }, { "epoch": 0.1, "learning_rate": 4.489913544668588e-05, "loss": 1.8901, "step": 160300 }, { "epoch": 0.1, "learning_rate": 4.489593339737432e-05, "loss": 1.8832, "step": 160400 }, { "epoch": 0.1, "learning_rate": 4.489273134806276e-05, "loss": 1.8778, "step": 160500 }, { "epoch": 0.1, "learning_rate": 4.48895292987512e-05, "loss": 1.878, "step": 160600 }, { "epoch": 0.1, "learning_rate": 4.488632724943964e-05, "loss": 1.8751, "step": 160700 }, { "epoch": 0.1, "learning_rate": 4.488312520012809e-05, "loss": 1.9043, "step": 160800 }, { "epoch": 0.1, "learning_rate": 4.487992315081653e-05, "loss": 1.8878, "step": 160900 }, { "epoch": 0.1, "learning_rate": 4.487672110150497e-05, "loss": 1.8807, "step": 161000 }, { "epoch": 0.1, "eval_loss": 1.7826472520828247, "eval_runtime": 94.5428, "eval_samples_per_second": 105.772, "eval_steps_per_second": 6.611, "step": 161000 }, { "epoch": 0.1, "learning_rate": 4.4873519052193406e-05, "loss": 1.8652, "step": 161100 }, { "epoch": 0.1, "learning_rate": 4.4870317002881846e-05, "loss": 1.8947, "step": 161200 }, { "epoch": 0.1, "learning_rate": 4.4867114953570286e-05, "loss": 1.8974, "step": 161300 }, { "epoch": 0.1, "learning_rate": 4.4863912904258726e-05, "loss": 1.894, "step": 161400 }, { "epoch": 0.1, "learning_rate": 4.486071085494717e-05, "loss": 1.8969, "step": 161500 }, { "epoch": 0.1, "learning_rate": 4.4857508805635605e-05, "loss": 1.8904, "step": 161600 }, { "epoch": 0.1, "learning_rate": 4.485430675632405e-05, "loss": 1.8767, "step": 161700 }, { "epoch": 0.1, "learning_rate": 4.485110470701249e-05, "loss": 1.8884, "step": 161800 }, { "epoch": 0.1, "learning_rate": 4.484790265770093e-05, "loss": 1.9001, "step": 161900 }, { "epoch": 0.1, "learning_rate": 4.484470060838937e-05, "loss": 1.9023, "step": 162000 }, { "epoch": 0.1, "eval_loss": 1.7827138900756836, "eval_runtime": 95.8687, "eval_samples_per_second": 104.309, "eval_steps_per_second": 6.519, "step": 162000 }, { "epoch": 0.1, "learning_rate": 4.484149855907781e-05, "loss": 1.8709, "step": 162100 }, { "epoch": 0.1, "learning_rate": 4.483829650976625e-05, "loss": 1.9039, "step": 162200 }, { "epoch": 0.1, "learning_rate": 4.483509446045469e-05, "loss": 1.8967, "step": 162300 }, { "epoch": 0.1, "learning_rate": 4.4831892411143137e-05, "loss": 1.8973, "step": 162400 }, { "epoch": 0.1, "learning_rate": 4.4828690361831576e-05, "loss": 1.8731, "step": 162500 }, { "epoch": 0.1, "learning_rate": 4.4825488312520016e-05, "loss": 1.8889, "step": 162600 }, { "epoch": 0.1, "learning_rate": 4.4822286263208456e-05, "loss": 1.9102, "step": 162700 }, { "epoch": 0.1, "learning_rate": 4.4819084213896896e-05, "loss": 1.8821, "step": 162800 }, { "epoch": 0.1, "learning_rate": 4.4815882164585335e-05, "loss": 1.9024, "step": 162900 }, { "epoch": 0.1, "learning_rate": 4.4812680115273775e-05, "loss": 1.8762, "step": 163000 }, { "epoch": 0.1, "eval_loss": 1.7835177183151245, "eval_runtime": 93.2851, "eval_samples_per_second": 107.198, "eval_steps_per_second": 6.7, "step": 163000 }, { "epoch": 0.1, "learning_rate": 4.480947806596222e-05, "loss": 1.8731, "step": 163100 }, { "epoch": 0.1, "learning_rate": 4.480627601665066e-05, "loss": 1.9044, "step": 163200 }, { "epoch": 0.1, "learning_rate": 4.48030739673391e-05, "loss": 1.8877, "step": 163300 }, { "epoch": 0.1, "learning_rate": 4.479987191802754e-05, "loss": 1.8553, "step": 163400 }, { "epoch": 0.1, "learning_rate": 4.479666986871598e-05, "loss": 1.8778, "step": 163500 }, { "epoch": 0.1, "learning_rate": 4.479346781940442e-05, "loss": 1.8884, "step": 163600 }, { "epoch": 0.1, "learning_rate": 4.479026577009286e-05, "loss": 1.8933, "step": 163700 }, { "epoch": 0.1, "learning_rate": 4.47870637207813e-05, "loss": 1.8832, "step": 163800 }, { "epoch": 0.1, "learning_rate": 4.478386167146974e-05, "loss": 1.8867, "step": 163900 }, { "epoch": 0.1, "learning_rate": 4.4780659622158186e-05, "loss": 1.8931, "step": 164000 }, { "epoch": 0.1, "eval_loss": 1.7827950716018677, "eval_runtime": 99.0397, "eval_samples_per_second": 100.97, "eval_steps_per_second": 6.311, "step": 164000 }, { "epoch": 0.11, "learning_rate": 4.4777457572846626e-05, "loss": 1.8674, "step": 164100 }, { "epoch": 0.11, "learning_rate": 4.4774255523535065e-05, "loss": 1.8716, "step": 164200 }, { "epoch": 0.11, "learning_rate": 4.4771053474223505e-05, "loss": 1.9016, "step": 164300 }, { "epoch": 0.11, "learning_rate": 4.4767851424911945e-05, "loss": 1.8923, "step": 164400 }, { "epoch": 0.11, "learning_rate": 4.4764649375600385e-05, "loss": 1.8925, "step": 164500 }, { "epoch": 0.11, "learning_rate": 4.4761447326288824e-05, "loss": 1.92, "step": 164600 }, { "epoch": 0.11, "learning_rate": 4.475824527697727e-05, "loss": 1.886, "step": 164700 }, { "epoch": 0.11, "learning_rate": 4.475504322766571e-05, "loss": 1.8949, "step": 164800 }, { "epoch": 0.11, "learning_rate": 4.475184117835415e-05, "loss": 1.9209, "step": 164900 }, { "epoch": 0.11, "learning_rate": 4.474863912904259e-05, "loss": 1.898, "step": 165000 }, { "epoch": 0.11, "eval_loss": 1.7788496017456055, "eval_runtime": 96.3328, "eval_samples_per_second": 103.807, "eval_steps_per_second": 6.488, "step": 165000 }, { "epoch": 0.11, "learning_rate": 4.474543707973103e-05, "loss": 1.8934, "step": 165100 }, { "epoch": 0.11, "learning_rate": 4.474223503041947e-05, "loss": 1.8785, "step": 165200 }, { "epoch": 0.11, "learning_rate": 4.473903298110791e-05, "loss": 1.8997, "step": 165300 }, { "epoch": 0.11, "learning_rate": 4.473583093179635e-05, "loss": 1.8823, "step": 165400 }, { "epoch": 0.11, "learning_rate": 4.4732628882484796e-05, "loss": 1.8729, "step": 165500 }, { "epoch": 0.11, "learning_rate": 4.4729426833173235e-05, "loss": 1.8972, "step": 165600 }, { "epoch": 0.11, "learning_rate": 4.4726224783861675e-05, "loss": 1.8874, "step": 165700 }, { "epoch": 0.11, "learning_rate": 4.4723022734550115e-05, "loss": 1.8699, "step": 165800 }, { "epoch": 0.11, "learning_rate": 4.4719820685238555e-05, "loss": 1.8927, "step": 165900 }, { "epoch": 0.11, "learning_rate": 4.4716618635926994e-05, "loss": 1.9092, "step": 166000 }, { "epoch": 0.11, "eval_loss": 1.7802164554595947, "eval_runtime": 98.6627, "eval_samples_per_second": 101.355, "eval_steps_per_second": 6.335, "step": 166000 }, { "epoch": 0.11, "learning_rate": 4.4713416586615434e-05, "loss": 1.876, "step": 166100 }, { "epoch": 0.11, "learning_rate": 4.4710214537303874e-05, "loss": 1.8776, "step": 166200 }, { "epoch": 0.11, "learning_rate": 4.470701248799232e-05, "loss": 1.8994, "step": 166300 }, { "epoch": 0.11, "learning_rate": 4.470381043868076e-05, "loss": 1.8875, "step": 166400 }, { "epoch": 0.11, "learning_rate": 4.47006083893692e-05, "loss": 1.8813, "step": 166500 }, { "epoch": 0.11, "learning_rate": 4.469740634005764e-05, "loss": 1.8873, "step": 166600 }, { "epoch": 0.11, "learning_rate": 4.469420429074608e-05, "loss": 1.8796, "step": 166700 }, { "epoch": 0.11, "learning_rate": 4.469100224143452e-05, "loss": 1.8844, "step": 166800 }, { "epoch": 0.11, "learning_rate": 4.468780019212296e-05, "loss": 1.8857, "step": 166900 }, { "epoch": 0.11, "learning_rate": 4.46845981428114e-05, "loss": 1.8851, "step": 167000 }, { "epoch": 0.11, "eval_loss": 1.7809312343597412, "eval_runtime": 94.4035, "eval_samples_per_second": 105.928, "eval_steps_per_second": 6.621, "step": 167000 }, { "epoch": 0.11, "learning_rate": 4.4681396093499845e-05, "loss": 1.9068, "step": 167100 }, { "epoch": 0.11, "learning_rate": 4.4678194044188285e-05, "loss": 1.8677, "step": 167200 }, { "epoch": 0.11, "learning_rate": 4.4674991994876724e-05, "loss": 1.8935, "step": 167300 }, { "epoch": 0.11, "learning_rate": 4.4671789945565164e-05, "loss": 1.8718, "step": 167400 }, { "epoch": 0.11, "learning_rate": 4.4668587896253604e-05, "loss": 1.8887, "step": 167500 }, { "epoch": 0.11, "learning_rate": 4.4665385846942044e-05, "loss": 1.9175, "step": 167600 }, { "epoch": 0.11, "learning_rate": 4.466218379763048e-05, "loss": 1.8935, "step": 167700 }, { "epoch": 0.11, "learning_rate": 4.465898174831893e-05, "loss": 1.9176, "step": 167800 }, { "epoch": 0.11, "learning_rate": 4.465577969900737e-05, "loss": 1.8784, "step": 167900 }, { "epoch": 0.11, "learning_rate": 4.465257764969581e-05, "loss": 1.8812, "step": 168000 }, { "epoch": 0.11, "eval_loss": 1.782820463180542, "eval_runtime": 99.4772, "eval_samples_per_second": 100.526, "eval_steps_per_second": 6.283, "step": 168000 }, { "epoch": 0.11, "learning_rate": 4.464937560038425e-05, "loss": 1.8936, "step": 168100 }, { "epoch": 0.11, "learning_rate": 4.464617355107269e-05, "loss": 1.8905, "step": 168200 }, { "epoch": 0.11, "learning_rate": 4.464297150176113e-05, "loss": 1.895, "step": 168300 }, { "epoch": 0.11, "learning_rate": 4.463976945244957e-05, "loss": 1.8788, "step": 168400 }, { "epoch": 0.11, "learning_rate": 4.4636567403138015e-05, "loss": 1.8762, "step": 168500 }, { "epoch": 0.11, "learning_rate": 4.463336535382645e-05, "loss": 1.862, "step": 168600 }, { "epoch": 0.11, "learning_rate": 4.4630163304514894e-05, "loss": 1.8979, "step": 168700 }, { "epoch": 0.11, "learning_rate": 4.4626961255203334e-05, "loss": 1.8931, "step": 168800 }, { "epoch": 0.11, "learning_rate": 4.4623759205891774e-05, "loss": 1.8995, "step": 168900 }, { "epoch": 0.11, "learning_rate": 4.4620557156580214e-05, "loss": 1.88, "step": 169000 }, { "epoch": 0.11, "eval_loss": 1.7770270109176636, "eval_runtime": 95.142, "eval_samples_per_second": 105.106, "eval_steps_per_second": 6.569, "step": 169000 }, { "epoch": 0.11, "learning_rate": 4.461735510726865e-05, "loss": 1.8907, "step": 169100 }, { "epoch": 0.11, "learning_rate": 4.461415305795709e-05, "loss": 1.8838, "step": 169200 }, { "epoch": 0.11, "learning_rate": 4.461095100864553e-05, "loss": 1.8751, "step": 169300 }, { "epoch": 0.11, "learning_rate": 4.460774895933398e-05, "loss": 1.8568, "step": 169400 }, { "epoch": 0.11, "learning_rate": 4.460454691002241e-05, "loss": 1.8686, "step": 169500 }, { "epoch": 0.11, "learning_rate": 4.460134486071086e-05, "loss": 1.9074, "step": 169600 }, { "epoch": 0.11, "learning_rate": 4.45981428113993e-05, "loss": 1.8716, "step": 169700 }, { "epoch": 0.11, "learning_rate": 4.459494076208774e-05, "loss": 1.8923, "step": 169800 }, { "epoch": 0.11, "learning_rate": 4.459173871277618e-05, "loss": 1.8902, "step": 169900 }, { "epoch": 0.11, "learning_rate": 4.458853666346462e-05, "loss": 1.8905, "step": 170000 }, { "epoch": 0.11, "eval_loss": 1.7760599851608276, "eval_runtime": 94.2511, "eval_samples_per_second": 106.1, "eval_steps_per_second": 6.631, "step": 170000 }, { "epoch": 0.11, "learning_rate": 4.4585334614153064e-05, "loss": 1.8887, "step": 170100 }, { "epoch": 0.11, "learning_rate": 4.45821325648415e-05, "loss": 1.8719, "step": 170200 }, { "epoch": 0.11, "learning_rate": 4.4578930515529944e-05, "loss": 1.8917, "step": 170300 }, { "epoch": 0.11, "learning_rate": 4.4575728466218383e-05, "loss": 1.8805, "step": 170400 }, { "epoch": 0.11, "learning_rate": 4.457252641690682e-05, "loss": 1.8794, "step": 170500 }, { "epoch": 0.11, "learning_rate": 4.456932436759526e-05, "loss": 1.9006, "step": 170600 }, { "epoch": 0.11, "learning_rate": 4.45661223182837e-05, "loss": 1.8777, "step": 170700 }, { "epoch": 0.11, "learning_rate": 4.456292026897215e-05, "loss": 1.8843, "step": 170800 }, { "epoch": 0.11, "learning_rate": 4.455971821966058e-05, "loss": 1.8772, "step": 170900 }, { "epoch": 0.11, "learning_rate": 4.455651617034903e-05, "loss": 1.8751, "step": 171000 }, { "epoch": 0.11, "eval_loss": 1.7757519483566284, "eval_runtime": 109.6738, "eval_samples_per_second": 91.179, "eval_steps_per_second": 5.699, "step": 171000 }, { "epoch": 0.11, "learning_rate": 4.455331412103746e-05, "loss": 1.8856, "step": 171100 }, { "epoch": 0.11, "learning_rate": 4.455011207172591e-05, "loss": 1.8658, "step": 171200 }, { "epoch": 0.11, "learning_rate": 4.454691002241435e-05, "loss": 1.887, "step": 171300 }, { "epoch": 0.11, "learning_rate": 4.454370797310279e-05, "loss": 1.8832, "step": 171400 }, { "epoch": 0.11, "learning_rate": 4.454050592379123e-05, "loss": 1.891, "step": 171500 }, { "epoch": 0.11, "learning_rate": 4.453730387447967e-05, "loss": 1.8892, "step": 171600 }, { "epoch": 0.11, "learning_rate": 4.4534101825168114e-05, "loss": 1.8874, "step": 171700 }, { "epoch": 0.11, "learning_rate": 4.4530899775856547e-05, "loss": 1.8555, "step": 171800 }, { "epoch": 0.11, "learning_rate": 4.452769772654499e-05, "loss": 1.8788, "step": 171900 }, { "epoch": 0.11, "learning_rate": 4.452449567723343e-05, "loss": 1.8783, "step": 172000 }, { "epoch": 0.11, "eval_loss": 1.7766636610031128, "eval_runtime": 94.0764, "eval_samples_per_second": 106.297, "eval_steps_per_second": 6.644, "step": 172000 }, { "epoch": 0.11, "learning_rate": 4.452129362792187e-05, "loss": 1.8934, "step": 172100 }, { "epoch": 0.11, "learning_rate": 4.451809157861031e-05, "loss": 1.8841, "step": 172200 }, { "epoch": 0.11, "learning_rate": 4.451488952929875e-05, "loss": 1.8922, "step": 172300 }, { "epoch": 0.11, "learning_rate": 4.45116874799872e-05, "loss": 1.8831, "step": 172400 }, { "epoch": 0.11, "learning_rate": 4.450848543067563e-05, "loss": 1.8775, "step": 172500 }, { "epoch": 0.11, "learning_rate": 4.450528338136408e-05, "loss": 1.8786, "step": 172600 }, { "epoch": 0.11, "learning_rate": 4.450208133205251e-05, "loss": 1.8813, "step": 172700 }, { "epoch": 0.11, "learning_rate": 4.449887928274096e-05, "loss": 1.8804, "step": 172800 }, { "epoch": 0.11, "learning_rate": 4.44956772334294e-05, "loss": 1.8669, "step": 172900 }, { "epoch": 0.11, "learning_rate": 4.449247518411784e-05, "loss": 1.8623, "step": 173000 }, { "epoch": 0.11, "eval_loss": 1.7767001390457153, "eval_runtime": 92.5395, "eval_samples_per_second": 108.062, "eval_steps_per_second": 6.754, "step": 173000 }, { "epoch": 0.11, "learning_rate": 4.4489273134806283e-05, "loss": 1.8734, "step": 173100 }, { "epoch": 0.11, "learning_rate": 4.4486071085494716e-05, "loss": 1.8521, "step": 173200 }, { "epoch": 0.11, "learning_rate": 4.448286903618316e-05, "loss": 1.8631, "step": 173300 }, { "epoch": 0.11, "learning_rate": 4.4479666986871596e-05, "loss": 1.8513, "step": 173400 }, { "epoch": 0.11, "learning_rate": 4.447646493756004e-05, "loss": 1.897, "step": 173500 }, { "epoch": 0.11, "learning_rate": 4.447326288824848e-05, "loss": 1.8713, "step": 173600 }, { "epoch": 0.11, "learning_rate": 4.447006083893692e-05, "loss": 1.8612, "step": 173700 }, { "epoch": 0.11, "learning_rate": 4.446685878962536e-05, "loss": 1.8573, "step": 173800 }, { "epoch": 0.11, "learning_rate": 4.44636567403138e-05, "loss": 1.8677, "step": 173900 }, { "epoch": 0.11, "learning_rate": 4.446045469100225e-05, "loss": 1.8768, "step": 174000 }, { "epoch": 0.11, "eval_loss": 1.7759476900100708, "eval_runtime": 93.4663, "eval_samples_per_second": 106.99, "eval_steps_per_second": 6.687, "step": 174000 }, { "epoch": 0.11, "learning_rate": 4.445725264169068e-05, "loss": 1.8613, "step": 174100 }, { "epoch": 0.11, "learning_rate": 4.445405059237913e-05, "loss": 1.8769, "step": 174200 }, { "epoch": 0.11, "learning_rate": 4.445084854306756e-05, "loss": 1.8561, "step": 174300 }, { "epoch": 0.11, "learning_rate": 4.444764649375601e-05, "loss": 1.8736, "step": 174400 }, { "epoch": 0.11, "learning_rate": 4.4444444444444447e-05, "loss": 1.8798, "step": 174500 }, { "epoch": 0.11, "learning_rate": 4.4441242395132886e-05, "loss": 1.8765, "step": 174600 }, { "epoch": 0.11, "learning_rate": 4.443804034582133e-05, "loss": 1.8788, "step": 174700 }, { "epoch": 0.11, "learning_rate": 4.4434838296509766e-05, "loss": 1.8543, "step": 174800 }, { "epoch": 0.11, "learning_rate": 4.443163624719821e-05, "loss": 1.8832, "step": 174900 }, { "epoch": 0.11, "learning_rate": 4.4428434197886645e-05, "loss": 1.8764, "step": 175000 }, { "epoch": 0.11, "eval_loss": 1.7749509811401367, "eval_runtime": 91.9556, "eval_samples_per_second": 108.748, "eval_steps_per_second": 6.797, "step": 175000 }, { "epoch": 0.11, "learning_rate": 4.442523214857509e-05, "loss": 1.8597, "step": 175100 }, { "epoch": 0.11, "learning_rate": 4.442203009926353e-05, "loss": 1.8801, "step": 175200 }, { "epoch": 0.11, "learning_rate": 4.441882804995197e-05, "loss": 1.8872, "step": 175300 }, { "epoch": 0.11, "learning_rate": 4.441562600064042e-05, "loss": 1.8572, "step": 175400 }, { "epoch": 0.11, "learning_rate": 4.441242395132885e-05, "loss": 1.8672, "step": 175500 }, { "epoch": 0.11, "learning_rate": 4.44092219020173e-05, "loss": 1.8573, "step": 175600 }, { "epoch": 0.11, "learning_rate": 4.440601985270573e-05, "loss": 1.8648, "step": 175700 }, { "epoch": 0.11, "learning_rate": 4.440281780339418e-05, "loss": 1.8723, "step": 175800 }, { "epoch": 0.11, "learning_rate": 4.439961575408261e-05, "loss": 1.8742, "step": 175900 }, { "epoch": 0.11, "learning_rate": 4.4396413704771056e-05, "loss": 1.8634, "step": 176000 }, { "epoch": 0.11, "eval_loss": 1.7748626470565796, "eval_runtime": 96.4939, "eval_samples_per_second": 103.633, "eval_steps_per_second": 6.477, "step": 176000 }, { "epoch": 0.11, "learning_rate": 4.4393211655459496e-05, "loss": 1.8937, "step": 176100 }, { "epoch": 0.11, "learning_rate": 4.4390009606147936e-05, "loss": 1.8642, "step": 176200 }, { "epoch": 0.11, "learning_rate": 4.438680755683638e-05, "loss": 1.8755, "step": 176300 }, { "epoch": 0.11, "learning_rate": 4.4383605507524815e-05, "loss": 1.8818, "step": 176400 }, { "epoch": 0.11, "learning_rate": 4.438040345821326e-05, "loss": 1.8774, "step": 176500 }, { "epoch": 0.11, "learning_rate": 4.4377201408901695e-05, "loss": 1.8715, "step": 176600 }, { "epoch": 0.11, "learning_rate": 4.437399935959014e-05, "loss": 1.8869, "step": 176700 }, { "epoch": 0.11, "learning_rate": 4.437079731027858e-05, "loss": 1.8592, "step": 176800 }, { "epoch": 0.11, "learning_rate": 4.436759526096702e-05, "loss": 1.8876, "step": 176900 }, { "epoch": 0.11, "learning_rate": 4.436439321165547e-05, "loss": 1.8503, "step": 177000 }, { "epoch": 0.11, "eval_loss": 1.7741420269012451, "eval_runtime": 98.8358, "eval_samples_per_second": 101.178, "eval_steps_per_second": 6.324, "step": 177000 }, { "epoch": 0.11, "learning_rate": 4.43611911623439e-05, "loss": 1.8897, "step": 177100 }, { "epoch": 0.11, "learning_rate": 4.435798911303235e-05, "loss": 1.8976, "step": 177200 }, { "epoch": 0.11, "learning_rate": 4.435478706372078e-05, "loss": 1.8874, "step": 177300 }, { "epoch": 0.11, "learning_rate": 4.4351585014409226e-05, "loss": 1.8685, "step": 177400 }, { "epoch": 0.11, "learning_rate": 4.434838296509766e-05, "loss": 1.8562, "step": 177500 }, { "epoch": 0.11, "learning_rate": 4.4345180915786106e-05, "loss": 1.8811, "step": 177600 }, { "epoch": 0.11, "learning_rate": 4.4341978866474545e-05, "loss": 1.8564, "step": 177700 }, { "epoch": 0.11, "learning_rate": 4.4338776817162985e-05, "loss": 1.8785, "step": 177800 }, { "epoch": 0.11, "learning_rate": 4.433557476785143e-05, "loss": 1.8811, "step": 177900 }, { "epoch": 0.11, "learning_rate": 4.4332372718539865e-05, "loss": 1.8612, "step": 178000 }, { "epoch": 0.11, "eval_loss": 1.7775992155075073, "eval_runtime": 94.0824, "eval_samples_per_second": 106.29, "eval_steps_per_second": 6.643, "step": 178000 }, { "epoch": 0.11, "learning_rate": 4.432917066922831e-05, "loss": 1.8919, "step": 178100 }, { "epoch": 0.11, "learning_rate": 4.4325968619916744e-05, "loss": 1.8815, "step": 178200 }, { "epoch": 0.11, "learning_rate": 4.432276657060519e-05, "loss": 1.8659, "step": 178300 }, { "epoch": 0.11, "learning_rate": 4.431956452129363e-05, "loss": 1.8736, "step": 178400 }, { "epoch": 0.11, "learning_rate": 4.431636247198207e-05, "loss": 1.8615, "step": 178500 }, { "epoch": 0.11, "learning_rate": 4.4313160422670517e-05, "loss": 1.882, "step": 178600 }, { "epoch": 0.11, "learning_rate": 4.430995837335895e-05, "loss": 1.8745, "step": 178700 }, { "epoch": 0.11, "learning_rate": 4.4306756324047396e-05, "loss": 1.8788, "step": 178800 }, { "epoch": 0.11, "learning_rate": 4.430355427473583e-05, "loss": 1.8595, "step": 178900 }, { "epoch": 0.11, "learning_rate": 4.4300352225424275e-05, "loss": 1.8951, "step": 179000 }, { "epoch": 0.11, "eval_loss": 1.774768352508545, "eval_runtime": 90.587, "eval_samples_per_second": 110.391, "eval_steps_per_second": 6.899, "step": 179000 }, { "epoch": 0.11, "learning_rate": 4.429715017611271e-05, "loss": 1.8733, "step": 179100 }, { "epoch": 0.11, "learning_rate": 4.4293948126801155e-05, "loss": 1.8981, "step": 179200 }, { "epoch": 0.11, "learning_rate": 4.4290746077489595e-05, "loss": 1.8687, "step": 179300 }, { "epoch": 0.11, "learning_rate": 4.4287544028178034e-05, "loss": 1.8801, "step": 179400 }, { "epoch": 0.11, "learning_rate": 4.428434197886648e-05, "loss": 1.8599, "step": 179500 }, { "epoch": 0.11, "learning_rate": 4.4281139929554914e-05, "loss": 1.8863, "step": 179600 }, { "epoch": 0.12, "learning_rate": 4.427793788024336e-05, "loss": 1.877, "step": 179700 }, { "epoch": 0.12, "learning_rate": 4.427473583093179e-05, "loss": 1.8694, "step": 179800 }, { "epoch": 0.12, "learning_rate": 4.427153378162024e-05, "loss": 1.8632, "step": 179900 }, { "epoch": 0.12, "learning_rate": 4.426833173230868e-05, "loss": 1.8533, "step": 180000 }, { "epoch": 0.12, "eval_loss": 1.774195671081543, "eval_runtime": 93.8327, "eval_samples_per_second": 106.573, "eval_steps_per_second": 6.661, "step": 180000 }, { "epoch": 0.12, "learning_rate": 4.426512968299712e-05, "loss": 1.8631, "step": 180100 }, { "epoch": 0.12, "learning_rate": 4.4261927633685566e-05, "loss": 1.8693, "step": 180200 }, { "epoch": 0.12, "learning_rate": 4.4258725584374e-05, "loss": 1.8649, "step": 180300 }, { "epoch": 0.12, "learning_rate": 4.4255523535062445e-05, "loss": 1.8929, "step": 180400 }, { "epoch": 0.12, "learning_rate": 4.425232148575088e-05, "loss": 1.8611, "step": 180500 }, { "epoch": 0.12, "learning_rate": 4.4249119436439325e-05, "loss": 1.869, "step": 180600 }, { "epoch": 0.12, "learning_rate": 4.4245917387127765e-05, "loss": 1.8769, "step": 180700 }, { "epoch": 0.12, "learning_rate": 4.4242715337816204e-05, "loss": 1.8751, "step": 180800 }, { "epoch": 0.12, "learning_rate": 4.4239513288504644e-05, "loss": 1.8742, "step": 180900 }, { "epoch": 0.12, "learning_rate": 4.4236311239193084e-05, "loss": 1.8709, "step": 181000 }, { "epoch": 0.12, "eval_loss": 1.7755683660507202, "eval_runtime": 94.6605, "eval_samples_per_second": 105.641, "eval_steps_per_second": 6.603, "step": 181000 }, { "epoch": 0.12, "learning_rate": 4.423310918988153e-05, "loss": 1.8827, "step": 181100 }, { "epoch": 0.12, "learning_rate": 4.422990714056996e-05, "loss": 1.8635, "step": 181200 }, { "epoch": 0.12, "learning_rate": 4.422670509125841e-05, "loss": 1.852, "step": 181300 }, { "epoch": 0.12, "learning_rate": 4.422350304194684e-05, "loss": 1.8634, "step": 181400 }, { "epoch": 0.12, "learning_rate": 4.422030099263529e-05, "loss": 1.8884, "step": 181500 }, { "epoch": 0.12, "learning_rate": 4.421709894332373e-05, "loss": 1.8831, "step": 181600 }, { "epoch": 0.12, "learning_rate": 4.421389689401217e-05, "loss": 1.853, "step": 181700 }, { "epoch": 0.12, "learning_rate": 4.4210694844700615e-05, "loss": 1.8773, "step": 181800 }, { "epoch": 0.12, "learning_rate": 4.420749279538905e-05, "loss": 1.8605, "step": 181900 }, { "epoch": 0.12, "learning_rate": 4.4204290746077495e-05, "loss": 1.8542, "step": 182000 }, { "epoch": 0.12, "eval_loss": 1.773227334022522, "eval_runtime": 99.7593, "eval_samples_per_second": 100.241, "eval_steps_per_second": 6.265, "step": 182000 }, { "epoch": 0.12, "learning_rate": 4.420108869676593e-05, "loss": 1.8566, "step": 182100 }, { "epoch": 0.12, "learning_rate": 4.4197886647454374e-05, "loss": 1.8387, "step": 182200 }, { "epoch": 0.12, "learning_rate": 4.4194684598142814e-05, "loss": 1.8636, "step": 182300 }, { "epoch": 0.12, "learning_rate": 4.4191482548831254e-05, "loss": 1.8598, "step": 182400 }, { "epoch": 0.12, "learning_rate": 4.4188280499519693e-05, "loss": 1.8559, "step": 182500 }, { "epoch": 0.12, "learning_rate": 4.418507845020813e-05, "loss": 1.8804, "step": 182600 }, { "epoch": 0.12, "learning_rate": 4.418187640089658e-05, "loss": 1.8697, "step": 182700 }, { "epoch": 0.12, "learning_rate": 4.417867435158501e-05, "loss": 1.8567, "step": 182800 }, { "epoch": 0.12, "learning_rate": 4.417547230227346e-05, "loss": 1.8896, "step": 182900 }, { "epoch": 0.12, "learning_rate": 4.41722702529619e-05, "loss": 1.8857, "step": 183000 }, { "epoch": 0.12, "eval_loss": 1.7715845108032227, "eval_runtime": 100.1417, "eval_samples_per_second": 99.858, "eval_steps_per_second": 6.241, "step": 183000 }, { "epoch": 0.12, "learning_rate": 4.416906820365034e-05, "loss": 1.8744, "step": 183100 }, { "epoch": 0.12, "learning_rate": 4.416586615433878e-05, "loss": 1.8395, "step": 183200 }, { "epoch": 0.12, "learning_rate": 4.416266410502722e-05, "loss": 1.8687, "step": 183300 }, { "epoch": 0.12, "learning_rate": 4.4159462055715665e-05, "loss": 1.8835, "step": 183400 }, { "epoch": 0.12, "learning_rate": 4.41562600064041e-05, "loss": 1.8951, "step": 183500 }, { "epoch": 0.12, "learning_rate": 4.4153057957092544e-05, "loss": 1.8782, "step": 183600 }, { "epoch": 0.12, "learning_rate": 4.414985590778098e-05, "loss": 1.8758, "step": 183700 }, { "epoch": 0.12, "learning_rate": 4.4146653858469424e-05, "loss": 1.863, "step": 183800 }, { "epoch": 0.12, "learning_rate": 4.414345180915786e-05, "loss": 1.8533, "step": 183900 }, { "epoch": 0.12, "learning_rate": 4.41402497598463e-05, "loss": 1.8554, "step": 184000 }, { "epoch": 0.12, "eval_loss": 1.7713981866836548, "eval_runtime": 94.8479, "eval_samples_per_second": 105.432, "eval_steps_per_second": 6.589, "step": 184000 }, { "epoch": 0.12, "learning_rate": 4.413704771053474e-05, "loss": 1.8792, "step": 184100 }, { "epoch": 0.12, "learning_rate": 4.413384566122318e-05, "loss": 1.874, "step": 184200 }, { "epoch": 0.12, "learning_rate": 4.413064361191163e-05, "loss": 1.8686, "step": 184300 }, { "epoch": 0.12, "learning_rate": 4.412744156260006e-05, "loss": 1.8623, "step": 184400 }, { "epoch": 0.12, "learning_rate": 4.412423951328851e-05, "loss": 1.8693, "step": 184500 }, { "epoch": 0.12, "learning_rate": 4.412103746397695e-05, "loss": 1.8591, "step": 184600 }, { "epoch": 0.12, "learning_rate": 4.411783541466539e-05, "loss": 1.8694, "step": 184700 }, { "epoch": 0.12, "learning_rate": 4.411463336535383e-05, "loss": 1.8639, "step": 184800 }, { "epoch": 0.12, "learning_rate": 4.411143131604227e-05, "loss": 1.8581, "step": 184900 }, { "epoch": 0.12, "learning_rate": 4.4108229266730714e-05, "loss": 1.8551, "step": 185000 }, { "epoch": 0.12, "eval_loss": 1.7757482528686523, "eval_runtime": 95.3298, "eval_samples_per_second": 104.899, "eval_steps_per_second": 6.556, "step": 185000 }, { "epoch": 0.12, "learning_rate": 4.410502721741915e-05, "loss": 1.8713, "step": 185100 }, { "epoch": 0.12, "learning_rate": 4.4101825168107593e-05, "loss": 1.8721, "step": 185200 }, { "epoch": 0.12, "learning_rate": 4.409862311879603e-05, "loss": 1.8663, "step": 185300 }, { "epoch": 0.12, "learning_rate": 4.409542106948447e-05, "loss": 1.8601, "step": 185400 }, { "epoch": 0.12, "learning_rate": 4.409221902017291e-05, "loss": 1.8818, "step": 185500 }, { "epoch": 0.12, "learning_rate": 4.408901697086135e-05, "loss": 1.8579, "step": 185600 }, { "epoch": 0.12, "learning_rate": 4.408581492154979e-05, "loss": 1.876, "step": 185700 }, { "epoch": 0.12, "learning_rate": 4.408261287223823e-05, "loss": 1.8733, "step": 185800 }, { "epoch": 0.12, "learning_rate": 4.407941082292668e-05, "loss": 1.8493, "step": 185900 }, { "epoch": 0.12, "learning_rate": 4.407620877361511e-05, "loss": 1.862, "step": 186000 }, { "epoch": 0.12, "eval_loss": 1.7719496488571167, "eval_runtime": 91.8595, "eval_samples_per_second": 108.862, "eval_steps_per_second": 6.804, "step": 186000 }, { "epoch": 0.12, "learning_rate": 4.407300672430356e-05, "loss": 1.8677, "step": 186100 }, { "epoch": 0.12, "learning_rate": 4.4069804674992e-05, "loss": 1.8585, "step": 186200 }, { "epoch": 0.12, "learning_rate": 4.406660262568044e-05, "loss": 1.8733, "step": 186300 }, { "epoch": 0.12, "learning_rate": 4.406340057636888e-05, "loss": 1.8625, "step": 186400 }, { "epoch": 0.12, "learning_rate": 4.406019852705732e-05, "loss": 1.88, "step": 186500 }, { "epoch": 0.12, "learning_rate": 4.405699647774576e-05, "loss": 1.8802, "step": 186600 }, { "epoch": 0.12, "learning_rate": 4.4053794428434196e-05, "loss": 1.8615, "step": 186700 }, { "epoch": 0.12, "learning_rate": 4.405059237912264e-05, "loss": 1.8629, "step": 186800 }, { "epoch": 0.12, "learning_rate": 4.404739032981108e-05, "loss": 1.8875, "step": 186900 }, { "epoch": 0.12, "learning_rate": 4.404418828049952e-05, "loss": 1.8573, "step": 187000 }, { "epoch": 0.12, "eval_loss": 1.7745039463043213, "eval_runtime": 95.4388, "eval_samples_per_second": 104.779, "eval_steps_per_second": 6.549, "step": 187000 }, { "epoch": 0.12, "learning_rate": 4.404098623118796e-05, "loss": 1.8776, "step": 187100 }, { "epoch": 0.12, "learning_rate": 4.40377841818764e-05, "loss": 1.8793, "step": 187200 }, { "epoch": 0.12, "learning_rate": 4.403458213256484e-05, "loss": 1.8601, "step": 187300 }, { "epoch": 0.12, "learning_rate": 4.403138008325328e-05, "loss": 1.8726, "step": 187400 }, { "epoch": 0.12, "learning_rate": 4.402817803394173e-05, "loss": 1.8446, "step": 187500 }, { "epoch": 0.12, "learning_rate": 4.402497598463017e-05, "loss": 1.8585, "step": 187600 }, { "epoch": 0.12, "learning_rate": 4.402177393531861e-05, "loss": 1.8849, "step": 187700 }, { "epoch": 0.12, "learning_rate": 4.401857188600705e-05, "loss": 1.8503, "step": 187800 }, { "epoch": 0.12, "learning_rate": 4.401536983669549e-05, "loss": 1.8597, "step": 187900 }, { "epoch": 0.12, "learning_rate": 4.4012167787383926e-05, "loss": 1.8595, "step": 188000 }, { "epoch": 0.12, "eval_loss": 1.7738285064697266, "eval_runtime": 96.4393, "eval_samples_per_second": 103.692, "eval_steps_per_second": 6.481, "step": 188000 }, { "epoch": 0.12, "learning_rate": 4.4008965738072366e-05, "loss": 1.8814, "step": 188100 }, { "epoch": 0.12, "learning_rate": 4.4005763688760806e-05, "loss": 1.853, "step": 188200 }, { "epoch": 0.12, "learning_rate": 4.400256163944925e-05, "loss": 1.8701, "step": 188300 }, { "epoch": 0.12, "learning_rate": 4.399935959013769e-05, "loss": 1.8698, "step": 188400 }, { "epoch": 0.12, "learning_rate": 4.399615754082613e-05, "loss": 1.8746, "step": 188500 }, { "epoch": 0.12, "learning_rate": 4.399295549151457e-05, "loss": 1.8644, "step": 188600 }, { "epoch": 0.12, "learning_rate": 4.398975344220301e-05, "loss": 1.8676, "step": 188700 }, { "epoch": 0.12, "learning_rate": 4.398655139289145e-05, "loss": 1.8712, "step": 188800 }, { "epoch": 0.12, "learning_rate": 4.398334934357989e-05, "loss": 1.8688, "step": 188900 }, { "epoch": 0.12, "learning_rate": 4.398014729426833e-05, "loss": 1.8555, "step": 189000 }, { "epoch": 0.12, "eval_loss": 1.7717466354370117, "eval_runtime": 95.2662, "eval_samples_per_second": 104.969, "eval_steps_per_second": 6.561, "step": 189000 }, { "epoch": 0.12, "learning_rate": 4.397694524495678e-05, "loss": 1.8655, "step": 189100 }, { "epoch": 0.12, "learning_rate": 4.397374319564522e-05, "loss": 1.8689, "step": 189200 }, { "epoch": 0.12, "learning_rate": 4.397054114633366e-05, "loss": 1.8546, "step": 189300 }, { "epoch": 0.12, "learning_rate": 4.3967339097022096e-05, "loss": 1.8657, "step": 189400 }, { "epoch": 0.12, "learning_rate": 4.3964137047710536e-05, "loss": 1.8678, "step": 189500 }, { "epoch": 0.12, "learning_rate": 4.3960934998398976e-05, "loss": 1.875, "step": 189600 }, { "epoch": 0.12, "learning_rate": 4.3957732949087416e-05, "loss": 1.895, "step": 189700 }, { "epoch": 0.12, "learning_rate": 4.3954530899775855e-05, "loss": 1.8556, "step": 189800 }, { "epoch": 0.12, "learning_rate": 4.39513288504643e-05, "loss": 1.8706, "step": 189900 }, { "epoch": 0.12, "learning_rate": 4.394812680115274e-05, "loss": 1.8577, "step": 190000 }, { "epoch": 0.12, "eval_loss": 1.773512363433838, "eval_runtime": 96.2192, "eval_samples_per_second": 103.929, "eval_steps_per_second": 6.496, "step": 190000 }, { "epoch": 0.12, "learning_rate": 4.394492475184118e-05, "loss": 1.8433, "step": 190100 }, { "epoch": 0.12, "learning_rate": 4.394172270252962e-05, "loss": 1.8646, "step": 190200 }, { "epoch": 0.12, "learning_rate": 4.393852065321806e-05, "loss": 1.8594, "step": 190300 }, { "epoch": 0.12, "learning_rate": 4.39353186039065e-05, "loss": 1.8635, "step": 190400 }, { "epoch": 0.12, "learning_rate": 4.393211655459494e-05, "loss": 1.8669, "step": 190500 }, { "epoch": 0.12, "learning_rate": 4.392891450528339e-05, "loss": 1.8544, "step": 190600 }, { "epoch": 0.12, "learning_rate": 4.3925712455971827e-05, "loss": 1.8457, "step": 190700 }, { "epoch": 0.12, "learning_rate": 4.3922510406660266e-05, "loss": 1.8593, "step": 190800 }, { "epoch": 0.12, "learning_rate": 4.3919308357348706e-05, "loss": 1.8623, "step": 190900 }, { "epoch": 0.12, "learning_rate": 4.3916106308037146e-05, "loss": 1.8647, "step": 191000 }, { "epoch": 0.12, "eval_loss": 1.7729767560958862, "eval_runtime": 94.6541, "eval_samples_per_second": 105.648, "eval_steps_per_second": 6.603, "step": 191000 }, { "epoch": 0.12, "learning_rate": 4.3912904258725585e-05, "loss": 1.8567, "step": 191100 }, { "epoch": 0.12, "learning_rate": 4.3909702209414025e-05, "loss": 1.8315, "step": 191200 }, { "epoch": 0.12, "learning_rate": 4.3906500160102465e-05, "loss": 1.8576, "step": 191300 }, { "epoch": 0.12, "learning_rate": 4.3903298110790905e-05, "loss": 1.8679, "step": 191400 }, { "epoch": 0.12, "learning_rate": 4.390009606147935e-05, "loss": 1.8629, "step": 191500 }, { "epoch": 0.12, "learning_rate": 4.389689401216779e-05, "loss": 1.8729, "step": 191600 }, { "epoch": 0.12, "learning_rate": 4.389369196285623e-05, "loss": 1.8678, "step": 191700 }, { "epoch": 0.12, "learning_rate": 4.389048991354467e-05, "loss": 1.8657, "step": 191800 }, { "epoch": 0.12, "learning_rate": 4.388728786423311e-05, "loss": 1.8628, "step": 191900 }, { "epoch": 0.12, "learning_rate": 4.388408581492155e-05, "loss": 1.8803, "step": 192000 }, { "epoch": 0.12, "eval_loss": 1.7695930004119873, "eval_runtime": 91.5649, "eval_samples_per_second": 109.212, "eval_steps_per_second": 6.826, "step": 192000 }, { "epoch": 0.12, "learning_rate": 4.388088376560999e-05, "loss": 1.8529, "step": 192100 }, { "epoch": 0.12, "learning_rate": 4.3877681716298436e-05, "loss": 1.8522, "step": 192200 }, { "epoch": 0.12, "learning_rate": 4.3874479666986876e-05, "loss": 1.8657, "step": 192300 }, { "epoch": 0.12, "learning_rate": 4.3871277617675316e-05, "loss": 1.8741, "step": 192400 }, { "epoch": 0.12, "learning_rate": 4.3868075568363755e-05, "loss": 1.8726, "step": 192500 }, { "epoch": 0.12, "learning_rate": 4.3864873519052195e-05, "loss": 1.8651, "step": 192600 }, { "epoch": 0.12, "learning_rate": 4.3861671469740635e-05, "loss": 1.8729, "step": 192700 }, { "epoch": 0.12, "learning_rate": 4.3858469420429075e-05, "loss": 1.848, "step": 192800 }, { "epoch": 0.12, "learning_rate": 4.385526737111752e-05, "loss": 1.8647, "step": 192900 }, { "epoch": 0.12, "learning_rate": 4.3852065321805954e-05, "loss": 1.8555, "step": 193000 }, { "epoch": 0.12, "eval_loss": 1.769763469696045, "eval_runtime": 94.286, "eval_samples_per_second": 106.06, "eval_steps_per_second": 6.629, "step": 193000 }, { "epoch": 0.12, "learning_rate": 4.38488632724944e-05, "loss": 1.8807, "step": 193100 }, { "epoch": 0.12, "learning_rate": 4.384566122318284e-05, "loss": 1.8522, "step": 193200 }, { "epoch": 0.12, "learning_rate": 4.384245917387128e-05, "loss": 1.8726, "step": 193300 }, { "epoch": 0.12, "learning_rate": 4.383925712455972e-05, "loss": 1.8678, "step": 193400 }, { "epoch": 0.12, "learning_rate": 4.383605507524816e-05, "loss": 1.8351, "step": 193500 }, { "epoch": 0.12, "learning_rate": 4.38328530259366e-05, "loss": 1.8698, "step": 193600 }, { "epoch": 0.12, "learning_rate": 4.382965097662504e-05, "loss": 1.8716, "step": 193700 }, { "epoch": 0.12, "learning_rate": 4.3826448927313486e-05, "loss": 1.8453, "step": 193800 }, { "epoch": 0.12, "learning_rate": 4.3823246878001925e-05, "loss": 1.8598, "step": 193900 }, { "epoch": 0.12, "learning_rate": 4.3820044828690365e-05, "loss": 1.8661, "step": 194000 }, { "epoch": 0.12, "eval_loss": 1.7693036794662476, "eval_runtime": 93.8252, "eval_samples_per_second": 106.581, "eval_steps_per_second": 6.661, "step": 194000 }, { "epoch": 0.12, "learning_rate": 4.3816842779378805e-05, "loss": 1.8495, "step": 194100 }, { "epoch": 0.12, "learning_rate": 4.3813640730067245e-05, "loss": 1.8407, "step": 194200 }, { "epoch": 0.12, "learning_rate": 4.3810438680755684e-05, "loss": 1.8261, "step": 194300 }, { "epoch": 0.12, "learning_rate": 4.3807236631444124e-05, "loss": 1.8694, "step": 194400 }, { "epoch": 0.12, "learning_rate": 4.380403458213257e-05, "loss": 1.8616, "step": 194500 }, { "epoch": 0.12, "learning_rate": 4.3800832532821003e-05, "loss": 1.8581, "step": 194600 }, { "epoch": 0.12, "learning_rate": 4.379763048350945e-05, "loss": 1.8521, "step": 194700 }, { "epoch": 0.12, "learning_rate": 4.379442843419789e-05, "loss": 1.8501, "step": 194800 }, { "epoch": 0.12, "learning_rate": 4.379122638488633e-05, "loss": 1.8626, "step": 194900 }, { "epoch": 0.12, "learning_rate": 4.378802433557477e-05, "loss": 1.8678, "step": 195000 }, { "epoch": 0.12, "eval_loss": 1.769243597984314, "eval_runtime": 93.2508, "eval_samples_per_second": 107.238, "eval_steps_per_second": 6.702, "step": 195000 }, { "epoch": 0.12, "learning_rate": 4.378482228626321e-05, "loss": 1.8551, "step": 195100 }, { "epoch": 0.12, "learning_rate": 4.3781620236951655e-05, "loss": 1.8641, "step": 195200 }, { "epoch": 0.12, "learning_rate": 4.377841818764009e-05, "loss": 1.8563, "step": 195300 }, { "epoch": 0.13, "learning_rate": 4.3775216138328535e-05, "loss": 1.8479, "step": 195400 }, { "epoch": 0.13, "learning_rate": 4.3772014089016975e-05, "loss": 1.8758, "step": 195500 }, { "epoch": 0.13, "learning_rate": 4.3768812039705414e-05, "loss": 1.844, "step": 195600 }, { "epoch": 0.13, "learning_rate": 4.3765609990393854e-05, "loss": 1.8573, "step": 195700 }, { "epoch": 0.13, "learning_rate": 4.3762407941082294e-05, "loss": 1.8744, "step": 195800 }, { "epoch": 0.13, "learning_rate": 4.3759205891770734e-05, "loss": 1.8401, "step": 195900 }, { "epoch": 0.13, "learning_rate": 4.375600384245917e-05, "loss": 1.856, "step": 196000 }, { "epoch": 0.13, "eval_loss": 1.7694171667099, "eval_runtime": 95.6024, "eval_samples_per_second": 104.6, "eval_steps_per_second": 6.537, "step": 196000 }, { "epoch": 0.13, "learning_rate": 4.375280179314762e-05, "loss": 1.8389, "step": 196100 }, { "epoch": 0.13, "learning_rate": 4.374959974383605e-05, "loss": 1.872, "step": 196200 }, { "epoch": 0.13, "learning_rate": 4.37463976945245e-05, "loss": 1.8367, "step": 196300 }, { "epoch": 0.13, "learning_rate": 4.374319564521294e-05, "loss": 1.8753, "step": 196400 }, { "epoch": 0.13, "learning_rate": 4.373999359590138e-05, "loss": 1.8609, "step": 196500 }, { "epoch": 0.13, "learning_rate": 4.373679154658982e-05, "loss": 1.8528, "step": 196600 }, { "epoch": 0.13, "learning_rate": 4.373358949727826e-05, "loss": 1.8468, "step": 196700 }, { "epoch": 0.13, "learning_rate": 4.3730387447966705e-05, "loss": 1.8593, "step": 196800 }, { "epoch": 0.13, "learning_rate": 4.372718539865514e-05, "loss": 1.8375, "step": 196900 }, { "epoch": 0.13, "learning_rate": 4.3723983349343584e-05, "loss": 1.8495, "step": 197000 }, { "epoch": 0.13, "eval_loss": 1.7682294845581055, "eval_runtime": 93.1, "eval_samples_per_second": 107.411, "eval_steps_per_second": 6.713, "step": 197000 }, { "epoch": 0.13, "learning_rate": 4.3720781300032024e-05, "loss": 1.8553, "step": 197100 }, { "epoch": 0.13, "learning_rate": 4.3717579250720464e-05, "loss": 1.8482, "step": 197200 }, { "epoch": 0.13, "learning_rate": 4.3714377201408904e-05, "loss": 1.8613, "step": 197300 }, { "epoch": 0.13, "learning_rate": 4.371117515209734e-05, "loss": 1.8506, "step": 197400 }, { "epoch": 0.13, "learning_rate": 4.370797310278579e-05, "loss": 1.8588, "step": 197500 }, { "epoch": 0.13, "learning_rate": 4.370477105347422e-05, "loss": 1.854, "step": 197600 }, { "epoch": 0.13, "learning_rate": 4.370156900416267e-05, "loss": 1.8779, "step": 197700 }, { "epoch": 0.13, "learning_rate": 4.36983669548511e-05, "loss": 1.8663, "step": 197800 }, { "epoch": 0.13, "learning_rate": 4.369516490553955e-05, "loss": 1.8454, "step": 197900 }, { "epoch": 0.13, "learning_rate": 4.369196285622799e-05, "loss": 1.8563, "step": 198000 }, { "epoch": 0.13, "eval_loss": 1.7728908061981201, "eval_runtime": 94.2069, "eval_samples_per_second": 106.149, "eval_steps_per_second": 6.634, "step": 198000 }, { "epoch": 0.13, "learning_rate": 4.368876080691643e-05, "loss": 1.8612, "step": 198100 }, { "epoch": 0.13, "learning_rate": 4.3685558757604875e-05, "loss": 1.8685, "step": 198200 }, { "epoch": 0.13, "learning_rate": 4.368235670829331e-05, "loss": 1.8455, "step": 198300 }, { "epoch": 0.13, "learning_rate": 4.3679154658981754e-05, "loss": 1.8508, "step": 198400 }, { "epoch": 0.13, "learning_rate": 4.367595260967019e-05, "loss": 1.8598, "step": 198500 }, { "epoch": 0.13, "learning_rate": 4.3672750560358634e-05, "loss": 1.8452, "step": 198600 }, { "epoch": 0.13, "learning_rate": 4.3669548511047073e-05, "loss": 1.8553, "step": 198700 }, { "epoch": 0.13, "learning_rate": 4.366634646173551e-05, "loss": 1.8544, "step": 198800 }, { "epoch": 0.13, "learning_rate": 4.366314441242395e-05, "loss": 1.8595, "step": 198900 }, { "epoch": 0.13, "learning_rate": 4.365994236311239e-05, "loss": 1.8592, "step": 199000 }, { "epoch": 0.13, "eval_loss": 1.768676996231079, "eval_runtime": 92.7789, "eval_samples_per_second": 107.783, "eval_steps_per_second": 6.736, "step": 199000 }, { "epoch": 0.13, "learning_rate": 4.365674031380084e-05, "loss": 1.8603, "step": 199100 }, { "epoch": 0.13, "learning_rate": 4.365353826448927e-05, "loss": 1.8371, "step": 199200 }, { "epoch": 0.13, "learning_rate": 4.365033621517772e-05, "loss": 1.853, "step": 199300 }, { "epoch": 0.13, "learning_rate": 4.364713416586615e-05, "loss": 1.8568, "step": 199400 }, { "epoch": 0.13, "learning_rate": 4.36439321165546e-05, "loss": 1.8804, "step": 199500 }, { "epoch": 0.13, "learning_rate": 4.364073006724304e-05, "loss": 1.8739, "step": 199600 }, { "epoch": 0.13, "learning_rate": 4.363752801793148e-05, "loss": 1.8465, "step": 199700 }, { "epoch": 0.13, "learning_rate": 4.3634325968619924e-05, "loss": 1.8543, "step": 199800 }, { "epoch": 0.13, "learning_rate": 4.363112391930836e-05, "loss": 1.8383, "step": 199900 }, { "epoch": 0.13, "learning_rate": 4.3627921869996804e-05, "loss": 1.8534, "step": 200000 }, { "epoch": 0.13, "eval_loss": 1.7683591842651367, "eval_runtime": 93.2299, "eval_samples_per_second": 107.262, "eval_steps_per_second": 6.704, "step": 200000 }, { "epoch": 0.13, "learning_rate": 4.3624719820685237e-05, "loss": 1.859, "step": 200100 }, { "epoch": 0.13, "learning_rate": 4.362151777137368e-05, "loss": 1.863, "step": 200200 }, { "epoch": 0.13, "learning_rate": 4.361831572206212e-05, "loss": 1.8373, "step": 200300 }, { "epoch": 0.13, "learning_rate": 4.361511367275056e-05, "loss": 1.8403, "step": 200400 }, { "epoch": 0.13, "learning_rate": 4.361191162343901e-05, "loss": 1.8272, "step": 200500 }, { "epoch": 0.13, "learning_rate": 4.360870957412744e-05, "loss": 1.8414, "step": 200600 }, { "epoch": 0.13, "learning_rate": 4.360550752481589e-05, "loss": 1.8668, "step": 200700 }, { "epoch": 0.13, "learning_rate": 4.360230547550432e-05, "loss": 1.8513, "step": 200800 }, { "epoch": 0.13, "learning_rate": 4.359910342619277e-05, "loss": 1.8684, "step": 200900 }, { "epoch": 0.13, "learning_rate": 4.35959013768812e-05, "loss": 1.861, "step": 201000 }, { "epoch": 0.13, "eval_loss": 1.7691121101379395, "eval_runtime": 93.5524, "eval_samples_per_second": 106.892, "eval_steps_per_second": 6.681, "step": 201000 }, { "epoch": 0.13, "learning_rate": 4.359269932756965e-05, "loss": 1.8648, "step": 201100 }, { "epoch": 0.13, "learning_rate": 4.358949727825809e-05, "loss": 1.841, "step": 201200 }, { "epoch": 0.13, "learning_rate": 4.358629522894653e-05, "loss": 1.8596, "step": 201300 }, { "epoch": 0.13, "learning_rate": 4.3583093179634973e-05, "loss": 1.8453, "step": 201400 }, { "epoch": 0.13, "learning_rate": 4.3579891130323406e-05, "loss": 1.8352, "step": 201500 }, { "epoch": 0.13, "learning_rate": 4.357668908101185e-05, "loss": 1.8383, "step": 201600 }, { "epoch": 0.13, "learning_rate": 4.3573487031700286e-05, "loss": 1.8443, "step": 201700 }, { "epoch": 0.13, "learning_rate": 4.357028498238873e-05, "loss": 1.8383, "step": 201800 }, { "epoch": 0.13, "learning_rate": 4.356708293307717e-05, "loss": 1.8408, "step": 201900 }, { "epoch": 0.13, "learning_rate": 4.356388088376561e-05, "loss": 1.8235, "step": 202000 }, { "epoch": 0.13, "eval_loss": 1.7685824632644653, "eval_runtime": 92.6875, "eval_samples_per_second": 107.889, "eval_steps_per_second": 6.743, "step": 202000 }, { "epoch": 0.13, "learning_rate": 4.356067883445406e-05, "loss": 1.8827, "step": 202100 }, { "epoch": 0.13, "learning_rate": 4.355747678514249e-05, "loss": 1.831, "step": 202200 }, { "epoch": 0.13, "learning_rate": 4.355427473583094e-05, "loss": 1.8612, "step": 202300 }, { "epoch": 0.13, "learning_rate": 4.355107268651937e-05, "loss": 1.8583, "step": 202400 }, { "epoch": 0.13, "learning_rate": 4.354787063720782e-05, "loss": 1.8545, "step": 202500 }, { "epoch": 0.13, "learning_rate": 4.354466858789625e-05, "loss": 1.8771, "step": 202600 }, { "epoch": 0.13, "learning_rate": 4.35414665385847e-05, "loss": 1.868, "step": 202700 }, { "epoch": 0.13, "learning_rate": 4.3538264489273137e-05, "loss": 1.857, "step": 202800 }, { "epoch": 0.13, "learning_rate": 4.3535062439961576e-05, "loss": 1.8301, "step": 202900 }, { "epoch": 0.13, "learning_rate": 4.353186039065002e-05, "loss": 1.8592, "step": 203000 }, { "epoch": 0.13, "eval_loss": 1.7683054208755493, "eval_runtime": 94.8104, "eval_samples_per_second": 105.474, "eval_steps_per_second": 6.592, "step": 203000 }, { "epoch": 0.13, "learning_rate": 4.3528658341338456e-05, "loss": 1.8352, "step": 203100 }, { "epoch": 0.13, "learning_rate": 4.35254562920269e-05, "loss": 1.8482, "step": 203200 }, { "epoch": 0.13, "learning_rate": 4.3522254242715335e-05, "loss": 1.8523, "step": 203300 }, { "epoch": 0.13, "learning_rate": 4.351905219340378e-05, "loss": 1.8334, "step": 203400 }, { "epoch": 0.13, "learning_rate": 4.351585014409222e-05, "loss": 1.8584, "step": 203500 }, { "epoch": 0.13, "learning_rate": 4.351264809478066e-05, "loss": 1.8525, "step": 203600 }, { "epoch": 0.13, "learning_rate": 4.350944604546911e-05, "loss": 1.8562, "step": 203700 }, { "epoch": 0.13, "learning_rate": 4.350624399615754e-05, "loss": 1.8901, "step": 203800 }, { "epoch": 0.13, "learning_rate": 4.350304194684599e-05, "loss": 1.8387, "step": 203900 }, { "epoch": 0.13, "learning_rate": 4.349983989753442e-05, "loss": 1.8781, "step": 204000 }, { "epoch": 0.13, "eval_loss": 1.7697824239730835, "eval_runtime": 95.9153, "eval_samples_per_second": 104.259, "eval_steps_per_second": 6.516, "step": 204000 }, { "epoch": 0.13, "learning_rate": 4.349663784822287e-05, "loss": 1.8595, "step": 204100 }, { "epoch": 0.13, "learning_rate": 4.34934357989113e-05, "loss": 1.8445, "step": 204200 }, { "epoch": 0.13, "learning_rate": 4.3490233749599746e-05, "loss": 1.8463, "step": 204300 }, { "epoch": 0.13, "learning_rate": 4.3487031700288186e-05, "loss": 1.8445, "step": 204400 }, { "epoch": 0.13, "learning_rate": 4.3483829650976626e-05, "loss": 1.8245, "step": 204500 }, { "epoch": 0.13, "learning_rate": 4.348062760166507e-05, "loss": 1.8351, "step": 204600 }, { "epoch": 0.13, "learning_rate": 4.3477425552353505e-05, "loss": 1.8507, "step": 204700 }, { "epoch": 0.13, "learning_rate": 4.347422350304195e-05, "loss": 1.8355, "step": 204800 }, { "epoch": 0.13, "learning_rate": 4.3471021453730385e-05, "loss": 1.8627, "step": 204900 }, { "epoch": 0.13, "learning_rate": 4.346781940441883e-05, "loss": 1.8465, "step": 205000 }, { "epoch": 0.13, "eval_loss": 1.768517017364502, "eval_runtime": 94.3547, "eval_samples_per_second": 105.983, "eval_steps_per_second": 6.624, "step": 205000 }, { "epoch": 0.13, "learning_rate": 4.346461735510727e-05, "loss": 1.8666, "step": 205100 }, { "epoch": 0.13, "learning_rate": 4.346141530579571e-05, "loss": 1.839, "step": 205200 }, { "epoch": 0.13, "learning_rate": 4.345821325648416e-05, "loss": 1.8547, "step": 205300 }, { "epoch": 0.13, "learning_rate": 4.345501120717259e-05, "loss": 1.8561, "step": 205400 }, { "epoch": 0.13, "learning_rate": 4.345180915786104e-05, "loss": 1.8509, "step": 205500 }, { "epoch": 0.13, "learning_rate": 4.344860710854947e-05, "loss": 1.8583, "step": 205600 }, { "epoch": 0.13, "learning_rate": 4.3445405059237916e-05, "loss": 1.8505, "step": 205700 }, { "epoch": 0.13, "learning_rate": 4.3442203009926356e-05, "loss": 1.8386, "step": 205800 }, { "epoch": 0.13, "learning_rate": 4.3439000960614796e-05, "loss": 1.8544, "step": 205900 }, { "epoch": 0.13, "learning_rate": 4.3435798911303235e-05, "loss": 1.8672, "step": 206000 }, { "epoch": 0.13, "eval_loss": 1.7675573825836182, "eval_runtime": 91.0552, "eval_samples_per_second": 109.824, "eval_steps_per_second": 6.864, "step": 206000 }, { "epoch": 0.13, "learning_rate": 4.3432596861991675e-05, "loss": 1.8641, "step": 206100 }, { "epoch": 0.13, "learning_rate": 4.342939481268012e-05, "loss": 1.8533, "step": 206200 }, { "epoch": 0.13, "learning_rate": 4.3426192763368555e-05, "loss": 1.8477, "step": 206300 }, { "epoch": 0.13, "learning_rate": 4.3422990714057e-05, "loss": 1.8507, "step": 206400 }, { "epoch": 0.13, "learning_rate": 4.3419788664745434e-05, "loss": 1.8501, "step": 206500 }, { "epoch": 0.13, "learning_rate": 4.341658661543388e-05, "loss": 1.8583, "step": 206600 }, { "epoch": 0.13, "learning_rate": 4.341338456612232e-05, "loss": 1.8613, "step": 206700 }, { "epoch": 0.13, "learning_rate": 4.341018251681076e-05, "loss": 1.8363, "step": 206800 }, { "epoch": 0.13, "learning_rate": 4.34069804674992e-05, "loss": 1.8268, "step": 206900 }, { "epoch": 0.13, "learning_rate": 4.340377841818764e-05, "loss": 1.8367, "step": 207000 }, { "epoch": 0.13, "eval_loss": 1.7679375410079956, "eval_runtime": 93.969, "eval_samples_per_second": 106.418, "eval_steps_per_second": 6.651, "step": 207000 }, { "epoch": 0.13, "learning_rate": 4.3400576368876086e-05, "loss": 1.8584, "step": 207100 }, { "epoch": 0.13, "learning_rate": 4.339737431956452e-05, "loss": 1.8229, "step": 207200 }, { "epoch": 0.13, "learning_rate": 4.3394172270252965e-05, "loss": 1.8712, "step": 207300 }, { "epoch": 0.13, "learning_rate": 4.3390970220941405e-05, "loss": 1.8533, "step": 207400 }, { "epoch": 0.13, "learning_rate": 4.3387768171629845e-05, "loss": 1.8455, "step": 207500 }, { "epoch": 0.13, "learning_rate": 4.3384566122318285e-05, "loss": 1.8457, "step": 207600 }, { "epoch": 0.13, "learning_rate": 4.3381364073006724e-05, "loss": 1.8478, "step": 207700 }, { "epoch": 0.13, "learning_rate": 4.337816202369517e-05, "loss": 1.8559, "step": 207800 }, { "epoch": 0.13, "learning_rate": 4.3374959974383604e-05, "loss": 1.8403, "step": 207900 }, { "epoch": 0.13, "learning_rate": 4.337175792507205e-05, "loss": 1.8385, "step": 208000 }, { "epoch": 0.13, "eval_loss": 1.7671117782592773, "eval_runtime": 90.9027, "eval_samples_per_second": 110.008, "eval_steps_per_second": 6.875, "step": 208000 }, { "epoch": 0.13, "learning_rate": 4.336855587576049e-05, "loss": 1.8406, "step": 208100 }, { "epoch": 0.13, "learning_rate": 4.336535382644893e-05, "loss": 1.8532, "step": 208200 }, { "epoch": 0.13, "learning_rate": 4.336215177713737e-05, "loss": 1.8672, "step": 208300 }, { "epoch": 0.13, "learning_rate": 4.335894972782581e-05, "loss": 1.8292, "step": 208400 }, { "epoch": 0.13, "learning_rate": 4.335574767851425e-05, "loss": 1.8179, "step": 208500 }, { "epoch": 0.13, "learning_rate": 4.335254562920269e-05, "loss": 1.8508, "step": 208600 }, { "epoch": 0.13, "learning_rate": 4.3349343579891135e-05, "loss": 1.8628, "step": 208700 }, { "epoch": 0.13, "learning_rate": 4.334614153057957e-05, "loss": 1.8462, "step": 208800 }, { "epoch": 0.13, "learning_rate": 4.3342939481268015e-05, "loss": 1.8738, "step": 208900 }, { "epoch": 0.13, "learning_rate": 4.3339737431956455e-05, "loss": 1.8487, "step": 209000 }, { "epoch": 0.13, "eval_loss": 1.7670972347259521, "eval_runtime": 94.5201, "eval_samples_per_second": 105.798, "eval_steps_per_second": 6.612, "step": 209000 }, { "epoch": 0.13, "learning_rate": 4.3336535382644894e-05, "loss": 1.8638, "step": 209100 }, { "epoch": 0.13, "learning_rate": 4.3333333333333334e-05, "loss": 1.847, "step": 209200 }, { "epoch": 0.13, "learning_rate": 4.3330131284021774e-05, "loss": 1.8519, "step": 209300 }, { "epoch": 0.13, "learning_rate": 4.332692923471022e-05, "loss": 1.8423, "step": 209400 }, { "epoch": 0.13, "learning_rate": 4.332372718539865e-05, "loss": 1.8488, "step": 209500 }, { "epoch": 0.13, "learning_rate": 4.33205251360871e-05, "loss": 1.83, "step": 209600 }, { "epoch": 0.13, "learning_rate": 4.331732308677554e-05, "loss": 1.8246, "step": 209700 }, { "epoch": 0.13, "learning_rate": 4.331412103746398e-05, "loss": 1.8489, "step": 209800 }, { "epoch": 0.13, "learning_rate": 4.331091898815242e-05, "loss": 1.8344, "step": 209900 }, { "epoch": 0.13, "learning_rate": 4.330771693884086e-05, "loss": 1.8523, "step": 210000 }, { "epoch": 0.13, "eval_loss": 1.7647335529327393, "eval_runtime": 92.1346, "eval_samples_per_second": 108.537, "eval_steps_per_second": 6.784, "step": 210000 }, { "epoch": 0.13, "learning_rate": 4.33045148895293e-05, "loss": 1.8513, "step": 210100 }, { "epoch": 0.13, "learning_rate": 4.330131284021774e-05, "loss": 1.8485, "step": 210200 }, { "epoch": 0.13, "learning_rate": 4.3298110790906185e-05, "loss": 1.8617, "step": 210300 }, { "epoch": 0.13, "learning_rate": 4.3294908741594624e-05, "loss": 1.8263, "step": 210400 }, { "epoch": 0.13, "learning_rate": 4.3291706692283064e-05, "loss": 1.8583, "step": 210500 }, { "epoch": 0.13, "learning_rate": 4.3288504642971504e-05, "loss": 1.8546, "step": 210600 }, { "epoch": 0.13, "learning_rate": 4.3285302593659944e-05, "loss": 1.8356, "step": 210700 }, { "epoch": 0.13, "learning_rate": 4.3282100544348383e-05, "loss": 1.8408, "step": 210800 }, { "epoch": 0.13, "learning_rate": 4.327889849503682e-05, "loss": 1.8605, "step": 210900 }, { "epoch": 0.14, "learning_rate": 4.327569644572527e-05, "loss": 1.8364, "step": 211000 }, { "epoch": 0.14, "eval_loss": 1.7665718793869019, "eval_runtime": 95.0008, "eval_samples_per_second": 105.262, "eval_steps_per_second": 6.579, "step": 211000 }, { "epoch": 0.14, "learning_rate": 4.32724943964137e-05, "loss": 1.8425, "step": 211100 }, { "epoch": 0.14, "learning_rate": 4.326929234710215e-05, "loss": 1.8311, "step": 211200 }, { "epoch": 0.14, "learning_rate": 4.326609029779059e-05, "loss": 1.8672, "step": 211300 }, { "epoch": 0.14, "learning_rate": 4.326288824847903e-05, "loss": 1.8362, "step": 211400 }, { "epoch": 0.14, "learning_rate": 4.325968619916747e-05, "loss": 1.8275, "step": 211500 }, { "epoch": 0.14, "learning_rate": 4.325648414985591e-05, "loss": 1.857, "step": 211600 }, { "epoch": 0.14, "learning_rate": 4.325328210054435e-05, "loss": 1.8349, "step": 211700 }, { "epoch": 0.14, "learning_rate": 4.325008005123279e-05, "loss": 1.8434, "step": 211800 }, { "epoch": 0.14, "learning_rate": 4.3246878001921234e-05, "loss": 1.8369, "step": 211900 }, { "epoch": 0.14, "learning_rate": 4.3243675952609674e-05, "loss": 1.8234, "step": 212000 }, { "epoch": 0.14, "eval_loss": 1.7655366659164429, "eval_runtime": 90.2103, "eval_samples_per_second": 110.852, "eval_steps_per_second": 6.928, "step": 212000 }, { "epoch": 0.14, "learning_rate": 4.3240473903298114e-05, "loss": 1.8368, "step": 212100 }, { "epoch": 0.14, "learning_rate": 4.323727185398655e-05, "loss": 1.8223, "step": 212200 }, { "epoch": 0.14, "learning_rate": 4.323406980467499e-05, "loss": 1.8431, "step": 212300 }, { "epoch": 0.14, "learning_rate": 4.323086775536343e-05, "loss": 1.8389, "step": 212400 }, { "epoch": 0.14, "learning_rate": 4.322766570605187e-05, "loss": 1.8627, "step": 212500 }, { "epoch": 0.14, "learning_rate": 4.322446365674032e-05, "loss": 1.8561, "step": 212600 }, { "epoch": 0.14, "learning_rate": 4.322126160742876e-05, "loss": 1.8477, "step": 212700 }, { "epoch": 0.14, "learning_rate": 4.32180595581172e-05, "loss": 1.8493, "step": 212800 }, { "epoch": 0.14, "learning_rate": 4.321485750880564e-05, "loss": 1.8303, "step": 212900 }, { "epoch": 0.14, "learning_rate": 4.321165545949408e-05, "loss": 1.8558, "step": 213000 }, { "epoch": 0.14, "eval_loss": 1.7655925750732422, "eval_runtime": 94.3352, "eval_samples_per_second": 106.005, "eval_steps_per_second": 6.625, "step": 213000 }, { "epoch": 0.14, "learning_rate": 4.320845341018252e-05, "loss": 1.8388, "step": 213100 }, { "epoch": 0.14, "learning_rate": 4.320525136087096e-05, "loss": 1.8178, "step": 213200 }, { "epoch": 0.14, "learning_rate": 4.32020493115594e-05, "loss": 1.8535, "step": 213300 }, { "epoch": 0.14, "learning_rate": 4.319884726224784e-05, "loss": 1.8332, "step": 213400 }, { "epoch": 0.14, "learning_rate": 4.3195645212936283e-05, "loss": 1.8332, "step": 213500 }, { "epoch": 0.14, "learning_rate": 4.319244316362472e-05, "loss": 1.8256, "step": 213600 }, { "epoch": 0.14, "learning_rate": 4.318924111431316e-05, "loss": 1.8502, "step": 213700 }, { "epoch": 0.14, "learning_rate": 4.31860390650016e-05, "loss": 1.828, "step": 213800 }, { "epoch": 0.14, "learning_rate": 4.318283701569004e-05, "loss": 1.8523, "step": 213900 }, { "epoch": 0.14, "learning_rate": 4.317963496637848e-05, "loss": 1.8661, "step": 214000 }, { "epoch": 0.14, "eval_loss": 1.7638564109802246, "eval_runtime": 90.7876, "eval_samples_per_second": 110.147, "eval_steps_per_second": 6.884, "step": 214000 }, { "epoch": 0.14, "learning_rate": 4.317643291706692e-05, "loss": 1.834, "step": 214100 }, { "epoch": 0.14, "learning_rate": 4.317323086775537e-05, "loss": 1.8425, "step": 214200 }, { "epoch": 0.14, "learning_rate": 4.317002881844381e-05, "loss": 1.8277, "step": 214300 }, { "epoch": 0.14, "learning_rate": 4.316682676913225e-05, "loss": 1.8327, "step": 214400 }, { "epoch": 0.14, "learning_rate": 4.316362471982069e-05, "loss": 1.8244, "step": 214500 }, { "epoch": 0.14, "learning_rate": 4.316042267050913e-05, "loss": 1.8478, "step": 214600 }, { "epoch": 0.14, "learning_rate": 4.315722062119757e-05, "loss": 1.8531, "step": 214700 }, { "epoch": 0.14, "learning_rate": 4.315401857188601e-05, "loss": 1.8328, "step": 214800 }, { "epoch": 0.14, "learning_rate": 4.3150816522574447e-05, "loss": 1.8338, "step": 214900 }, { "epoch": 0.14, "learning_rate": 4.314761447326289e-05, "loss": 1.8465, "step": 215000 }, { "epoch": 0.14, "eval_loss": 1.7648680210113525, "eval_runtime": 92.8482, "eval_samples_per_second": 107.703, "eval_steps_per_second": 6.731, "step": 215000 }, { "epoch": 0.14, "learning_rate": 4.314441242395133e-05, "loss": 1.8422, "step": 215100 }, { "epoch": 0.14, "learning_rate": 4.314121037463977e-05, "loss": 1.8578, "step": 215200 }, { "epoch": 0.14, "learning_rate": 4.313800832532821e-05, "loss": 1.8395, "step": 215300 }, { "epoch": 0.14, "learning_rate": 4.313480627601665e-05, "loss": 1.8604, "step": 215400 }, { "epoch": 0.14, "learning_rate": 4.313160422670509e-05, "loss": 1.8427, "step": 215500 }, { "epoch": 0.14, "learning_rate": 4.312840217739353e-05, "loss": 1.8498, "step": 215600 }, { "epoch": 0.14, "learning_rate": 4.312520012808198e-05, "loss": 1.8315, "step": 215700 }, { "epoch": 0.14, "learning_rate": 4.312199807877042e-05, "loss": 1.8332, "step": 215800 }, { "epoch": 0.14, "learning_rate": 4.311879602945886e-05, "loss": 1.8501, "step": 215900 }, { "epoch": 0.14, "learning_rate": 4.31155939801473e-05, "loss": 1.8444, "step": 216000 }, { "epoch": 0.14, "eval_loss": 1.765128493309021, "eval_runtime": 91.6573, "eval_samples_per_second": 109.102, "eval_steps_per_second": 6.819, "step": 216000 }, { "epoch": 0.14, "learning_rate": 4.311239193083574e-05, "loss": 1.8605, "step": 216100 }, { "epoch": 0.14, "learning_rate": 4.310918988152418e-05, "loss": 1.8463, "step": 216200 }, { "epoch": 0.14, "learning_rate": 4.3105987832212616e-05, "loss": 1.827, "step": 216300 }, { "epoch": 0.14, "learning_rate": 4.3102785782901056e-05, "loss": 1.8673, "step": 216400 }, { "epoch": 0.14, "learning_rate": 4.3099583733589496e-05, "loss": 1.8641, "step": 216500 }, { "epoch": 0.14, "learning_rate": 4.309638168427794e-05, "loss": 1.8508, "step": 216600 }, { "epoch": 0.14, "learning_rate": 4.309317963496638e-05, "loss": 1.8419, "step": 216700 }, { "epoch": 0.14, "learning_rate": 4.308997758565482e-05, "loss": 1.8491, "step": 216800 }, { "epoch": 0.14, "learning_rate": 4.308677553634326e-05, "loss": 1.8394, "step": 216900 }, { "epoch": 0.14, "learning_rate": 4.30835734870317e-05, "loss": 1.8346, "step": 217000 }, { "epoch": 0.14, "eval_loss": 1.765671730041504, "eval_runtime": 94.4069, "eval_samples_per_second": 105.924, "eval_steps_per_second": 6.62, "step": 217000 }, { "epoch": 0.14, "learning_rate": 4.308037143772014e-05, "loss": 1.8448, "step": 217100 }, { "epoch": 0.14, "learning_rate": 4.307716938840858e-05, "loss": 1.8657, "step": 217200 }, { "epoch": 0.14, "learning_rate": 4.307396733909703e-05, "loss": 1.8378, "step": 217300 }, { "epoch": 0.14, "learning_rate": 4.307076528978547e-05, "loss": 1.8266, "step": 217400 }, { "epoch": 0.14, "learning_rate": 4.306756324047391e-05, "loss": 1.8245, "step": 217500 }, { "epoch": 0.14, "learning_rate": 4.306436119116235e-05, "loss": 1.8523, "step": 217600 }, { "epoch": 0.14, "learning_rate": 4.3061159141850786e-05, "loss": 1.8437, "step": 217700 }, { "epoch": 0.14, "learning_rate": 4.3057957092539226e-05, "loss": 1.8403, "step": 217800 }, { "epoch": 0.14, "learning_rate": 4.3054755043227666e-05, "loss": 1.8371, "step": 217900 }, { "epoch": 0.14, "learning_rate": 4.305155299391611e-05, "loss": 1.8338, "step": 218000 }, { "epoch": 0.14, "eval_loss": 1.7656636238098145, "eval_runtime": 90.4221, "eval_samples_per_second": 110.592, "eval_steps_per_second": 6.912, "step": 218000 }, { "epoch": 0.14, "learning_rate": 4.3048350944604545e-05, "loss": 1.8322, "step": 218100 }, { "epoch": 0.14, "learning_rate": 4.304514889529299e-05, "loss": 1.8463, "step": 218200 }, { "epoch": 0.14, "learning_rate": 4.304194684598143e-05, "loss": 1.8154, "step": 218300 }, { "epoch": 0.14, "learning_rate": 4.303874479666987e-05, "loss": 1.84, "step": 218400 }, { "epoch": 0.14, "learning_rate": 4.303554274735831e-05, "loss": 1.862, "step": 218500 }, { "epoch": 0.14, "learning_rate": 4.303234069804675e-05, "loss": 1.8401, "step": 218600 }, { "epoch": 0.14, "learning_rate": 4.302913864873519e-05, "loss": 1.8567, "step": 218700 }, { "epoch": 0.14, "learning_rate": 4.302593659942363e-05, "loss": 1.8508, "step": 218800 }, { "epoch": 0.14, "learning_rate": 4.302273455011208e-05, "loss": 1.8413, "step": 218900 }, { "epoch": 0.14, "learning_rate": 4.3019532500800517e-05, "loss": 1.839, "step": 219000 }, { "epoch": 0.14, "eval_loss": 1.7680683135986328, "eval_runtime": 96.4288, "eval_samples_per_second": 103.703, "eval_steps_per_second": 6.481, "step": 219000 }, { "epoch": 0.14, "learning_rate": 4.3016330451488956e-05, "loss": 1.819, "step": 219100 }, { "epoch": 0.14, "learning_rate": 4.3013128402177396e-05, "loss": 1.8392, "step": 219200 }, { "epoch": 0.14, "learning_rate": 4.3009926352865836e-05, "loss": 1.8567, "step": 219300 }, { "epoch": 0.14, "learning_rate": 4.3006724303554275e-05, "loss": 1.8424, "step": 219400 }, { "epoch": 0.14, "learning_rate": 4.3003522254242715e-05, "loss": 1.8433, "step": 219500 }, { "epoch": 0.14, "learning_rate": 4.300032020493116e-05, "loss": 1.8457, "step": 219600 }, { "epoch": 0.14, "learning_rate": 4.2997118155619595e-05, "loss": 1.8293, "step": 219700 }, { "epoch": 0.14, "learning_rate": 4.299391610630804e-05, "loss": 1.847, "step": 219800 }, { "epoch": 0.14, "learning_rate": 4.299071405699648e-05, "loss": 1.839, "step": 219900 }, { "epoch": 0.14, "learning_rate": 4.298751200768492e-05, "loss": 1.8604, "step": 220000 }, { "epoch": 0.14, "eval_loss": 1.7653298377990723, "eval_runtime": 91.4942, "eval_samples_per_second": 109.297, "eval_steps_per_second": 6.831, "step": 220000 }, { "epoch": 0.14, "learning_rate": 4.298430995837336e-05, "loss": 1.8371, "step": 220100 }, { "epoch": 0.14, "learning_rate": 4.29811079090618e-05, "loss": 1.8455, "step": 220200 }, { "epoch": 0.14, "learning_rate": 4.297790585975025e-05, "loss": 1.8331, "step": 220300 }, { "epoch": 0.14, "learning_rate": 4.297470381043868e-05, "loss": 1.8655, "step": 220400 }, { "epoch": 0.14, "learning_rate": 4.2971501761127126e-05, "loss": 1.8291, "step": 220500 }, { "epoch": 0.14, "learning_rate": 4.2968299711815566e-05, "loss": 1.8292, "step": 220600 }, { "epoch": 0.14, "learning_rate": 4.2965097662504006e-05, "loss": 1.8308, "step": 220700 }, { "epoch": 0.14, "learning_rate": 4.2961895613192445e-05, "loss": 1.8537, "step": 220800 }, { "epoch": 0.14, "learning_rate": 4.2958693563880885e-05, "loss": 1.8597, "step": 220900 }, { "epoch": 0.14, "learning_rate": 4.2955491514569325e-05, "loss": 1.8443, "step": 221000 }, { "epoch": 0.14, "eval_loss": 1.7644964456558228, "eval_runtime": 93.5585, "eval_samples_per_second": 106.885, "eval_steps_per_second": 6.68, "step": 221000 }, { "epoch": 0.14, "learning_rate": 4.2952289465257765e-05, "loss": 1.8543, "step": 221100 }, { "epoch": 0.14, "learning_rate": 4.294908741594621e-05, "loss": 1.845, "step": 221200 }, { "epoch": 0.14, "learning_rate": 4.2945885366634644e-05, "loss": 1.8163, "step": 221300 }, { "epoch": 0.14, "learning_rate": 4.294268331732309e-05, "loss": 1.842, "step": 221400 }, { "epoch": 0.14, "learning_rate": 4.293948126801153e-05, "loss": 1.84, "step": 221500 }, { "epoch": 0.14, "learning_rate": 4.293627921869997e-05, "loss": 1.8512, "step": 221600 }, { "epoch": 0.14, "learning_rate": 4.293307716938841e-05, "loss": 1.8565, "step": 221700 }, { "epoch": 0.14, "learning_rate": 4.292987512007685e-05, "loss": 1.8443, "step": 221800 }, { "epoch": 0.14, "learning_rate": 4.2926673070765296e-05, "loss": 1.8288, "step": 221900 }, { "epoch": 0.14, "learning_rate": 4.292347102145373e-05, "loss": 1.8432, "step": 222000 }, { "epoch": 0.14, "eval_loss": 1.766663670539856, "eval_runtime": 93.2522, "eval_samples_per_second": 107.236, "eval_steps_per_second": 6.702, "step": 222000 }, { "epoch": 0.14, "learning_rate": 4.2920268972142176e-05, "loss": 1.8675, "step": 222100 }, { "epoch": 0.14, "learning_rate": 4.2917066922830615e-05, "loss": 1.8575, "step": 222200 }, { "epoch": 0.14, "learning_rate": 4.2913864873519055e-05, "loss": 1.8449, "step": 222300 }, { "epoch": 0.14, "learning_rate": 4.2910662824207495e-05, "loss": 1.8184, "step": 222400 }, { "epoch": 0.14, "learning_rate": 4.2907460774895934e-05, "loss": 1.837, "step": 222500 }, { "epoch": 0.14, "learning_rate": 4.290425872558438e-05, "loss": 1.8284, "step": 222600 }, { "epoch": 0.14, "learning_rate": 4.2901056676272814e-05, "loss": 1.8346, "step": 222700 }, { "epoch": 0.14, "learning_rate": 4.289785462696126e-05, "loss": 1.8066, "step": 222800 }, { "epoch": 0.14, "learning_rate": 4.2894652577649693e-05, "loss": 1.8176, "step": 222900 }, { "epoch": 0.14, "learning_rate": 4.289145052833814e-05, "loss": 1.8334, "step": 223000 }, { "epoch": 0.14, "eval_loss": 1.7657091617584229, "eval_runtime": 94.1905, "eval_samples_per_second": 106.168, "eval_steps_per_second": 6.635, "step": 223000 }, { "epoch": 0.14, "learning_rate": 4.288824847902658e-05, "loss": 1.8214, "step": 223100 }, { "epoch": 0.14, "learning_rate": 4.288504642971502e-05, "loss": 1.8332, "step": 223200 }, { "epoch": 0.14, "learning_rate": 4.288184438040346e-05, "loss": 1.8318, "step": 223300 }, { "epoch": 0.14, "learning_rate": 4.28786423310919e-05, "loss": 1.8281, "step": 223400 }, { "epoch": 0.14, "learning_rate": 4.2875440281780345e-05, "loss": 1.8497, "step": 223500 }, { "epoch": 0.14, "learning_rate": 4.287223823246878e-05, "loss": 1.8288, "step": 223600 }, { "epoch": 0.14, "learning_rate": 4.2869036183157225e-05, "loss": 1.8125, "step": 223700 }, { "epoch": 0.14, "learning_rate": 4.2865834133845665e-05, "loss": 1.8551, "step": 223800 }, { "epoch": 0.14, "learning_rate": 4.2862632084534104e-05, "loss": 1.8438, "step": 223900 }, { "epoch": 0.14, "learning_rate": 4.2859430035222544e-05, "loss": 1.8485, "step": 224000 }, { "epoch": 0.14, "eval_loss": 1.7650192975997925, "eval_runtime": 93.7987, "eval_samples_per_second": 106.611, "eval_steps_per_second": 6.663, "step": 224000 }, { "epoch": 0.14, "learning_rate": 4.2856227985910984e-05, "loss": 1.8436, "step": 224100 }, { "epoch": 0.14, "learning_rate": 4.285302593659943e-05, "loss": 1.8256, "step": 224200 }, { "epoch": 0.14, "learning_rate": 4.284982388728786e-05, "loss": 1.8349, "step": 224300 }, { "epoch": 0.14, "learning_rate": 4.284662183797631e-05, "loss": 1.8152, "step": 224400 }, { "epoch": 0.14, "learning_rate": 4.284341978866474e-05, "loss": 1.8231, "step": 224500 }, { "epoch": 0.14, "learning_rate": 4.284021773935319e-05, "loss": 1.8478, "step": 224600 }, { "epoch": 0.14, "learning_rate": 4.283701569004163e-05, "loss": 1.8053, "step": 224700 }, { "epoch": 0.14, "learning_rate": 4.283381364073007e-05, "loss": 1.8213, "step": 224800 }, { "epoch": 0.14, "learning_rate": 4.2830611591418515e-05, "loss": 1.8182, "step": 224900 }, { "epoch": 0.14, "learning_rate": 4.282740954210695e-05, "loss": 1.8456, "step": 225000 }, { "epoch": 0.14, "eval_loss": 1.7623941898345947, "eval_runtime": 90.0412, "eval_samples_per_second": 111.06, "eval_steps_per_second": 6.941, "step": 225000 }, { "epoch": 0.14, "learning_rate": 4.2824207492795395e-05, "loss": 1.8154, "step": 225100 }, { "epoch": 0.14, "learning_rate": 4.282100544348383e-05, "loss": 1.8331, "step": 225200 }, { "epoch": 0.14, "learning_rate": 4.2817803394172274e-05, "loss": 1.8445, "step": 225300 }, { "epoch": 0.14, "learning_rate": 4.2814601344860714e-05, "loss": 1.8488, "step": 225400 }, { "epoch": 0.14, "learning_rate": 4.2811399295549154e-05, "loss": 1.8389, "step": 225500 }, { "epoch": 0.14, "learning_rate": 4.2808197246237594e-05, "loss": 1.8155, "step": 225600 }, { "epoch": 0.14, "learning_rate": 4.280499519692603e-05, "loss": 1.8082, "step": 225700 }, { "epoch": 0.14, "learning_rate": 4.280179314761448e-05, "loss": 1.8355, "step": 225800 }, { "epoch": 0.14, "learning_rate": 4.279859109830291e-05, "loss": 1.835, "step": 225900 }, { "epoch": 0.14, "learning_rate": 4.279538904899136e-05, "loss": 1.8302, "step": 226000 }, { "epoch": 0.14, "eval_loss": 1.7628825902938843, "eval_runtime": 96.4591, "eval_samples_per_second": 103.671, "eval_steps_per_second": 6.479, "step": 226000 }, { "epoch": 0.14, "learning_rate": 4.279218699967979e-05, "loss": 1.8299, "step": 226100 }, { "epoch": 0.14, "learning_rate": 4.278898495036824e-05, "loss": 1.8404, "step": 226200 }, { "epoch": 0.14, "learning_rate": 4.278578290105668e-05, "loss": 1.839, "step": 226300 }, { "epoch": 0.14, "learning_rate": 4.278258085174512e-05, "loss": 1.8315, "step": 226400 }, { "epoch": 0.14, "learning_rate": 4.2779378802433565e-05, "loss": 1.842, "step": 226500 }, { "epoch": 0.15, "learning_rate": 4.2776176753122e-05, "loss": 1.8386, "step": 226600 }, { "epoch": 0.15, "learning_rate": 4.2772974703810444e-05, "loss": 1.841, "step": 226700 }, { "epoch": 0.15, "learning_rate": 4.276977265449888e-05, "loss": 1.8341, "step": 226800 }, { "epoch": 0.15, "learning_rate": 4.2766570605187324e-05, "loss": 1.8444, "step": 226900 }, { "epoch": 0.15, "learning_rate": 4.2763368555875763e-05, "loss": 1.8385, "step": 227000 }, { "epoch": 0.15, "eval_loss": 1.761794924736023, "eval_runtime": 93.9621, "eval_samples_per_second": 106.426, "eval_steps_per_second": 6.652, "step": 227000 }, { "epoch": 0.15, "learning_rate": 4.27601665065642e-05, "loss": 1.8515, "step": 227100 }, { "epoch": 0.15, "learning_rate": 4.275696445725264e-05, "loss": 1.8305, "step": 227200 }, { "epoch": 0.15, "learning_rate": 4.275376240794108e-05, "loss": 1.8289, "step": 227300 }, { "epoch": 0.15, "learning_rate": 4.275056035862953e-05, "loss": 1.8169, "step": 227400 }, { "epoch": 0.15, "learning_rate": 4.274735830931796e-05, "loss": 1.8249, "step": 227500 }, { "epoch": 0.15, "learning_rate": 4.274415626000641e-05, "loss": 1.8254, "step": 227600 }, { "epoch": 0.15, "learning_rate": 4.274095421069484e-05, "loss": 1.8353, "step": 227700 }, { "epoch": 0.15, "learning_rate": 4.273775216138329e-05, "loss": 1.8192, "step": 227800 }, { "epoch": 0.15, "learning_rate": 4.273455011207173e-05, "loss": 1.8286, "step": 227900 }, { "epoch": 0.15, "learning_rate": 4.273134806276017e-05, "loss": 1.8236, "step": 228000 }, { "epoch": 0.15, "eval_loss": 1.76215660572052, "eval_runtime": 92.1601, "eval_samples_per_second": 108.507, "eval_steps_per_second": 6.782, "step": 228000 }, { "epoch": 0.15, "learning_rate": 4.2728146013448614e-05, "loss": 1.8448, "step": 228100 }, { "epoch": 0.15, "learning_rate": 4.272494396413705e-05, "loss": 1.8298, "step": 228200 }, { "epoch": 0.15, "learning_rate": 4.2721741914825494e-05, "loss": 1.8353, "step": 228300 }, { "epoch": 0.15, "learning_rate": 4.2718539865513927e-05, "loss": 1.8448, "step": 228400 }, { "epoch": 0.15, "learning_rate": 4.271533781620237e-05, "loss": 1.8387, "step": 228500 }, { "epoch": 0.15, "learning_rate": 4.271213576689081e-05, "loss": 1.8347, "step": 228600 }, { "epoch": 0.15, "learning_rate": 4.270893371757925e-05, "loss": 1.8472, "step": 228700 }, { "epoch": 0.15, "learning_rate": 4.270573166826769e-05, "loss": 1.85, "step": 228800 }, { "epoch": 0.15, "learning_rate": 4.270252961895613e-05, "loss": 1.8192, "step": 228900 }, { "epoch": 0.15, "learning_rate": 4.269932756964458e-05, "loss": 1.8395, "step": 229000 }, { "epoch": 0.15, "eval_loss": 1.7616909742355347, "eval_runtime": 92.8248, "eval_samples_per_second": 107.73, "eval_steps_per_second": 6.733, "step": 229000 }, { "epoch": 0.15, "learning_rate": 4.269612552033301e-05, "loss": 1.8206, "step": 229100 }, { "epoch": 0.15, "learning_rate": 4.269292347102146e-05, "loss": 1.8344, "step": 229200 }, { "epoch": 0.15, "learning_rate": 4.268972142170989e-05, "loss": 1.8318, "step": 229300 }, { "epoch": 0.15, "learning_rate": 4.268651937239834e-05, "loss": 1.8341, "step": 229400 }, { "epoch": 0.15, "learning_rate": 4.268331732308678e-05, "loss": 1.8144, "step": 229500 }, { "epoch": 0.15, "learning_rate": 4.268011527377522e-05, "loss": 1.8418, "step": 229600 }, { "epoch": 0.15, "learning_rate": 4.2676913224463663e-05, "loss": 1.8358, "step": 229700 }, { "epoch": 0.15, "learning_rate": 4.2673711175152096e-05, "loss": 1.8194, "step": 229800 }, { "epoch": 0.15, "learning_rate": 4.267050912584054e-05, "loss": 1.8229, "step": 229900 }, { "epoch": 0.15, "learning_rate": 4.2667307076528976e-05, "loss": 1.8153, "step": 230000 }, { "epoch": 0.15, "eval_loss": 1.7616212368011475, "eval_runtime": 89.5208, "eval_samples_per_second": 111.706, "eval_steps_per_second": 6.982, "step": 230000 }, { "epoch": 0.15, "learning_rate": 4.266410502721742e-05, "loss": 1.8406, "step": 230100 }, { "epoch": 0.15, "learning_rate": 4.266090297790586e-05, "loss": 1.8356, "step": 230200 }, { "epoch": 0.15, "learning_rate": 4.26577009285943e-05, "loss": 1.8396, "step": 230300 }, { "epoch": 0.15, "learning_rate": 4.265449887928274e-05, "loss": 1.8229, "step": 230400 }, { "epoch": 0.15, "learning_rate": 4.265129682997118e-05, "loss": 1.8447, "step": 230500 }, { "epoch": 0.15, "learning_rate": 4.264809478065963e-05, "loss": 1.8399, "step": 230600 }, { "epoch": 0.15, "learning_rate": 4.264489273134806e-05, "loss": 1.817, "step": 230700 }, { "epoch": 0.15, "learning_rate": 4.264169068203651e-05, "loss": 1.8288, "step": 230800 }, { "epoch": 0.15, "learning_rate": 4.263848863272494e-05, "loss": 1.8334, "step": 230900 }, { "epoch": 0.15, "learning_rate": 4.263528658341339e-05, "loss": 1.8247, "step": 231000 }, { "epoch": 0.15, "eval_loss": 1.760667324066162, "eval_runtime": 94.1635, "eval_samples_per_second": 106.198, "eval_steps_per_second": 6.637, "step": 231000 }, { "epoch": 0.15, "learning_rate": 4.2632084534101827e-05, "loss": 1.8126, "step": 231100 }, { "epoch": 0.15, "learning_rate": 4.2628882484790266e-05, "loss": 1.8478, "step": 231200 }, { "epoch": 0.15, "learning_rate": 4.262568043547871e-05, "loss": 1.8398, "step": 231300 }, { "epoch": 0.15, "learning_rate": 4.2622478386167146e-05, "loss": 1.8328, "step": 231400 }, { "epoch": 0.15, "learning_rate": 4.261927633685559e-05, "loss": 1.8204, "step": 231500 }, { "epoch": 0.15, "learning_rate": 4.2616074287544025e-05, "loss": 1.8259, "step": 231600 }, { "epoch": 0.15, "learning_rate": 4.261287223823247e-05, "loss": 1.835, "step": 231700 }, { "epoch": 0.15, "learning_rate": 4.260967018892091e-05, "loss": 1.8395, "step": 231800 }, { "epoch": 0.15, "learning_rate": 4.260646813960935e-05, "loss": 1.8504, "step": 231900 }, { "epoch": 0.15, "learning_rate": 4.260326609029779e-05, "loss": 1.8315, "step": 232000 }, { "epoch": 0.15, "eval_loss": 1.7596756219863892, "eval_runtime": 95.6351, "eval_samples_per_second": 104.564, "eval_steps_per_second": 6.535, "step": 232000 }, { "epoch": 0.15, "learning_rate": 4.260006404098623e-05, "loss": 1.8331, "step": 232100 }, { "epoch": 0.15, "learning_rate": 4.259686199167468e-05, "loss": 1.8399, "step": 232200 }, { "epoch": 0.15, "learning_rate": 4.259365994236311e-05, "loss": 1.8064, "step": 232300 }, { "epoch": 0.15, "learning_rate": 4.259045789305156e-05, "loss": 1.8316, "step": 232400 }, { "epoch": 0.15, "learning_rate": 4.2587255843739996e-05, "loss": 1.8161, "step": 232500 }, { "epoch": 0.15, "learning_rate": 4.2584053794428436e-05, "loss": 1.8151, "step": 232600 }, { "epoch": 0.15, "learning_rate": 4.2580851745116876e-05, "loss": 1.8355, "step": 232700 }, { "epoch": 0.15, "learning_rate": 4.2577649695805316e-05, "loss": 1.8086, "step": 232800 }, { "epoch": 0.15, "learning_rate": 4.257444764649376e-05, "loss": 1.8494, "step": 232900 }, { "epoch": 0.15, "learning_rate": 4.2571245597182195e-05, "loss": 1.8257, "step": 233000 }, { "epoch": 0.15, "eval_loss": 1.7621948719024658, "eval_runtime": 91.5006, "eval_samples_per_second": 109.289, "eval_steps_per_second": 6.831, "step": 233000 }, { "epoch": 0.15, "learning_rate": 4.256804354787064e-05, "loss": 1.8208, "step": 233100 }, { "epoch": 0.15, "learning_rate": 4.2564841498559075e-05, "loss": 1.8337, "step": 233200 }, { "epoch": 0.15, "learning_rate": 4.256163944924752e-05, "loss": 1.8455, "step": 233300 }, { "epoch": 0.15, "learning_rate": 4.255843739993596e-05, "loss": 1.8373, "step": 233400 }, { "epoch": 0.15, "learning_rate": 4.25552353506244e-05, "loss": 1.8296, "step": 233500 }, { "epoch": 0.15, "learning_rate": 4.255203330131284e-05, "loss": 1.8303, "step": 233600 }, { "epoch": 0.15, "learning_rate": 4.254883125200128e-05, "loss": 1.8191, "step": 233700 }, { "epoch": 0.15, "learning_rate": 4.254562920268973e-05, "loss": 1.8234, "step": 233800 }, { "epoch": 0.15, "learning_rate": 4.254242715337816e-05, "loss": 1.8407, "step": 233900 }, { "epoch": 0.15, "learning_rate": 4.2539225104066606e-05, "loss": 1.8167, "step": 234000 }, { "epoch": 0.15, "eval_loss": 1.7620700597763062, "eval_runtime": 95.2008, "eval_samples_per_second": 105.041, "eval_steps_per_second": 6.565, "step": 234000 }, { "epoch": 0.15, "learning_rate": 4.2536023054755046e-05, "loss": 1.828, "step": 234100 }, { "epoch": 0.15, "learning_rate": 4.2532821005443486e-05, "loss": 1.834, "step": 234200 }, { "epoch": 0.15, "learning_rate": 4.2529618956131925e-05, "loss": 1.8233, "step": 234300 }, { "epoch": 0.15, "learning_rate": 4.2526416906820365e-05, "loss": 1.8242, "step": 234400 }, { "epoch": 0.15, "learning_rate": 4.252321485750881e-05, "loss": 1.8333, "step": 234500 }, { "epoch": 0.15, "learning_rate": 4.2520012808197245e-05, "loss": 1.8439, "step": 234600 }, { "epoch": 0.15, "learning_rate": 4.251681075888569e-05, "loss": 1.8473, "step": 234700 }, { "epoch": 0.15, "learning_rate": 4.251360870957413e-05, "loss": 1.8359, "step": 234800 }, { "epoch": 0.15, "learning_rate": 4.251040666026257e-05, "loss": 1.8128, "step": 234900 }, { "epoch": 0.15, "learning_rate": 4.250720461095101e-05, "loss": 1.821, "step": 235000 }, { "epoch": 0.15, "eval_loss": 1.7613176107406616, "eval_runtime": 91.1713, "eval_samples_per_second": 109.684, "eval_steps_per_second": 6.855, "step": 235000 }, { "epoch": 0.15, "learning_rate": 4.250400256163945e-05, "loss": 1.8395, "step": 235100 }, { "epoch": 0.15, "learning_rate": 4.250080051232789e-05, "loss": 1.8246, "step": 235200 }, { "epoch": 0.15, "learning_rate": 4.249759846301633e-05, "loss": 1.8164, "step": 235300 }, { "epoch": 0.15, "learning_rate": 4.2494396413704776e-05, "loss": 1.8187, "step": 235400 }, { "epoch": 0.15, "learning_rate": 4.2491194364393216e-05, "loss": 1.8446, "step": 235500 }, { "epoch": 0.15, "learning_rate": 4.2487992315081655e-05, "loss": 1.8247, "step": 235600 }, { "epoch": 0.15, "learning_rate": 4.2484790265770095e-05, "loss": 1.8459, "step": 235700 }, { "epoch": 0.15, "learning_rate": 4.2481588216458535e-05, "loss": 1.8502, "step": 235800 }, { "epoch": 0.15, "learning_rate": 4.2478386167146975e-05, "loss": 1.8178, "step": 235900 }, { "epoch": 0.15, "learning_rate": 4.2475184117835414e-05, "loss": 1.8179, "step": 236000 }, { "epoch": 0.15, "eval_loss": 1.7625064849853516, "eval_runtime": 90.3183, "eval_samples_per_second": 110.72, "eval_steps_per_second": 6.92, "step": 236000 }, { "epoch": 0.15, "learning_rate": 4.247198206852386e-05, "loss": 1.8156, "step": 236100 }, { "epoch": 0.15, "learning_rate": 4.2468780019212294e-05, "loss": 1.8132, "step": 236200 }, { "epoch": 0.15, "learning_rate": 4.246557796990074e-05, "loss": 1.8149, "step": 236300 }, { "epoch": 0.15, "learning_rate": 4.246237592058918e-05, "loss": 1.8456, "step": 236400 }, { "epoch": 0.15, "learning_rate": 4.245917387127762e-05, "loss": 1.8294, "step": 236500 }, { "epoch": 0.15, "learning_rate": 4.245597182196606e-05, "loss": 1.8282, "step": 236600 }, { "epoch": 0.15, "learning_rate": 4.24527697726545e-05, "loss": 1.8238, "step": 236700 }, { "epoch": 0.15, "learning_rate": 4.244956772334294e-05, "loss": 1.8302, "step": 236800 }, { "epoch": 0.15, "learning_rate": 4.244636567403138e-05, "loss": 1.8255, "step": 236900 }, { "epoch": 0.15, "learning_rate": 4.2443163624719825e-05, "loss": 1.8289, "step": 237000 }, { "epoch": 0.15, "eval_loss": 1.7626121044158936, "eval_runtime": 93.7139, "eval_samples_per_second": 106.708, "eval_steps_per_second": 6.669, "step": 237000 }, { "epoch": 0.15, "learning_rate": 4.2439961575408265e-05, "loss": 1.8324, "step": 237100 }, { "epoch": 0.15, "learning_rate": 4.2436759526096705e-05, "loss": 1.8293, "step": 237200 }, { "epoch": 0.15, "learning_rate": 4.2433557476785145e-05, "loss": 1.8221, "step": 237300 }, { "epoch": 0.15, "learning_rate": 4.2430355427473584e-05, "loss": 1.8406, "step": 237400 }, { "epoch": 0.15, "learning_rate": 4.2427153378162024e-05, "loss": 1.8379, "step": 237500 }, { "epoch": 0.15, "learning_rate": 4.2423951328850464e-05, "loss": 1.8285, "step": 237600 }, { "epoch": 0.15, "learning_rate": 4.242074927953891e-05, "loss": 1.8257, "step": 237700 }, { "epoch": 0.15, "learning_rate": 4.241754723022735e-05, "loss": 1.8206, "step": 237800 }, { "epoch": 0.15, "learning_rate": 4.241434518091579e-05, "loss": 1.8302, "step": 237900 }, { "epoch": 0.15, "learning_rate": 4.241114313160423e-05, "loss": 1.8381, "step": 238000 }, { "epoch": 0.15, "eval_loss": 1.7574859857559204, "eval_runtime": 90.8687, "eval_samples_per_second": 110.049, "eval_steps_per_second": 6.878, "step": 238000 }, { "epoch": 0.15, "learning_rate": 4.240794108229267e-05, "loss": 1.8213, "step": 238100 }, { "epoch": 0.15, "learning_rate": 4.240473903298111e-05, "loss": 1.7946, "step": 238200 }, { "epoch": 0.15, "learning_rate": 4.240153698366955e-05, "loss": 1.8306, "step": 238300 }, { "epoch": 0.15, "learning_rate": 4.239833493435799e-05, "loss": 1.8563, "step": 238400 }, { "epoch": 0.15, "learning_rate": 4.239513288504643e-05, "loss": 1.8318, "step": 238500 }, { "epoch": 0.15, "learning_rate": 4.2391930835734875e-05, "loss": 1.8308, "step": 238600 }, { "epoch": 0.15, "learning_rate": 4.2388728786423314e-05, "loss": 1.808, "step": 238700 }, { "epoch": 0.15, "learning_rate": 4.2385526737111754e-05, "loss": 1.8369, "step": 238800 }, { "epoch": 0.15, "learning_rate": 4.2382324687800194e-05, "loss": 1.8088, "step": 238900 }, { "epoch": 0.15, "learning_rate": 4.2379122638488634e-05, "loss": 1.8223, "step": 239000 }, { "epoch": 0.15, "eval_loss": 1.7622008323669434, "eval_runtime": 90.5043, "eval_samples_per_second": 110.492, "eval_steps_per_second": 6.906, "step": 239000 }, { "epoch": 0.15, "learning_rate": 4.2375920589177073e-05, "loss": 1.8168, "step": 239100 }, { "epoch": 0.15, "learning_rate": 4.237271853986551e-05, "loss": 1.8169, "step": 239200 }, { "epoch": 0.15, "learning_rate": 4.236951649055396e-05, "loss": 1.8412, "step": 239300 }, { "epoch": 0.15, "learning_rate": 4.23663144412424e-05, "loss": 1.8248, "step": 239400 }, { "epoch": 0.15, "learning_rate": 4.236311239193084e-05, "loss": 1.8331, "step": 239500 }, { "epoch": 0.15, "learning_rate": 4.235991034261928e-05, "loss": 1.809, "step": 239600 }, { "epoch": 0.15, "learning_rate": 4.235670829330772e-05, "loss": 1.8195, "step": 239700 }, { "epoch": 0.15, "learning_rate": 4.235350624399616e-05, "loss": 1.8439, "step": 239800 }, { "epoch": 0.15, "learning_rate": 4.23503041946846e-05, "loss": 1.8203, "step": 239900 }, { "epoch": 0.15, "learning_rate": 4.234710214537304e-05, "loss": 1.8255, "step": 240000 }, { "epoch": 0.15, "eval_loss": 1.7587425708770752, "eval_runtime": 94.9916, "eval_samples_per_second": 105.272, "eval_steps_per_second": 6.58, "step": 240000 }, { "epoch": 0.15, "learning_rate": 4.2343900096061484e-05, "loss": 1.8165, "step": 240100 }, { "epoch": 0.15, "learning_rate": 4.2340698046749924e-05, "loss": 1.8373, "step": 240200 }, { "epoch": 0.15, "learning_rate": 4.2337495997438364e-05, "loss": 1.8072, "step": 240300 }, { "epoch": 0.15, "learning_rate": 4.2334293948126804e-05, "loss": 1.8134, "step": 240400 }, { "epoch": 0.15, "learning_rate": 4.233109189881524e-05, "loss": 1.8118, "step": 240500 }, { "epoch": 0.15, "learning_rate": 4.232788984950368e-05, "loss": 1.803, "step": 240600 }, { "epoch": 0.15, "learning_rate": 4.232468780019212e-05, "loss": 1.824, "step": 240700 }, { "epoch": 0.15, "learning_rate": 4.232148575088056e-05, "loss": 1.7999, "step": 240800 }, { "epoch": 0.15, "learning_rate": 4.231828370156901e-05, "loss": 1.8059, "step": 240900 }, { "epoch": 0.15, "learning_rate": 4.231508165225745e-05, "loss": 1.8325, "step": 241000 }, { "epoch": 0.15, "eval_loss": 1.7590054273605347, "eval_runtime": 93.8123, "eval_samples_per_second": 106.596, "eval_steps_per_second": 6.662, "step": 241000 }, { "epoch": 0.15, "learning_rate": 4.231187960294589e-05, "loss": 1.8472, "step": 241100 }, { "epoch": 0.15, "learning_rate": 4.230867755363433e-05, "loss": 1.8201, "step": 241200 }, { "epoch": 0.15, "learning_rate": 4.230547550432277e-05, "loss": 1.8219, "step": 241300 }, { "epoch": 0.15, "learning_rate": 4.230227345501121e-05, "loss": 1.8098, "step": 241400 }, { "epoch": 0.15, "learning_rate": 4.229907140569965e-05, "loss": 1.821, "step": 241500 }, { "epoch": 0.15, "learning_rate": 4.229586935638809e-05, "loss": 1.8387, "step": 241600 }, { "epoch": 0.15, "learning_rate": 4.2292667307076534e-05, "loss": 1.8206, "step": 241700 }, { "epoch": 0.15, "learning_rate": 4.2289465257764973e-05, "loss": 1.8177, "step": 241800 }, { "epoch": 0.15, "learning_rate": 4.228626320845341e-05, "loss": 1.804, "step": 241900 }, { "epoch": 0.15, "learning_rate": 4.228306115914185e-05, "loss": 1.8147, "step": 242000 }, { "epoch": 0.15, "eval_loss": 1.7603613138198853, "eval_runtime": 88.7511, "eval_samples_per_second": 112.675, "eval_steps_per_second": 7.042, "step": 242000 }, { "epoch": 0.15, "learning_rate": 4.227985910983029e-05, "loss": 1.8147, "step": 242100 }, { "epoch": 0.16, "learning_rate": 4.227665706051873e-05, "loss": 1.8188, "step": 242200 }, { "epoch": 0.16, "learning_rate": 4.227345501120717e-05, "loss": 1.8238, "step": 242300 }, { "epoch": 0.16, "learning_rate": 4.227025296189562e-05, "loss": 1.8028, "step": 242400 }, { "epoch": 0.16, "learning_rate": 4.226705091258406e-05, "loss": 1.8105, "step": 242500 }, { "epoch": 0.16, "learning_rate": 4.22638488632725e-05, "loss": 1.8104, "step": 242600 }, { "epoch": 0.16, "learning_rate": 4.226064681396094e-05, "loss": 1.7999, "step": 242700 }, { "epoch": 0.16, "learning_rate": 4.225744476464938e-05, "loss": 1.8244, "step": 242800 }, { "epoch": 0.16, "learning_rate": 4.225424271533782e-05, "loss": 1.8341, "step": 242900 }, { "epoch": 0.16, "learning_rate": 4.225104066602626e-05, "loss": 1.7969, "step": 243000 }, { "epoch": 0.16, "eval_loss": 1.7622517347335815, "eval_runtime": 91.8342, "eval_samples_per_second": 108.892, "eval_steps_per_second": 6.806, "step": 243000 }, { "epoch": 0.16, "learning_rate": 4.22478386167147e-05, "loss": 1.8252, "step": 243100 }, { "epoch": 0.16, "learning_rate": 4.2244636567403137e-05, "loss": 1.8213, "step": 243200 }, { "epoch": 0.16, "learning_rate": 4.224143451809158e-05, "loss": 1.8334, "step": 243300 }, { "epoch": 0.16, "learning_rate": 4.223823246878002e-05, "loss": 1.8129, "step": 243400 }, { "epoch": 0.16, "learning_rate": 4.223503041946846e-05, "loss": 1.8132, "step": 243500 }, { "epoch": 0.16, "learning_rate": 4.22318283701569e-05, "loss": 1.8218, "step": 243600 }, { "epoch": 0.16, "learning_rate": 4.222862632084534e-05, "loss": 1.8353, "step": 243700 }, { "epoch": 0.16, "learning_rate": 4.222542427153378e-05, "loss": 1.8168, "step": 243800 }, { "epoch": 0.16, "learning_rate": 4.222222222222222e-05, "loss": 1.8103, "step": 243900 }, { "epoch": 0.16, "learning_rate": 4.221902017291067e-05, "loss": 1.8118, "step": 244000 }, { "epoch": 0.16, "eval_loss": 1.7599226236343384, "eval_runtime": 93.5373, "eval_samples_per_second": 106.909, "eval_steps_per_second": 6.682, "step": 244000 }, { "epoch": 0.16, "learning_rate": 4.221581812359911e-05, "loss": 1.842, "step": 244100 }, { "epoch": 0.16, "learning_rate": 4.221261607428755e-05, "loss": 1.8242, "step": 244200 }, { "epoch": 0.16, "learning_rate": 4.220941402497599e-05, "loss": 1.8301, "step": 244300 }, { "epoch": 0.16, "learning_rate": 4.220621197566443e-05, "loss": 1.8216, "step": 244400 }, { "epoch": 0.16, "learning_rate": 4.220300992635287e-05, "loss": 1.8089, "step": 244500 }, { "epoch": 0.16, "learning_rate": 4.2199807877041306e-05, "loss": 1.8397, "step": 244600 }, { "epoch": 0.16, "learning_rate": 4.219660582772975e-05, "loss": 1.8282, "step": 244700 }, { "epoch": 0.16, "learning_rate": 4.2193403778418186e-05, "loss": 1.8057, "step": 244800 }, { "epoch": 0.16, "learning_rate": 4.219020172910663e-05, "loss": 1.8234, "step": 244900 }, { "epoch": 0.16, "learning_rate": 4.218699967979507e-05, "loss": 1.8289, "step": 245000 }, { "epoch": 0.16, "eval_loss": 1.7590441703796387, "eval_runtime": 91.3753, "eval_samples_per_second": 109.439, "eval_steps_per_second": 6.84, "step": 245000 }, { "epoch": 0.16, "learning_rate": 4.218379763048351e-05, "loss": 1.84, "step": 245100 }, { "epoch": 0.16, "learning_rate": 4.218059558117195e-05, "loss": 1.8386, "step": 245200 }, { "epoch": 0.16, "learning_rate": 4.217739353186039e-05, "loss": 1.8254, "step": 245300 }, { "epoch": 0.16, "learning_rate": 4.217419148254884e-05, "loss": 1.8144, "step": 245400 }, { "epoch": 0.16, "learning_rate": 4.217098943323727e-05, "loss": 1.8216, "step": 245500 }, { "epoch": 0.16, "learning_rate": 4.216778738392572e-05, "loss": 1.8183, "step": 245600 }, { "epoch": 0.16, "learning_rate": 4.216458533461416e-05, "loss": 1.813, "step": 245700 }, { "epoch": 0.16, "learning_rate": 4.21613832853026e-05, "loss": 1.8249, "step": 245800 }, { "epoch": 0.16, "learning_rate": 4.215818123599104e-05, "loss": 1.8399, "step": 245900 }, { "epoch": 0.16, "learning_rate": 4.2154979186679476e-05, "loss": 1.8133, "step": 246000 }, { "epoch": 0.16, "eval_loss": 1.7577552795410156, "eval_runtime": 92.6811, "eval_samples_per_second": 107.897, "eval_steps_per_second": 6.744, "step": 246000 }, { "epoch": 0.16, "learning_rate": 4.2151777137367916e-05, "loss": 1.814, "step": 246100 }, { "epoch": 0.16, "learning_rate": 4.2148575088056356e-05, "loss": 1.8226, "step": 246200 }, { "epoch": 0.16, "learning_rate": 4.21453730387448e-05, "loss": 1.8198, "step": 246300 }, { "epoch": 0.16, "learning_rate": 4.2142170989433235e-05, "loss": 1.8306, "step": 246400 }, { "epoch": 0.16, "learning_rate": 4.213896894012168e-05, "loss": 1.8042, "step": 246500 }, { "epoch": 0.16, "learning_rate": 4.213576689081012e-05, "loss": 1.8026, "step": 246600 }, { "epoch": 0.16, "learning_rate": 4.213256484149856e-05, "loss": 1.8194, "step": 246700 }, { "epoch": 0.16, "learning_rate": 4.2129362792187e-05, "loss": 1.8141, "step": 246800 }, { "epoch": 0.16, "learning_rate": 4.212616074287544e-05, "loss": 1.8314, "step": 246900 }, { "epoch": 0.16, "learning_rate": 4.212295869356389e-05, "loss": 1.8253, "step": 247000 }, { "epoch": 0.16, "eval_loss": 1.759398102760315, "eval_runtime": 93.525, "eval_samples_per_second": 106.923, "eval_steps_per_second": 6.683, "step": 247000 }, { "epoch": 0.16, "learning_rate": 4.211975664425232e-05, "loss": 1.8138, "step": 247100 }, { "epoch": 0.16, "learning_rate": 4.211655459494077e-05, "loss": 1.8216, "step": 247200 }, { "epoch": 0.16, "learning_rate": 4.2113352545629207e-05, "loss": 1.8104, "step": 247300 }, { "epoch": 0.16, "learning_rate": 4.2110150496317646e-05, "loss": 1.8197, "step": 247400 }, { "epoch": 0.16, "learning_rate": 4.2106948447006086e-05, "loss": 1.8169, "step": 247500 }, { "epoch": 0.16, "learning_rate": 4.2103746397694526e-05, "loss": 1.8346, "step": 247600 }, { "epoch": 0.16, "learning_rate": 4.210054434838297e-05, "loss": 1.8096, "step": 247700 }, { "epoch": 0.16, "learning_rate": 4.2097342299071405e-05, "loss": 1.8298, "step": 247800 }, { "epoch": 0.16, "learning_rate": 4.209414024975985e-05, "loss": 1.8228, "step": 247900 }, { "epoch": 0.16, "learning_rate": 4.2090938200448285e-05, "loss": 1.8132, "step": 248000 }, { "epoch": 0.16, "eval_loss": 1.7592939138412476, "eval_runtime": 92.8981, "eval_samples_per_second": 107.645, "eval_steps_per_second": 6.728, "step": 248000 }, { "epoch": 0.16, "learning_rate": 4.208773615113673e-05, "loss": 1.8015, "step": 248100 }, { "epoch": 0.16, "learning_rate": 4.208453410182517e-05, "loss": 1.8161, "step": 248200 }, { "epoch": 0.16, "learning_rate": 4.208133205251361e-05, "loss": 1.8121, "step": 248300 }, { "epoch": 0.16, "learning_rate": 4.207813000320205e-05, "loss": 1.818, "step": 248400 }, { "epoch": 0.16, "learning_rate": 4.207492795389049e-05, "loss": 1.8159, "step": 248500 }, { "epoch": 0.16, "learning_rate": 4.207172590457894e-05, "loss": 1.823, "step": 248600 }, { "epoch": 0.16, "learning_rate": 4.206852385526737e-05, "loss": 1.7983, "step": 248700 }, { "epoch": 0.16, "learning_rate": 4.2065321805955816e-05, "loss": 1.8183, "step": 248800 }, { "epoch": 0.16, "learning_rate": 4.2062119756644256e-05, "loss": 1.8316, "step": 248900 }, { "epoch": 0.16, "learning_rate": 4.2058917707332696e-05, "loss": 1.8059, "step": 249000 }, { "epoch": 0.16, "eval_loss": 1.7592170238494873, "eval_runtime": 92.5386, "eval_samples_per_second": 108.063, "eval_steps_per_second": 6.754, "step": 249000 }, { "epoch": 0.16, "learning_rate": 4.2055715658021135e-05, "loss": 1.8038, "step": 249100 }, { "epoch": 0.16, "learning_rate": 4.2052513608709575e-05, "loss": 1.832, "step": 249200 }, { "epoch": 0.16, "learning_rate": 4.204931155939802e-05, "loss": 1.7965, "step": 249300 }, { "epoch": 0.16, "learning_rate": 4.2046109510086455e-05, "loss": 1.8115, "step": 249400 }, { "epoch": 0.16, "learning_rate": 4.20429074607749e-05, "loss": 1.845, "step": 249500 }, { "epoch": 0.16, "learning_rate": 4.2039705411463334e-05, "loss": 1.7923, "step": 249600 }, { "epoch": 0.16, "learning_rate": 4.203650336215178e-05, "loss": 1.8349, "step": 249700 }, { "epoch": 0.16, "learning_rate": 4.203330131284022e-05, "loss": 1.8192, "step": 249800 }, { "epoch": 0.16, "learning_rate": 4.203009926352866e-05, "loss": 1.8338, "step": 249900 }, { "epoch": 0.16, "learning_rate": 4.2026897214217107e-05, "loss": 1.8226, "step": 250000 }, { "epoch": 0.16, "eval_loss": 1.7564191818237305, "eval_runtime": 93.7711, "eval_samples_per_second": 106.643, "eval_steps_per_second": 6.665, "step": 250000 }, { "epoch": 0.16, "learning_rate": 4.202369516490554e-05, "loss": 1.8224, "step": 250100 }, { "epoch": 0.16, "learning_rate": 4.2020493115593986e-05, "loss": 1.7857, "step": 250200 }, { "epoch": 0.16, "learning_rate": 4.201729106628242e-05, "loss": 1.8117, "step": 250300 }, { "epoch": 0.16, "learning_rate": 4.2014089016970866e-05, "loss": 1.8193, "step": 250400 }, { "epoch": 0.16, "learning_rate": 4.2010886967659305e-05, "loss": 1.8218, "step": 250500 }, { "epoch": 0.16, "learning_rate": 4.2007684918347745e-05, "loss": 1.8034, "step": 250600 }, { "epoch": 0.16, "learning_rate": 4.2004482869036185e-05, "loss": 1.8092, "step": 250700 }, { "epoch": 0.16, "learning_rate": 4.2001280819724624e-05, "loss": 1.8142, "step": 250800 }, { "epoch": 0.16, "learning_rate": 4.199807877041307e-05, "loss": 1.8129, "step": 250900 }, { "epoch": 0.16, "learning_rate": 4.1994876721101504e-05, "loss": 1.8013, "step": 251000 }, { "epoch": 0.16, "eval_loss": 1.7592030763626099, "eval_runtime": 94.9168, "eval_samples_per_second": 105.355, "eval_steps_per_second": 6.585, "step": 251000 }, { "epoch": 0.16, "learning_rate": 4.199167467178995e-05, "loss": 1.8093, "step": 251100 }, { "epoch": 0.16, "learning_rate": 4.1988472622478383e-05, "loss": 1.8203, "step": 251200 }, { "epoch": 0.16, "learning_rate": 4.198527057316683e-05, "loss": 1.7975, "step": 251300 }, { "epoch": 0.16, "learning_rate": 4.198206852385527e-05, "loss": 1.8224, "step": 251400 }, { "epoch": 0.16, "learning_rate": 4.197886647454371e-05, "loss": 1.8145, "step": 251500 }, { "epoch": 0.16, "learning_rate": 4.1975664425232156e-05, "loss": 1.8327, "step": 251600 }, { "epoch": 0.16, "learning_rate": 4.197246237592059e-05, "loss": 1.7891, "step": 251700 }, { "epoch": 0.16, "learning_rate": 4.1969260326609035e-05, "loss": 1.8271, "step": 251800 }, { "epoch": 0.16, "learning_rate": 4.196605827729747e-05, "loss": 1.815, "step": 251900 }, { "epoch": 0.16, "learning_rate": 4.1962856227985915e-05, "loss": 1.7884, "step": 252000 }, { "epoch": 0.16, "eval_loss": 1.7569478750228882, "eval_runtime": 91.404, "eval_samples_per_second": 109.404, "eval_steps_per_second": 6.838, "step": 252000 }, { "epoch": 0.16, "learning_rate": 4.1959654178674355e-05, "loss": 1.8299, "step": 252100 }, { "epoch": 0.16, "learning_rate": 4.1956452129362794e-05, "loss": 1.8197, "step": 252200 }, { "epoch": 0.16, "learning_rate": 4.1953250080051234e-05, "loss": 1.8173, "step": 252300 }, { "epoch": 0.16, "learning_rate": 4.1950048030739674e-05, "loss": 1.8017, "step": 252400 }, { "epoch": 0.16, "learning_rate": 4.194684598142812e-05, "loss": 1.8391, "step": 252500 }, { "epoch": 0.16, "learning_rate": 4.194364393211655e-05, "loss": 1.825, "step": 252600 }, { "epoch": 0.16, "learning_rate": 4.1940441882805e-05, "loss": 1.8078, "step": 252700 }, { "epoch": 0.16, "learning_rate": 4.193723983349343e-05, "loss": 1.8092, "step": 252800 }, { "epoch": 0.16, "learning_rate": 4.193403778418188e-05, "loss": 1.8329, "step": 252900 }, { "epoch": 0.16, "learning_rate": 4.193083573487032e-05, "loss": 1.816, "step": 253000 }, { "epoch": 0.16, "eval_loss": 1.7578144073486328, "eval_runtime": 91.9824, "eval_samples_per_second": 108.716, "eval_steps_per_second": 6.795, "step": 253000 }, { "epoch": 0.16, "learning_rate": 4.192763368555876e-05, "loss": 1.8023, "step": 253100 }, { "epoch": 0.16, "learning_rate": 4.1924431636247205e-05, "loss": 1.8078, "step": 253200 }, { "epoch": 0.16, "learning_rate": 4.192122958693564e-05, "loss": 1.7988, "step": 253300 }, { "epoch": 0.16, "learning_rate": 4.1918027537624085e-05, "loss": 1.8068, "step": 253400 }, { "epoch": 0.16, "learning_rate": 4.191482548831252e-05, "loss": 1.8532, "step": 253500 }, { "epoch": 0.16, "learning_rate": 4.1911623439000964e-05, "loss": 1.785, "step": 253600 }, { "epoch": 0.16, "learning_rate": 4.1908421389689404e-05, "loss": 1.8372, "step": 253700 }, { "epoch": 0.16, "learning_rate": 4.1905219340377844e-05, "loss": 1.804, "step": 253800 }, { "epoch": 0.16, "learning_rate": 4.1902017291066283e-05, "loss": 1.8242, "step": 253900 }, { "epoch": 0.16, "learning_rate": 4.189881524175472e-05, "loss": 1.8209, "step": 254000 }, { "epoch": 0.16, "eval_loss": 1.7573763132095337, "eval_runtime": 94.7319, "eval_samples_per_second": 105.561, "eval_steps_per_second": 6.598, "step": 254000 }, { "epoch": 0.16, "learning_rate": 4.189561319244317e-05, "loss": 1.8308, "step": 254100 }, { "epoch": 0.16, "learning_rate": 4.18924111431316e-05, "loss": 1.8191, "step": 254200 }, { "epoch": 0.16, "learning_rate": 4.188920909382005e-05, "loss": 1.8069, "step": 254300 }, { "epoch": 0.16, "learning_rate": 4.188600704450848e-05, "loss": 1.8192, "step": 254400 }, { "epoch": 0.16, "learning_rate": 4.188280499519693e-05, "loss": 1.8187, "step": 254500 }, { "epoch": 0.16, "learning_rate": 4.187960294588537e-05, "loss": 1.7835, "step": 254600 }, { "epoch": 0.16, "learning_rate": 4.187640089657381e-05, "loss": 1.8175, "step": 254700 }, { "epoch": 0.16, "learning_rate": 4.1873198847262255e-05, "loss": 1.826, "step": 254800 }, { "epoch": 0.16, "learning_rate": 4.186999679795069e-05, "loss": 1.8223, "step": 254900 }, { "epoch": 0.16, "learning_rate": 4.1866794748639134e-05, "loss": 1.8136, "step": 255000 }, { "epoch": 0.16, "eval_loss": 1.7599822282791138, "eval_runtime": 95.5846, "eval_samples_per_second": 104.619, "eval_steps_per_second": 6.539, "step": 255000 }, { "epoch": 0.16, "learning_rate": 4.186359269932757e-05, "loss": 1.8062, "step": 255100 }, { "epoch": 0.16, "learning_rate": 4.1860390650016014e-05, "loss": 1.8015, "step": 255200 }, { "epoch": 0.16, "learning_rate": 4.1857188600704453e-05, "loss": 1.809, "step": 255300 }, { "epoch": 0.16, "learning_rate": 4.185398655139289e-05, "loss": 1.8176, "step": 255400 }, { "epoch": 0.16, "learning_rate": 4.185078450208133e-05, "loss": 1.806, "step": 255500 }, { "epoch": 0.16, "learning_rate": 4.184758245276977e-05, "loss": 1.8067, "step": 255600 }, { "epoch": 0.16, "learning_rate": 4.184438040345822e-05, "loss": 1.8144, "step": 255700 }, { "epoch": 0.16, "learning_rate": 4.184117835414665e-05, "loss": 1.8318, "step": 255800 }, { "epoch": 0.16, "learning_rate": 4.18379763048351e-05, "loss": 1.8015, "step": 255900 }, { "epoch": 0.16, "learning_rate": 4.183477425552353e-05, "loss": 1.8007, "step": 256000 }, { "epoch": 0.16, "eval_loss": 1.758513331413269, "eval_runtime": 90.5608, "eval_samples_per_second": 110.423, "eval_steps_per_second": 6.901, "step": 256000 }, { "epoch": 0.16, "learning_rate": 4.183157220621198e-05, "loss": 1.8216, "step": 256100 }, { "epoch": 0.16, "learning_rate": 4.182837015690042e-05, "loss": 1.8199, "step": 256200 }, { "epoch": 0.16, "learning_rate": 4.182516810758886e-05, "loss": 1.8117, "step": 256300 }, { "epoch": 0.16, "learning_rate": 4.1821966058277304e-05, "loss": 1.8218, "step": 256400 }, { "epoch": 0.16, "learning_rate": 4.181876400896574e-05, "loss": 1.8234, "step": 256500 }, { "epoch": 0.16, "learning_rate": 4.1815561959654184e-05, "loss": 1.8276, "step": 256600 }, { "epoch": 0.16, "learning_rate": 4.1812359910342617e-05, "loss": 1.8018, "step": 256700 }, { "epoch": 0.16, "learning_rate": 4.180915786103106e-05, "loss": 1.829, "step": 256800 }, { "epoch": 0.16, "learning_rate": 4.18059558117195e-05, "loss": 1.7937, "step": 256900 }, { "epoch": 0.16, "learning_rate": 4.180275376240794e-05, "loss": 1.8097, "step": 257000 }, { "epoch": 0.16, "eval_loss": 1.7559857368469238, "eval_runtime": 89.9239, "eval_samples_per_second": 111.205, "eval_steps_per_second": 6.95, "step": 257000 }, { "epoch": 0.16, "learning_rate": 4.179955171309638e-05, "loss": 1.8129, "step": 257100 }, { "epoch": 0.16, "learning_rate": 4.179634966378482e-05, "loss": 1.7979, "step": 257200 }, { "epoch": 0.16, "learning_rate": 4.179314761447327e-05, "loss": 1.8331, "step": 257300 }, { "epoch": 0.16, "learning_rate": 4.17899455651617e-05, "loss": 1.8136, "step": 257400 }, { "epoch": 0.16, "learning_rate": 4.178674351585015e-05, "loss": 1.7891, "step": 257500 }, { "epoch": 0.16, "learning_rate": 4.178354146653859e-05, "loss": 1.7813, "step": 257600 }, { "epoch": 0.16, "learning_rate": 4.178033941722703e-05, "loss": 1.8042, "step": 257700 }, { "epoch": 0.16, "learning_rate": 4.177713736791547e-05, "loss": 1.8167, "step": 257800 }, { "epoch": 0.17, "learning_rate": 4.177393531860391e-05, "loss": 1.8188, "step": 257900 }, { "epoch": 0.17, "learning_rate": 4.1770733269292353e-05, "loss": 1.7967, "step": 258000 }, { "epoch": 0.17, "eval_loss": 1.7580071687698364, "eval_runtime": 92.7708, "eval_samples_per_second": 107.792, "eval_steps_per_second": 6.737, "step": 258000 }, { "epoch": 0.17, "learning_rate": 4.1767531219980786e-05, "loss": 1.8171, "step": 258100 }, { "epoch": 0.17, "learning_rate": 4.176432917066923e-05, "loss": 1.8239, "step": 258200 }, { "epoch": 0.17, "learning_rate": 4.1761127121357666e-05, "loss": 1.7934, "step": 258300 }, { "epoch": 0.17, "learning_rate": 4.175792507204611e-05, "loss": 1.8128, "step": 258400 }, { "epoch": 0.17, "learning_rate": 4.175472302273455e-05, "loss": 1.831, "step": 258500 }, { "epoch": 0.17, "learning_rate": 4.175152097342299e-05, "loss": 1.8135, "step": 258600 }, { "epoch": 0.17, "learning_rate": 4.174831892411143e-05, "loss": 1.8079, "step": 258700 }, { "epoch": 0.17, "learning_rate": 4.174511687479987e-05, "loss": 1.8207, "step": 258800 }, { "epoch": 0.17, "learning_rate": 4.174191482548832e-05, "loss": 1.8124, "step": 258900 }, { "epoch": 0.17, "learning_rate": 4.173871277617675e-05, "loss": 1.8036, "step": 259000 }, { "epoch": 0.17, "eval_loss": 1.7559622526168823, "eval_runtime": 95.9042, "eval_samples_per_second": 104.271, "eval_steps_per_second": 6.517, "step": 259000 }, { "epoch": 0.17, "learning_rate": 4.17355107268652e-05, "loss": 1.8323, "step": 259100 }, { "epoch": 0.17, "learning_rate": 4.173230867755364e-05, "loss": 1.8118, "step": 259200 }, { "epoch": 0.17, "learning_rate": 4.172910662824208e-05, "loss": 1.7962, "step": 259300 }, { "epoch": 0.17, "learning_rate": 4.1725904578930517e-05, "loss": 1.8256, "step": 259400 }, { "epoch": 0.17, "learning_rate": 4.1722702529618956e-05, "loss": 1.7893, "step": 259500 }, { "epoch": 0.17, "learning_rate": 4.17195004803074e-05, "loss": 1.8026, "step": 259600 }, { "epoch": 0.17, "learning_rate": 4.1716298430995836e-05, "loss": 1.8051, "step": 259700 }, { "epoch": 0.17, "learning_rate": 4.171309638168428e-05, "loss": 1.8064, "step": 259800 }, { "epoch": 0.17, "learning_rate": 4.170989433237272e-05, "loss": 1.801, "step": 259900 }, { "epoch": 0.17, "learning_rate": 4.170669228306116e-05, "loss": 1.8335, "step": 260000 }, { "epoch": 0.17, "eval_loss": 1.7539796829223633, "eval_runtime": 94.5167, "eval_samples_per_second": 105.801, "eval_steps_per_second": 6.613, "step": 260000 }, { "epoch": 0.17, "learning_rate": 4.17034902337496e-05, "loss": 1.8157, "step": 260100 }, { "epoch": 0.17, "learning_rate": 4.170028818443804e-05, "loss": 1.8152, "step": 260200 }, { "epoch": 0.17, "learning_rate": 4.169708613512648e-05, "loss": 1.8171, "step": 260300 }, { "epoch": 0.17, "learning_rate": 4.169388408581492e-05, "loss": 1.7992, "step": 260400 }, { "epoch": 0.17, "learning_rate": 4.169068203650337e-05, "loss": 1.8031, "step": 260500 }, { "epoch": 0.17, "learning_rate": 4.16874799871918e-05, "loss": 1.8233, "step": 260600 }, { "epoch": 0.17, "learning_rate": 4.168427793788025e-05, "loss": 1.8045, "step": 260700 }, { "epoch": 0.17, "learning_rate": 4.1681075888568686e-05, "loss": 1.7897, "step": 260800 }, { "epoch": 0.17, "learning_rate": 4.1677873839257126e-05, "loss": 1.8063, "step": 260900 }, { "epoch": 0.17, "learning_rate": 4.1674671789945566e-05, "loss": 1.8063, "step": 261000 }, { "epoch": 0.17, "eval_loss": 1.7584996223449707, "eval_runtime": 91.4507, "eval_samples_per_second": 109.349, "eval_steps_per_second": 6.834, "step": 261000 }, { "epoch": 0.17, "learning_rate": 4.1671469740634006e-05, "loss": 1.801, "step": 261100 }, { "epoch": 0.17, "learning_rate": 4.166826769132245e-05, "loss": 1.7886, "step": 261200 }, { "epoch": 0.17, "learning_rate": 4.1665065642010885e-05, "loss": 1.7909, "step": 261300 }, { "epoch": 0.17, "learning_rate": 4.166186359269933e-05, "loss": 1.8073, "step": 261400 }, { "epoch": 0.17, "learning_rate": 4.165866154338777e-05, "loss": 1.7834, "step": 261500 }, { "epoch": 0.17, "learning_rate": 4.165545949407621e-05, "loss": 1.8186, "step": 261600 }, { "epoch": 0.17, "learning_rate": 4.165225744476465e-05, "loss": 1.8164, "step": 261700 }, { "epoch": 0.17, "learning_rate": 4.164905539545309e-05, "loss": 1.7961, "step": 261800 }, { "epoch": 0.17, "learning_rate": 4.164585334614153e-05, "loss": 1.82, "step": 261900 }, { "epoch": 0.17, "learning_rate": 4.164265129682997e-05, "loss": 1.7801, "step": 262000 }, { "epoch": 0.17, "eval_loss": 1.7565683126449585, "eval_runtime": 92.0124, "eval_samples_per_second": 108.681, "eval_steps_per_second": 6.793, "step": 262000 }, { "epoch": 0.17, "learning_rate": 4.1639449247518417e-05, "loss": 1.8021, "step": 262100 }, { "epoch": 0.17, "learning_rate": 4.1636247198206856e-05, "loss": 1.8191, "step": 262200 }, { "epoch": 0.17, "learning_rate": 4.1633045148895296e-05, "loss": 1.8067, "step": 262300 }, { "epoch": 0.17, "learning_rate": 4.1629843099583736e-05, "loss": 1.8173, "step": 262400 }, { "epoch": 0.17, "learning_rate": 4.1626641050272176e-05, "loss": 1.8175, "step": 262500 }, { "epoch": 0.17, "learning_rate": 4.1623439000960615e-05, "loss": 1.8158, "step": 262600 }, { "epoch": 0.17, "learning_rate": 4.1620236951649055e-05, "loss": 1.7872, "step": 262700 }, { "epoch": 0.17, "learning_rate": 4.16170349023375e-05, "loss": 1.8021, "step": 262800 }, { "epoch": 0.17, "learning_rate": 4.161383285302594e-05, "loss": 1.7832, "step": 262900 }, { "epoch": 0.17, "learning_rate": 4.161063080371438e-05, "loss": 1.7994, "step": 263000 }, { "epoch": 0.17, "eval_loss": 1.7591261863708496, "eval_runtime": 96.2047, "eval_samples_per_second": 103.945, "eval_steps_per_second": 6.497, "step": 263000 }, { "epoch": 0.17, "learning_rate": 4.160742875440282e-05, "loss": 1.7879, "step": 263100 }, { "epoch": 0.17, "learning_rate": 4.160422670509126e-05, "loss": 1.7996, "step": 263200 }, { "epoch": 0.17, "learning_rate": 4.16010246557797e-05, "loss": 1.8096, "step": 263300 }, { "epoch": 0.17, "learning_rate": 4.159782260646814e-05, "loss": 1.7947, "step": 263400 }, { "epoch": 0.17, "learning_rate": 4.159462055715658e-05, "loss": 1.8015, "step": 263500 }, { "epoch": 0.17, "learning_rate": 4.159141850784502e-05, "loss": 1.8059, "step": 263600 }, { "epoch": 0.17, "learning_rate": 4.1588216458533466e-05, "loss": 1.8241, "step": 263700 }, { "epoch": 0.17, "learning_rate": 4.1585014409221906e-05, "loss": 1.8164, "step": 263800 }, { "epoch": 0.17, "learning_rate": 4.1581812359910345e-05, "loss": 1.8146, "step": 263900 }, { "epoch": 0.17, "learning_rate": 4.1578610310598785e-05, "loss": 1.7908, "step": 264000 }, { "epoch": 0.17, "eval_loss": 1.7551435232162476, "eval_runtime": 94.3064, "eval_samples_per_second": 106.037, "eval_steps_per_second": 6.627, "step": 264000 }, { "epoch": 0.17, "learning_rate": 4.1575408261287225e-05, "loss": 1.7923, "step": 264100 }, { "epoch": 0.17, "learning_rate": 4.1572206211975665e-05, "loss": 1.7883, "step": 264200 }, { "epoch": 0.17, "learning_rate": 4.1569004162664104e-05, "loss": 1.8081, "step": 264300 }, { "epoch": 0.17, "learning_rate": 4.156580211335255e-05, "loss": 1.7992, "step": 264400 }, { "epoch": 0.17, "learning_rate": 4.156260006404099e-05, "loss": 1.8141, "step": 264500 }, { "epoch": 0.17, "learning_rate": 4.155939801472943e-05, "loss": 1.8155, "step": 264600 }, { "epoch": 0.17, "learning_rate": 4.155619596541787e-05, "loss": 1.8225, "step": 264700 }, { "epoch": 0.17, "learning_rate": 4.155299391610631e-05, "loss": 1.8017, "step": 264800 }, { "epoch": 0.17, "learning_rate": 4.154979186679475e-05, "loss": 1.8237, "step": 264900 }, { "epoch": 0.17, "learning_rate": 4.154658981748319e-05, "loss": 1.8059, "step": 265000 }, { "epoch": 0.17, "eval_loss": 1.755921483039856, "eval_runtime": 92.6944, "eval_samples_per_second": 107.881, "eval_steps_per_second": 6.743, "step": 265000 }, { "epoch": 0.17, "learning_rate": 4.154338776817163e-05, "loss": 1.8061, "step": 265100 }, { "epoch": 0.17, "learning_rate": 4.1540185718860076e-05, "loss": 1.7805, "step": 265200 }, { "epoch": 0.17, "learning_rate": 4.1536983669548515e-05, "loss": 1.8153, "step": 265300 }, { "epoch": 0.17, "learning_rate": 4.1533781620236955e-05, "loss": 1.799, "step": 265400 }, { "epoch": 0.17, "learning_rate": 4.1530579570925395e-05, "loss": 1.8061, "step": 265500 }, { "epoch": 0.17, "learning_rate": 4.1527377521613835e-05, "loss": 1.8056, "step": 265600 }, { "epoch": 0.17, "learning_rate": 4.1524175472302274e-05, "loss": 1.7911, "step": 265700 }, { "epoch": 0.17, "learning_rate": 4.1520973422990714e-05, "loss": 1.8216, "step": 265800 }, { "epoch": 0.17, "learning_rate": 4.1517771373679154e-05, "loss": 1.8013, "step": 265900 }, { "epoch": 0.17, "learning_rate": 4.15145693243676e-05, "loss": 1.7969, "step": 266000 }, { "epoch": 0.17, "eval_loss": 1.7567123174667358, "eval_runtime": 91.8807, "eval_samples_per_second": 108.837, "eval_steps_per_second": 6.802, "step": 266000 }, { "epoch": 0.17, "learning_rate": 4.151136727505604e-05, "loss": 1.8018, "step": 266100 }, { "epoch": 0.17, "learning_rate": 4.150816522574448e-05, "loss": 1.7975, "step": 266200 }, { "epoch": 0.17, "learning_rate": 4.150496317643292e-05, "loss": 1.8034, "step": 266300 }, { "epoch": 0.17, "learning_rate": 4.150176112712136e-05, "loss": 1.8083, "step": 266400 }, { "epoch": 0.17, "learning_rate": 4.14985590778098e-05, "loss": 1.8163, "step": 266500 }, { "epoch": 0.17, "learning_rate": 4.149535702849824e-05, "loss": 1.8132, "step": 266600 }, { "epoch": 0.17, "learning_rate": 4.149215497918668e-05, "loss": 1.7867, "step": 266700 }, { "epoch": 0.17, "learning_rate": 4.1488952929875125e-05, "loss": 1.8045, "step": 266800 }, { "epoch": 0.17, "learning_rate": 4.1485750880563565e-05, "loss": 1.8103, "step": 266900 }, { "epoch": 0.17, "learning_rate": 4.1482548831252004e-05, "loss": 1.8085, "step": 267000 }, { "epoch": 0.17, "eval_loss": 1.758424162864685, "eval_runtime": 91.0791, "eval_samples_per_second": 109.795, "eval_steps_per_second": 6.862, "step": 267000 }, { "epoch": 0.17, "learning_rate": 4.1479346781940444e-05, "loss": 1.8109, "step": 267100 }, { "epoch": 0.17, "learning_rate": 4.1476144732628884e-05, "loss": 1.7858, "step": 267200 }, { "epoch": 0.17, "learning_rate": 4.1472942683317324e-05, "loss": 1.7958, "step": 267300 }, { "epoch": 0.17, "learning_rate": 4.1469740634005763e-05, "loss": 1.7971, "step": 267400 }, { "epoch": 0.17, "learning_rate": 4.146653858469421e-05, "loss": 1.7909, "step": 267500 }, { "epoch": 0.17, "learning_rate": 4.146333653538265e-05, "loss": 1.8173, "step": 267600 }, { "epoch": 0.17, "learning_rate": 4.146013448607109e-05, "loss": 1.8067, "step": 267700 }, { "epoch": 0.17, "learning_rate": 4.145693243675953e-05, "loss": 1.8132, "step": 267800 }, { "epoch": 0.17, "learning_rate": 4.145373038744797e-05, "loss": 1.83, "step": 267900 }, { "epoch": 0.17, "learning_rate": 4.145052833813641e-05, "loss": 1.7898, "step": 268000 }, { "epoch": 0.17, "eval_loss": 1.7581480741500854, "eval_runtime": 92.1641, "eval_samples_per_second": 108.502, "eval_steps_per_second": 6.781, "step": 268000 }, { "epoch": 0.17, "learning_rate": 4.144732628882485e-05, "loss": 1.8069, "step": 268100 }, { "epoch": 0.17, "learning_rate": 4.144412423951329e-05, "loss": 1.7682, "step": 268200 }, { "epoch": 0.17, "learning_rate": 4.144092219020173e-05, "loss": 1.8141, "step": 268300 }, { "epoch": 0.17, "learning_rate": 4.1437720140890174e-05, "loss": 1.8206, "step": 268400 }, { "epoch": 0.17, "learning_rate": 4.1434518091578614e-05, "loss": 1.8203, "step": 268500 }, { "epoch": 0.17, "learning_rate": 4.1431316042267054e-05, "loss": 1.8202, "step": 268600 }, { "epoch": 0.17, "learning_rate": 4.1428113992955494e-05, "loss": 1.7934, "step": 268700 }, { "epoch": 0.17, "learning_rate": 4.142491194364393e-05, "loss": 1.8072, "step": 268800 }, { "epoch": 0.17, "learning_rate": 4.142170989433237e-05, "loss": 1.8051, "step": 268900 }, { "epoch": 0.17, "learning_rate": 4.141850784502081e-05, "loss": 1.806, "step": 269000 }, { "epoch": 0.17, "eval_loss": 1.753213882446289, "eval_runtime": 92.917, "eval_samples_per_second": 107.623, "eval_steps_per_second": 6.726, "step": 269000 }, { "epoch": 0.17, "learning_rate": 4.141530579570926e-05, "loss": 1.7978, "step": 269100 }, { "epoch": 0.17, "learning_rate": 4.14121037463977e-05, "loss": 1.8102, "step": 269200 }, { "epoch": 0.17, "learning_rate": 4.140890169708614e-05, "loss": 1.8142, "step": 269300 }, { "epoch": 0.17, "learning_rate": 4.140569964777458e-05, "loss": 1.8127, "step": 269400 }, { "epoch": 0.17, "learning_rate": 4.140249759846302e-05, "loss": 1.7937, "step": 269500 }, { "epoch": 0.17, "learning_rate": 4.139929554915146e-05, "loss": 1.7971, "step": 269600 }, { "epoch": 0.17, "learning_rate": 4.13960934998399e-05, "loss": 1.8109, "step": 269700 }, { "epoch": 0.17, "learning_rate": 4.1392891450528344e-05, "loss": 1.8172, "step": 269800 }, { "epoch": 0.17, "learning_rate": 4.138968940121678e-05, "loss": 1.7802, "step": 269900 }, { "epoch": 0.17, "learning_rate": 4.1386487351905224e-05, "loss": 1.8025, "step": 270000 }, { "epoch": 0.17, "eval_loss": 1.7576011419296265, "eval_runtime": 93.2485, "eval_samples_per_second": 107.24, "eval_steps_per_second": 6.703, "step": 270000 }, { "epoch": 0.17, "learning_rate": 4.1383285302593663e-05, "loss": 1.8003, "step": 270100 }, { "epoch": 0.17, "learning_rate": 4.13800832532821e-05, "loss": 1.8094, "step": 270200 }, { "epoch": 0.17, "learning_rate": 4.137688120397054e-05, "loss": 1.7963, "step": 270300 }, { "epoch": 0.17, "learning_rate": 4.137367915465898e-05, "loss": 1.813, "step": 270400 }, { "epoch": 0.17, "learning_rate": 4.137047710534742e-05, "loss": 1.7953, "step": 270500 }, { "epoch": 0.17, "learning_rate": 4.136727505603586e-05, "loss": 1.817, "step": 270600 }, { "epoch": 0.17, "learning_rate": 4.136407300672431e-05, "loss": 1.8175, "step": 270700 }, { "epoch": 0.17, "learning_rate": 4.136087095741275e-05, "loss": 1.8087, "step": 270800 }, { "epoch": 0.17, "learning_rate": 4.135766890810119e-05, "loss": 1.8072, "step": 270900 }, { "epoch": 0.17, "learning_rate": 4.135446685878963e-05, "loss": 1.804, "step": 271000 }, { "epoch": 0.17, "eval_loss": 1.7540080547332764, "eval_runtime": 90.7464, "eval_samples_per_second": 110.197, "eval_steps_per_second": 6.887, "step": 271000 }, { "epoch": 0.17, "learning_rate": 4.135126480947807e-05, "loss": 1.8044, "step": 271100 }, { "epoch": 0.17, "learning_rate": 4.134806276016651e-05, "loss": 1.821, "step": 271200 }, { "epoch": 0.17, "learning_rate": 4.134486071085495e-05, "loss": 1.8208, "step": 271300 }, { "epoch": 0.17, "learning_rate": 4.1341658661543394e-05, "loss": 1.8007, "step": 271400 }, { "epoch": 0.17, "learning_rate": 4.1338456612231827e-05, "loss": 1.7961, "step": 271500 }, { "epoch": 0.17, "learning_rate": 4.133525456292027e-05, "loss": 1.8167, "step": 271600 }, { "epoch": 0.17, "learning_rate": 4.133205251360871e-05, "loss": 1.8004, "step": 271700 }, { "epoch": 0.17, "learning_rate": 4.132885046429715e-05, "loss": 1.7755, "step": 271800 }, { "epoch": 0.17, "learning_rate": 4.132564841498559e-05, "loss": 1.8096, "step": 271900 }, { "epoch": 0.17, "learning_rate": 4.132244636567403e-05, "loss": 1.7979, "step": 272000 }, { "epoch": 0.17, "eval_loss": 1.7514957189559937, "eval_runtime": 98.3041, "eval_samples_per_second": 101.725, "eval_steps_per_second": 6.358, "step": 272000 }, { "epoch": 0.17, "learning_rate": 4.131924431636248e-05, "loss": 1.7972, "step": 272100 }, { "epoch": 0.17, "learning_rate": 4.131604226705091e-05, "loss": 1.7869, "step": 272200 }, { "epoch": 0.17, "learning_rate": 4.131284021773936e-05, "loss": 1.797, "step": 272300 }, { "epoch": 0.17, "learning_rate": 4.13096381684278e-05, "loss": 1.8008, "step": 272400 }, { "epoch": 0.17, "learning_rate": 4.130643611911624e-05, "loss": 1.8006, "step": 272500 }, { "epoch": 0.17, "learning_rate": 4.130323406980468e-05, "loss": 1.8149, "step": 272600 }, { "epoch": 0.17, "learning_rate": 4.130003202049312e-05, "loss": 1.8091, "step": 272700 }, { "epoch": 0.17, "learning_rate": 4.1296829971181564e-05, "loss": 1.821, "step": 272800 }, { "epoch": 0.17, "learning_rate": 4.1293627921869996e-05, "loss": 1.7995, "step": 272900 }, { "epoch": 0.17, "learning_rate": 4.129042587255844e-05, "loss": 1.8135, "step": 273000 }, { "epoch": 0.17, "eval_loss": 1.755353331565857, "eval_runtime": 94.4146, "eval_samples_per_second": 105.916, "eval_steps_per_second": 6.62, "step": 273000 }, { "epoch": 0.17, "learning_rate": 4.1287223823246876e-05, "loss": 1.7945, "step": 273100 }, { "epoch": 0.17, "learning_rate": 4.128402177393532e-05, "loss": 1.7911, "step": 273200 }, { "epoch": 0.17, "learning_rate": 4.128081972462376e-05, "loss": 1.7949, "step": 273300 }, { "epoch": 0.17, "learning_rate": 4.12776176753122e-05, "loss": 1.805, "step": 273400 }, { "epoch": 0.18, "learning_rate": 4.127441562600064e-05, "loss": 1.7882, "step": 273500 }, { "epoch": 0.18, "learning_rate": 4.127121357668908e-05, "loss": 1.808, "step": 273600 }, { "epoch": 0.18, "learning_rate": 4.126801152737753e-05, "loss": 1.7896, "step": 273700 }, { "epoch": 0.18, "learning_rate": 4.126480947806596e-05, "loss": 1.7958, "step": 273800 }, { "epoch": 0.18, "learning_rate": 4.126160742875441e-05, "loss": 1.7912, "step": 273900 }, { "epoch": 0.18, "learning_rate": 4.125840537944285e-05, "loss": 1.794, "step": 274000 }, { "epoch": 0.18, "eval_loss": 1.7565677165985107, "eval_runtime": 92.645, "eval_samples_per_second": 107.939, "eval_steps_per_second": 6.746, "step": 274000 }, { "epoch": 0.18, "learning_rate": 4.125520333013129e-05, "loss": 1.7946, "step": 274100 }, { "epoch": 0.18, "learning_rate": 4.125200128081973e-05, "loss": 1.7912, "step": 274200 }, { "epoch": 0.18, "learning_rate": 4.1248799231508166e-05, "loss": 1.8272, "step": 274300 }, { "epoch": 0.18, "learning_rate": 4.124559718219661e-05, "loss": 1.7816, "step": 274400 }, { "epoch": 0.18, "learning_rate": 4.1242395132885046e-05, "loss": 1.8014, "step": 274500 }, { "epoch": 0.18, "learning_rate": 4.123919308357349e-05, "loss": 1.8063, "step": 274600 }, { "epoch": 0.18, "learning_rate": 4.1235991034261925e-05, "loss": 1.8033, "step": 274700 }, { "epoch": 0.18, "learning_rate": 4.123278898495037e-05, "loss": 1.8063, "step": 274800 }, { "epoch": 0.18, "learning_rate": 4.122958693563881e-05, "loss": 1.7914, "step": 274900 }, { "epoch": 0.18, "learning_rate": 4.122638488632725e-05, "loss": 1.7913, "step": 275000 }, { "epoch": 0.18, "eval_loss": 1.7535691261291504, "eval_runtime": 93.6871, "eval_samples_per_second": 106.738, "eval_steps_per_second": 6.671, "step": 275000 }, { "epoch": 0.18, "learning_rate": 4.12231828370157e-05, "loss": 1.7911, "step": 275100 }, { "epoch": 0.18, "learning_rate": 4.121998078770413e-05, "loss": 1.8104, "step": 275200 }, { "epoch": 0.18, "learning_rate": 4.121677873839258e-05, "loss": 1.8124, "step": 275300 }, { "epoch": 0.18, "learning_rate": 4.121357668908101e-05, "loss": 1.7807, "step": 275400 }, { "epoch": 0.18, "learning_rate": 4.121037463976946e-05, "loss": 1.8103, "step": 275500 }, { "epoch": 0.18, "learning_rate": 4.120717259045789e-05, "loss": 1.8071, "step": 275600 }, { "epoch": 0.18, "learning_rate": 4.1203970541146336e-05, "loss": 1.7895, "step": 275700 }, { "epoch": 0.18, "learning_rate": 4.1200768491834776e-05, "loss": 1.7873, "step": 275800 }, { "epoch": 0.18, "learning_rate": 4.1197566442523216e-05, "loss": 1.7808, "step": 275900 }, { "epoch": 0.18, "learning_rate": 4.119436439321166e-05, "loss": 1.7932, "step": 276000 }, { "epoch": 0.18, "eval_loss": 1.7531934976577759, "eval_runtime": 93.126, "eval_samples_per_second": 107.381, "eval_steps_per_second": 6.711, "step": 276000 }, { "epoch": 0.18, "learning_rate": 4.1191162343900095e-05, "loss": 1.8076, "step": 276100 }, { "epoch": 0.18, "learning_rate": 4.118796029458854e-05, "loss": 1.7915, "step": 276200 }, { "epoch": 0.18, "learning_rate": 4.1184758245276975e-05, "loss": 1.8127, "step": 276300 }, { "epoch": 0.18, "learning_rate": 4.118155619596542e-05, "loss": 1.8018, "step": 276400 }, { "epoch": 0.18, "learning_rate": 4.117835414665386e-05, "loss": 1.7921, "step": 276500 }, { "epoch": 0.18, "learning_rate": 4.11751520973423e-05, "loss": 1.7896, "step": 276600 }, { "epoch": 0.18, "learning_rate": 4.117195004803075e-05, "loss": 1.7911, "step": 276700 }, { "epoch": 0.18, "learning_rate": 4.116874799871918e-05, "loss": 1.8126, "step": 276800 }, { "epoch": 0.18, "learning_rate": 4.116554594940763e-05, "loss": 1.7913, "step": 276900 }, { "epoch": 0.18, "learning_rate": 4.116234390009606e-05, "loss": 1.7933, "step": 277000 }, { "epoch": 0.18, "eval_loss": 1.7546793222427368, "eval_runtime": 93.5563, "eval_samples_per_second": 106.888, "eval_steps_per_second": 6.68, "step": 277000 }, { "epoch": 0.18, "learning_rate": 4.1159141850784506e-05, "loss": 1.8065, "step": 277100 }, { "epoch": 0.18, "learning_rate": 4.115593980147294e-05, "loss": 1.7924, "step": 277200 }, { "epoch": 0.18, "learning_rate": 4.1152737752161386e-05, "loss": 1.8011, "step": 277300 }, { "epoch": 0.18, "learning_rate": 4.1149535702849825e-05, "loss": 1.7873, "step": 277400 }, { "epoch": 0.18, "learning_rate": 4.1146333653538265e-05, "loss": 1.8225, "step": 277500 }, { "epoch": 0.18, "learning_rate": 4.114313160422671e-05, "loss": 1.782, "step": 277600 }, { "epoch": 0.18, "learning_rate": 4.1139929554915145e-05, "loss": 1.7628, "step": 277700 }, { "epoch": 0.18, "learning_rate": 4.113672750560359e-05, "loss": 1.7912, "step": 277800 }, { "epoch": 0.18, "learning_rate": 4.1133525456292024e-05, "loss": 1.8081, "step": 277900 }, { "epoch": 0.18, "learning_rate": 4.113032340698047e-05, "loss": 1.7893, "step": 278000 }, { "epoch": 0.18, "eval_loss": 1.7536845207214355, "eval_runtime": 93.4147, "eval_samples_per_second": 107.05, "eval_steps_per_second": 6.691, "step": 278000 }, { "epoch": 0.18, "learning_rate": 4.112712135766891e-05, "loss": 1.8085, "step": 278100 }, { "epoch": 0.18, "learning_rate": 4.112391930835735e-05, "loss": 1.7741, "step": 278200 }, { "epoch": 0.18, "learning_rate": 4.1120717259045797e-05, "loss": 1.8004, "step": 278300 }, { "epoch": 0.18, "learning_rate": 4.111751520973423e-05, "loss": 1.8084, "step": 278400 }, { "epoch": 0.18, "learning_rate": 4.1114313160422676e-05, "loss": 1.7796, "step": 278500 }, { "epoch": 0.18, "learning_rate": 4.111111111111111e-05, "loss": 1.7799, "step": 278600 }, { "epoch": 0.18, "learning_rate": 4.1107909061799556e-05, "loss": 1.8153, "step": 278700 }, { "epoch": 0.18, "learning_rate": 4.110470701248799e-05, "loss": 1.8007, "step": 278800 }, { "epoch": 0.18, "learning_rate": 4.1101504963176435e-05, "loss": 1.8009, "step": 278900 }, { "epoch": 0.18, "learning_rate": 4.1098302913864875e-05, "loss": 1.797, "step": 279000 }, { "epoch": 0.18, "eval_loss": 1.7543021440505981, "eval_runtime": 93.6915, "eval_samples_per_second": 106.733, "eval_steps_per_second": 6.671, "step": 279000 }, { "epoch": 0.18, "learning_rate": 4.1095100864553314e-05, "loss": 1.789, "step": 279100 }, { "epoch": 0.18, "learning_rate": 4.109189881524176e-05, "loss": 1.8002, "step": 279200 }, { "epoch": 0.18, "learning_rate": 4.1088696765930194e-05, "loss": 1.7711, "step": 279300 }, { "epoch": 0.18, "learning_rate": 4.108549471661864e-05, "loss": 1.7703, "step": 279400 }, { "epoch": 0.18, "learning_rate": 4.1082292667307073e-05, "loss": 1.7786, "step": 279500 }, { "epoch": 0.18, "learning_rate": 4.107909061799552e-05, "loss": 1.8025, "step": 279600 }, { "epoch": 0.18, "learning_rate": 4.107588856868396e-05, "loss": 1.8134, "step": 279700 }, { "epoch": 0.18, "learning_rate": 4.10726865193724e-05, "loss": 1.8027, "step": 279800 }, { "epoch": 0.18, "learning_rate": 4.1069484470060846e-05, "loss": 1.773, "step": 279900 }, { "epoch": 0.18, "learning_rate": 4.106628242074928e-05, "loss": 1.7911, "step": 280000 }, { "epoch": 0.18, "eval_loss": 1.7515407800674438, "eval_runtime": 97.0071, "eval_samples_per_second": 103.085, "eval_steps_per_second": 6.443, "step": 280000 }, { "epoch": 0.18, "learning_rate": 4.1063080371437725e-05, "loss": 1.7969, "step": 280100 }, { "epoch": 0.18, "learning_rate": 4.105987832212616e-05, "loss": 1.808, "step": 280200 }, { "epoch": 0.18, "learning_rate": 4.1056676272814605e-05, "loss": 1.7917, "step": 280300 }, { "epoch": 0.18, "learning_rate": 4.105347422350304e-05, "loss": 1.7778, "step": 280400 }, { "epoch": 0.18, "learning_rate": 4.1050272174191484e-05, "loss": 1.8013, "step": 280500 }, { "epoch": 0.18, "learning_rate": 4.1047070124879924e-05, "loss": 1.8179, "step": 280600 }, { "epoch": 0.18, "learning_rate": 4.1043868075568364e-05, "loss": 1.7632, "step": 280700 }, { "epoch": 0.18, "learning_rate": 4.104066602625681e-05, "loss": 1.7983, "step": 280800 }, { "epoch": 0.18, "learning_rate": 4.103746397694524e-05, "loss": 1.8051, "step": 280900 }, { "epoch": 0.18, "learning_rate": 4.103426192763369e-05, "loss": 1.7943, "step": 281000 }, { "epoch": 0.18, "eval_loss": 1.7546894550323486, "eval_runtime": 95.9766, "eval_samples_per_second": 104.192, "eval_steps_per_second": 6.512, "step": 281000 }, { "epoch": 0.18, "learning_rate": 4.103105987832212e-05, "loss": 1.7897, "step": 281100 }, { "epoch": 0.18, "learning_rate": 4.102785782901057e-05, "loss": 1.7993, "step": 281200 }, { "epoch": 0.18, "learning_rate": 4.102465577969901e-05, "loss": 1.81, "step": 281300 }, { "epoch": 0.18, "learning_rate": 4.102145373038745e-05, "loss": 1.7744, "step": 281400 }, { "epoch": 0.18, "learning_rate": 4.1018251681075895e-05, "loss": 1.7913, "step": 281500 }, { "epoch": 0.18, "learning_rate": 4.101504963176433e-05, "loss": 1.7907, "step": 281600 }, { "epoch": 0.18, "learning_rate": 4.1011847582452775e-05, "loss": 1.7913, "step": 281700 }, { "epoch": 0.18, "learning_rate": 4.100864553314121e-05, "loss": 1.797, "step": 281800 }, { "epoch": 0.18, "learning_rate": 4.1005443483829654e-05, "loss": 1.7942, "step": 281900 }, { "epoch": 0.18, "learning_rate": 4.1002241434518094e-05, "loss": 1.8093, "step": 282000 }, { "epoch": 0.18, "eval_loss": 1.7496833801269531, "eval_runtime": 95.8476, "eval_samples_per_second": 104.332, "eval_steps_per_second": 6.521, "step": 282000 }, { "epoch": 0.18, "learning_rate": 4.0999039385206534e-05, "loss": 1.785, "step": 282100 }, { "epoch": 0.18, "learning_rate": 4.0995837335894973e-05, "loss": 1.809, "step": 282200 }, { "epoch": 0.18, "learning_rate": 4.099263528658341e-05, "loss": 1.8029, "step": 282300 }, { "epoch": 0.18, "learning_rate": 4.098943323727186e-05, "loss": 1.7959, "step": 282400 }, { "epoch": 0.18, "learning_rate": 4.098623118796029e-05, "loss": 1.7985, "step": 282500 }, { "epoch": 0.18, "learning_rate": 4.098302913864874e-05, "loss": 1.782, "step": 282600 }, { "epoch": 0.18, "learning_rate": 4.097982708933718e-05, "loss": 1.7927, "step": 282700 }, { "epoch": 0.18, "learning_rate": 4.097662504002562e-05, "loss": 1.7789, "step": 282800 }, { "epoch": 0.18, "learning_rate": 4.097342299071406e-05, "loss": 1.791, "step": 282900 }, { "epoch": 0.18, "learning_rate": 4.09702209414025e-05, "loss": 1.7903, "step": 283000 }, { "epoch": 0.18, "eval_loss": 1.7557775974273682, "eval_runtime": 94.7062, "eval_samples_per_second": 105.59, "eval_steps_per_second": 6.599, "step": 283000 }, { "epoch": 0.18, "learning_rate": 4.0967018892090945e-05, "loss": 1.7988, "step": 283100 }, { "epoch": 0.18, "learning_rate": 4.096381684277938e-05, "loss": 1.8032, "step": 283200 }, { "epoch": 0.18, "learning_rate": 4.0960614793467824e-05, "loss": 1.7887, "step": 283300 }, { "epoch": 0.18, "learning_rate": 4.095741274415626e-05, "loss": 1.8199, "step": 283400 }, { "epoch": 0.18, "learning_rate": 4.0954210694844704e-05, "loss": 1.7795, "step": 283500 }, { "epoch": 0.18, "learning_rate": 4.095100864553314e-05, "loss": 1.7937, "step": 283600 }, { "epoch": 0.18, "learning_rate": 4.094780659622158e-05, "loss": 1.788, "step": 283700 }, { "epoch": 0.18, "learning_rate": 4.094460454691002e-05, "loss": 1.8167, "step": 283800 }, { "epoch": 0.18, "learning_rate": 4.094140249759846e-05, "loss": 1.7981, "step": 283900 }, { "epoch": 0.18, "learning_rate": 4.093820044828691e-05, "loss": 1.7917, "step": 284000 }, { "epoch": 0.18, "eval_loss": 1.751325249671936, "eval_runtime": 96.156, "eval_samples_per_second": 103.998, "eval_steps_per_second": 6.5, "step": 284000 }, { "epoch": 0.18, "learning_rate": 4.093499839897534e-05, "loss": 1.8003, "step": 284100 }, { "epoch": 0.18, "learning_rate": 4.093179634966379e-05, "loss": 1.7993, "step": 284200 }, { "epoch": 0.18, "learning_rate": 4.092859430035223e-05, "loss": 1.8046, "step": 284300 }, { "epoch": 0.18, "learning_rate": 4.092539225104067e-05, "loss": 1.7872, "step": 284400 }, { "epoch": 0.18, "learning_rate": 4.092219020172911e-05, "loss": 1.8073, "step": 284500 }, { "epoch": 0.18, "learning_rate": 4.091898815241755e-05, "loss": 1.8059, "step": 284600 }, { "epoch": 0.18, "learning_rate": 4.0915786103105994e-05, "loss": 1.7979, "step": 284700 }, { "epoch": 0.18, "learning_rate": 4.091258405379443e-05, "loss": 1.7959, "step": 284800 }, { "epoch": 0.18, "learning_rate": 4.0909382004482874e-05, "loss": 1.8038, "step": 284900 }, { "epoch": 0.18, "learning_rate": 4.090617995517131e-05, "loss": 1.795, "step": 285000 }, { "epoch": 0.18, "eval_loss": 1.7536070346832275, "eval_runtime": 91.7157, "eval_samples_per_second": 109.033, "eval_steps_per_second": 6.815, "step": 285000 }, { "epoch": 0.18, "learning_rate": 4.090297790585975e-05, "loss": 1.7842, "step": 285100 }, { "epoch": 0.18, "learning_rate": 4.089977585654819e-05, "loss": 1.7999, "step": 285200 }, { "epoch": 0.18, "learning_rate": 4.089657380723663e-05, "loss": 1.7808, "step": 285300 }, { "epoch": 0.18, "learning_rate": 4.089337175792507e-05, "loss": 1.7872, "step": 285400 }, { "epoch": 0.18, "learning_rate": 4.089016970861351e-05, "loss": 1.7822, "step": 285500 }, { "epoch": 0.18, "learning_rate": 4.088696765930196e-05, "loss": 1.8034, "step": 285600 }, { "epoch": 0.18, "learning_rate": 4.088376560999039e-05, "loss": 1.8069, "step": 285700 }, { "epoch": 0.18, "learning_rate": 4.088056356067884e-05, "loss": 1.7965, "step": 285800 }, { "epoch": 0.18, "learning_rate": 4.087736151136728e-05, "loss": 1.7998, "step": 285900 }, { "epoch": 0.18, "learning_rate": 4.087415946205572e-05, "loss": 1.7797, "step": 286000 }, { "epoch": 0.18, "eval_loss": 1.7520408630371094, "eval_runtime": 90.4984, "eval_samples_per_second": 110.499, "eval_steps_per_second": 6.906, "step": 286000 }, { "epoch": 0.18, "learning_rate": 4.087095741274416e-05, "loss": 1.8024, "step": 286100 }, { "epoch": 0.18, "learning_rate": 4.08677553634326e-05, "loss": 1.7942, "step": 286200 }, { "epoch": 0.18, "learning_rate": 4.0864553314121043e-05, "loss": 1.7904, "step": 286300 }, { "epoch": 0.18, "learning_rate": 4.0861351264809476e-05, "loss": 1.7861, "step": 286400 }, { "epoch": 0.18, "learning_rate": 4.085814921549792e-05, "loss": 1.7847, "step": 286500 }, { "epoch": 0.18, "learning_rate": 4.085494716618636e-05, "loss": 1.7787, "step": 286600 }, { "epoch": 0.18, "learning_rate": 4.08517451168748e-05, "loss": 1.8092, "step": 286700 }, { "epoch": 0.18, "learning_rate": 4.084854306756324e-05, "loss": 1.7601, "step": 286800 }, { "epoch": 0.18, "learning_rate": 4.084534101825168e-05, "loss": 1.7793, "step": 286900 }, { "epoch": 0.18, "learning_rate": 4.084213896894012e-05, "loss": 1.8035, "step": 287000 }, { "epoch": 0.18, "eval_loss": 1.7495007514953613, "eval_runtime": 92.6224, "eval_samples_per_second": 107.965, "eval_steps_per_second": 6.748, "step": 287000 }, { "epoch": 0.18, "learning_rate": 4.083893691962856e-05, "loss": 1.7925, "step": 287100 }, { "epoch": 0.18, "learning_rate": 4.083573487031701e-05, "loss": 1.7863, "step": 287200 }, { "epoch": 0.18, "learning_rate": 4.083253282100545e-05, "loss": 1.7986, "step": 287300 }, { "epoch": 0.18, "learning_rate": 4.082933077169389e-05, "loss": 1.7865, "step": 287400 }, { "epoch": 0.18, "learning_rate": 4.082612872238233e-05, "loss": 1.8057, "step": 287500 }, { "epoch": 0.18, "learning_rate": 4.082292667307077e-05, "loss": 1.7987, "step": 287600 }, { "epoch": 0.18, "learning_rate": 4.0819724623759207e-05, "loss": 1.7886, "step": 287700 }, { "epoch": 0.18, "learning_rate": 4.0816522574447646e-05, "loss": 1.7816, "step": 287800 }, { "epoch": 0.18, "learning_rate": 4.081332052513609e-05, "loss": 1.8021, "step": 287900 }, { "epoch": 0.18, "learning_rate": 4.0810118475824526e-05, "loss": 1.7933, "step": 288000 }, { "epoch": 0.18, "eval_loss": 1.752441644668579, "eval_runtime": 89.9017, "eval_samples_per_second": 111.233, "eval_steps_per_second": 6.952, "step": 288000 }, { "epoch": 0.18, "learning_rate": 4.080691642651297e-05, "loss": 1.7924, "step": 288100 }, { "epoch": 0.18, "learning_rate": 4.080371437720141e-05, "loss": 1.7851, "step": 288200 }, { "epoch": 0.18, "learning_rate": 4.080051232788985e-05, "loss": 1.7908, "step": 288300 }, { "epoch": 0.18, "learning_rate": 4.079731027857829e-05, "loss": 1.7832, "step": 288400 }, { "epoch": 0.18, "learning_rate": 4.079410822926673e-05, "loss": 1.7807, "step": 288500 }, { "epoch": 0.18, "learning_rate": 4.079090617995517e-05, "loss": 1.7744, "step": 288600 }, { "epoch": 0.18, "learning_rate": 4.078770413064361e-05, "loss": 1.7864, "step": 288700 }, { "epoch": 0.18, "learning_rate": 4.078450208133206e-05, "loss": 1.8064, "step": 288800 }, { "epoch": 0.18, "learning_rate": 4.07813000320205e-05, "loss": 1.8061, "step": 288900 }, { "epoch": 0.18, "learning_rate": 4.077809798270894e-05, "loss": 1.8101, "step": 289000 }, { "epoch": 0.18, "eval_loss": 1.7501373291015625, "eval_runtime": 89.7546, "eval_samples_per_second": 111.415, "eval_steps_per_second": 6.963, "step": 289000 }, { "epoch": 0.19, "learning_rate": 4.0774895933397376e-05, "loss": 1.8026, "step": 289100 }, { "epoch": 0.19, "learning_rate": 4.0771693884085816e-05, "loss": 1.8065, "step": 289200 }, { "epoch": 0.19, "learning_rate": 4.0768491834774256e-05, "loss": 1.7832, "step": 289300 }, { "epoch": 0.19, "learning_rate": 4.0765289785462696e-05, "loss": 1.7949, "step": 289400 }, { "epoch": 0.19, "learning_rate": 4.076208773615114e-05, "loss": 1.7877, "step": 289500 }, { "epoch": 0.19, "learning_rate": 4.075888568683958e-05, "loss": 1.8036, "step": 289600 }, { "epoch": 0.19, "learning_rate": 4.075568363752802e-05, "loss": 1.7808, "step": 289700 }, { "epoch": 0.19, "learning_rate": 4.075248158821646e-05, "loss": 1.7921, "step": 289800 }, { "epoch": 0.19, "learning_rate": 4.07492795389049e-05, "loss": 1.8006, "step": 289900 }, { "epoch": 0.19, "learning_rate": 4.074607748959334e-05, "loss": 1.7772, "step": 290000 }, { "epoch": 0.19, "eval_loss": 1.7509233951568604, "eval_runtime": 92.4769, "eval_samples_per_second": 108.135, "eval_steps_per_second": 6.758, "step": 290000 }, { "epoch": 0.19, "learning_rate": 4.074287544028178e-05, "loss": 1.7718, "step": 290100 }, { "epoch": 0.19, "learning_rate": 4.073967339097022e-05, "loss": 1.7981, "step": 290200 }, { "epoch": 0.19, "learning_rate": 4.073647134165866e-05, "loss": 1.7895, "step": 290300 }, { "epoch": 0.19, "learning_rate": 4.0733269292347107e-05, "loss": 1.8071, "step": 290400 }, { "epoch": 0.19, "learning_rate": 4.0730067243035546e-05, "loss": 1.7771, "step": 290500 }, { "epoch": 0.19, "learning_rate": 4.0726865193723986e-05, "loss": 1.8027, "step": 290600 }, { "epoch": 0.19, "learning_rate": 4.0723663144412426e-05, "loss": 1.7901, "step": 290700 }, { "epoch": 0.19, "learning_rate": 4.0720461095100866e-05, "loss": 1.7967, "step": 290800 }, { "epoch": 0.19, "learning_rate": 4.0717259045789305e-05, "loss": 1.8033, "step": 290900 }, { "epoch": 0.19, "learning_rate": 4.0714056996477745e-05, "loss": 1.7805, "step": 291000 }, { "epoch": 0.19, "eval_loss": 1.7524011135101318, "eval_runtime": 92.9425, "eval_samples_per_second": 107.593, "eval_steps_per_second": 6.725, "step": 291000 }, { "epoch": 0.19, "learning_rate": 4.071085494716619e-05, "loss": 1.7946, "step": 291100 }, { "epoch": 0.19, "learning_rate": 4.070765289785463e-05, "loss": 1.7724, "step": 291200 }, { "epoch": 0.19, "learning_rate": 4.070445084854307e-05, "loss": 1.804, "step": 291300 }, { "epoch": 0.19, "learning_rate": 4.070124879923151e-05, "loss": 1.8067, "step": 291400 }, { "epoch": 0.19, "learning_rate": 4.069804674991995e-05, "loss": 1.799, "step": 291500 }, { "epoch": 0.19, "learning_rate": 4.069484470060839e-05, "loss": 1.8034, "step": 291600 }, { "epoch": 0.19, "learning_rate": 4.069164265129683e-05, "loss": 1.7868, "step": 291700 }, { "epoch": 0.19, "learning_rate": 4.068844060198527e-05, "loss": 1.7741, "step": 291800 }, { "epoch": 0.19, "learning_rate": 4.0685238552673716e-05, "loss": 1.7791, "step": 291900 }, { "epoch": 0.19, "learning_rate": 4.0682036503362156e-05, "loss": 1.7839, "step": 292000 }, { "epoch": 0.19, "eval_loss": 1.750470519065857, "eval_runtime": 96.602, "eval_samples_per_second": 103.518, "eval_steps_per_second": 6.47, "step": 292000 }, { "epoch": 0.19, "learning_rate": 4.0678834454050596e-05, "loss": 1.7511, "step": 292100 }, { "epoch": 0.19, "learning_rate": 4.0675632404739035e-05, "loss": 1.7857, "step": 292200 }, { "epoch": 0.19, "learning_rate": 4.0672430355427475e-05, "loss": 1.7755, "step": 292300 }, { "epoch": 0.19, "learning_rate": 4.0669228306115915e-05, "loss": 1.7792, "step": 292400 }, { "epoch": 0.19, "learning_rate": 4.0666026256804355e-05, "loss": 1.7854, "step": 292500 }, { "epoch": 0.19, "learning_rate": 4.06628242074928e-05, "loss": 1.8096, "step": 292600 }, { "epoch": 0.19, "learning_rate": 4.065962215818124e-05, "loss": 1.7859, "step": 292700 }, { "epoch": 0.19, "learning_rate": 4.065642010886968e-05, "loss": 1.7807, "step": 292800 }, { "epoch": 0.19, "learning_rate": 4.065321805955812e-05, "loss": 1.7959, "step": 292900 }, { "epoch": 0.19, "learning_rate": 4.065001601024656e-05, "loss": 1.7998, "step": 293000 }, { "epoch": 0.19, "eval_loss": 1.751732587814331, "eval_runtime": 92.9582, "eval_samples_per_second": 107.575, "eval_steps_per_second": 6.723, "step": 293000 }, { "epoch": 0.19, "learning_rate": 4.0646813960935e-05, "loss": 1.7933, "step": 293100 }, { "epoch": 0.19, "learning_rate": 4.064361191162344e-05, "loss": 1.7883, "step": 293200 }, { "epoch": 0.19, "learning_rate": 4.064040986231188e-05, "loss": 1.7921, "step": 293300 }, { "epoch": 0.19, "learning_rate": 4.063720781300032e-05, "loss": 1.7886, "step": 293400 }, { "epoch": 0.19, "learning_rate": 4.0634005763688766e-05, "loss": 1.7723, "step": 293500 }, { "epoch": 0.19, "learning_rate": 4.0630803714377205e-05, "loss": 1.8072, "step": 293600 }, { "epoch": 0.19, "learning_rate": 4.0627601665065645e-05, "loss": 1.8094, "step": 293700 }, { "epoch": 0.19, "learning_rate": 4.0624399615754085e-05, "loss": 1.7805, "step": 293800 }, { "epoch": 0.19, "learning_rate": 4.0621197566442525e-05, "loss": 1.7871, "step": 293900 }, { "epoch": 0.19, "learning_rate": 4.0617995517130964e-05, "loss": 1.7994, "step": 294000 }, { "epoch": 0.19, "eval_loss": 1.751785159111023, "eval_runtime": 95.2627, "eval_samples_per_second": 104.973, "eval_steps_per_second": 6.561, "step": 294000 }, { "epoch": 0.19, "learning_rate": 4.0614793467819404e-05, "loss": 1.7833, "step": 294100 }, { "epoch": 0.19, "learning_rate": 4.061159141850785e-05, "loss": 1.7933, "step": 294200 }, { "epoch": 0.19, "learning_rate": 4.0608389369196284e-05, "loss": 1.7889, "step": 294300 }, { "epoch": 0.19, "learning_rate": 4.060518731988473e-05, "loss": 1.7844, "step": 294400 }, { "epoch": 0.19, "learning_rate": 4.060198527057317e-05, "loss": 1.8079, "step": 294500 }, { "epoch": 0.19, "learning_rate": 4.059878322126161e-05, "loss": 1.795, "step": 294600 }, { "epoch": 0.19, "learning_rate": 4.059558117195005e-05, "loss": 1.802, "step": 294700 }, { "epoch": 0.19, "learning_rate": 4.059237912263849e-05, "loss": 1.7618, "step": 294800 }, { "epoch": 0.19, "learning_rate": 4.0589177073326935e-05, "loss": 1.816, "step": 294900 }, { "epoch": 0.19, "learning_rate": 4.058597502401537e-05, "loss": 1.7838, "step": 295000 }, { "epoch": 0.19, "eval_loss": 1.7522904872894287, "eval_runtime": 94.0748, "eval_samples_per_second": 106.298, "eval_steps_per_second": 6.644, "step": 295000 }, { "epoch": 0.19, "learning_rate": 4.0582772974703815e-05, "loss": 1.7902, "step": 295100 }, { "epoch": 0.19, "learning_rate": 4.0579570925392255e-05, "loss": 1.7866, "step": 295200 }, { "epoch": 0.19, "learning_rate": 4.0576368876080694e-05, "loss": 1.7812, "step": 295300 }, { "epoch": 0.19, "learning_rate": 4.0573166826769134e-05, "loss": 1.7882, "step": 295400 }, { "epoch": 0.19, "learning_rate": 4.0569964777457574e-05, "loss": 1.7874, "step": 295500 }, { "epoch": 0.19, "learning_rate": 4.0566762728146014e-05, "loss": 1.7896, "step": 295600 }, { "epoch": 0.19, "learning_rate": 4.0563560678834453e-05, "loss": 1.7873, "step": 295700 }, { "epoch": 0.19, "learning_rate": 4.05603586295229e-05, "loss": 1.8018, "step": 295800 }, { "epoch": 0.19, "learning_rate": 4.055715658021133e-05, "loss": 1.792, "step": 295900 }, { "epoch": 0.19, "learning_rate": 4.055395453089978e-05, "loss": 1.7793, "step": 296000 }, { "epoch": 0.19, "eval_loss": 1.7492058277130127, "eval_runtime": 88.1787, "eval_samples_per_second": 113.406, "eval_steps_per_second": 7.088, "step": 296000 }, { "epoch": 0.19, "learning_rate": 4.055075248158822e-05, "loss": 1.7869, "step": 296100 }, { "epoch": 0.19, "learning_rate": 4.054755043227666e-05, "loss": 1.7702, "step": 296200 }, { "epoch": 0.19, "learning_rate": 4.05443483829651e-05, "loss": 1.7822, "step": 296300 }, { "epoch": 0.19, "learning_rate": 4.054114633365354e-05, "loss": 1.7946, "step": 296400 }, { "epoch": 0.19, "learning_rate": 4.0537944284341985e-05, "loss": 1.7817, "step": 296500 }, { "epoch": 0.19, "learning_rate": 4.053474223503042e-05, "loss": 1.7706, "step": 296600 }, { "epoch": 0.19, "learning_rate": 4.0531540185718864e-05, "loss": 1.7831, "step": 296700 }, { "epoch": 0.19, "learning_rate": 4.0528338136407304e-05, "loss": 1.8195, "step": 296800 }, { "epoch": 0.19, "learning_rate": 4.0525136087095744e-05, "loss": 1.7805, "step": 296900 }, { "epoch": 0.19, "learning_rate": 4.0521934037784184e-05, "loss": 1.7658, "step": 297000 }, { "epoch": 0.19, "eval_loss": 1.7529089450836182, "eval_runtime": 88.4429, "eval_samples_per_second": 113.067, "eval_steps_per_second": 7.067, "step": 297000 }, { "epoch": 0.19, "learning_rate": 4.051873198847262e-05, "loss": 1.7812, "step": 297100 }, { "epoch": 0.19, "learning_rate": 4.051552993916107e-05, "loss": 1.7752, "step": 297200 }, { "epoch": 0.19, "learning_rate": 4.05123278898495e-05, "loss": 1.7989, "step": 297300 }, { "epoch": 0.19, "learning_rate": 4.050912584053795e-05, "loss": 1.7652, "step": 297400 }, { "epoch": 0.19, "learning_rate": 4.050592379122638e-05, "loss": 1.7972, "step": 297500 }, { "epoch": 0.19, "learning_rate": 4.050272174191483e-05, "loss": 1.7785, "step": 297600 }, { "epoch": 0.19, "learning_rate": 4.049951969260327e-05, "loss": 1.7966, "step": 297700 }, { "epoch": 0.19, "learning_rate": 4.049631764329171e-05, "loss": 1.7782, "step": 297800 }, { "epoch": 0.19, "learning_rate": 4.049311559398015e-05, "loss": 1.7778, "step": 297900 }, { "epoch": 0.19, "learning_rate": 4.048991354466859e-05, "loss": 1.764, "step": 298000 }, { "epoch": 0.19, "eval_loss": 1.752527117729187, "eval_runtime": 88.9559, "eval_samples_per_second": 112.415, "eval_steps_per_second": 7.026, "step": 298000 }, { "epoch": 0.19, "learning_rate": 4.0486711495357034e-05, "loss": 1.7931, "step": 298100 }, { "epoch": 0.19, "learning_rate": 4.048350944604547e-05, "loss": 1.7703, "step": 298200 }, { "epoch": 0.19, "learning_rate": 4.0480307396733914e-05, "loss": 1.7933, "step": 298300 }, { "epoch": 0.19, "learning_rate": 4.0477105347422353e-05, "loss": 1.7916, "step": 298400 }, { "epoch": 0.19, "learning_rate": 4.047390329811079e-05, "loss": 1.7763, "step": 298500 }, { "epoch": 0.19, "learning_rate": 4.047070124879923e-05, "loss": 1.8056, "step": 298600 }, { "epoch": 0.19, "learning_rate": 4.046749919948767e-05, "loss": 1.7887, "step": 298700 }, { "epoch": 0.19, "learning_rate": 4.046429715017612e-05, "loss": 1.7723, "step": 298800 }, { "epoch": 0.19, "learning_rate": 4.046109510086455e-05, "loss": 1.7847, "step": 298900 }, { "epoch": 0.19, "learning_rate": 4.0457893051553e-05, "loss": 1.7896, "step": 299000 }, { "epoch": 0.19, "eval_loss": 1.7499617338180542, "eval_runtime": 88.5615, "eval_samples_per_second": 112.916, "eval_steps_per_second": 7.057, "step": 299000 }, { "epoch": 0.19, "learning_rate": 4.045469100224143e-05, "loss": 1.7736, "step": 299100 }, { "epoch": 0.19, "learning_rate": 4.045148895292988e-05, "loss": 1.7714, "step": 299200 }, { "epoch": 0.19, "learning_rate": 4.044828690361832e-05, "loss": 1.8048, "step": 299300 }, { "epoch": 0.19, "learning_rate": 4.044508485430676e-05, "loss": 1.7586, "step": 299400 }, { "epoch": 0.19, "learning_rate": 4.0441882804995204e-05, "loss": 1.7773, "step": 299500 }, { "epoch": 0.19, "learning_rate": 4.043868075568364e-05, "loss": 1.7751, "step": 299600 }, { "epoch": 0.19, "learning_rate": 4.0435478706372084e-05, "loss": 1.7861, "step": 299700 }, { "epoch": 0.19, "learning_rate": 4.0432276657060517e-05, "loss": 1.7707, "step": 299800 }, { "epoch": 0.19, "learning_rate": 4.042907460774896e-05, "loss": 1.8094, "step": 299900 }, { "epoch": 0.19, "learning_rate": 4.04258725584374e-05, "loss": 1.8042, "step": 300000 }, { "epoch": 0.19, "eval_loss": 1.7503160238265991, "eval_runtime": 91.6751, "eval_samples_per_second": 109.081, "eval_steps_per_second": 6.818, "step": 300000 }, { "epoch": 0.19, "learning_rate": 4.042267050912584e-05, "loss": 1.7762, "step": 300100 }, { "epoch": 0.19, "learning_rate": 4.041946845981428e-05, "loss": 1.7767, "step": 300200 }, { "epoch": 0.19, "learning_rate": 4.041626641050272e-05, "loss": 1.7795, "step": 300300 }, { "epoch": 0.19, "learning_rate": 4.041306436119117e-05, "loss": 1.7826, "step": 300400 }, { "epoch": 0.19, "learning_rate": 4.04098623118796e-05, "loss": 1.7856, "step": 300500 }, { "epoch": 0.19, "learning_rate": 4.040666026256805e-05, "loss": 1.7862, "step": 300600 }, { "epoch": 0.19, "learning_rate": 4.040345821325648e-05, "loss": 1.8079, "step": 300700 }, { "epoch": 0.19, "learning_rate": 4.040025616394493e-05, "loss": 1.8041, "step": 300800 }, { "epoch": 0.19, "learning_rate": 4.039705411463337e-05, "loss": 1.7976, "step": 300900 }, { "epoch": 0.19, "learning_rate": 4.039385206532181e-05, "loss": 1.7956, "step": 301000 }, { "epoch": 0.19, "eval_loss": 1.7497564554214478, "eval_runtime": 91.6441, "eval_samples_per_second": 109.118, "eval_steps_per_second": 6.82, "step": 301000 }, { "epoch": 0.19, "learning_rate": 4.0390650016010254e-05, "loss": 1.794, "step": 301100 }, { "epoch": 0.19, "learning_rate": 4.0387447966698686e-05, "loss": 1.7689, "step": 301200 }, { "epoch": 0.19, "learning_rate": 4.038424591738713e-05, "loss": 1.7757, "step": 301300 }, { "epoch": 0.19, "learning_rate": 4.0381043868075566e-05, "loss": 1.7937, "step": 301400 }, { "epoch": 0.19, "learning_rate": 4.037784181876401e-05, "loss": 1.7857, "step": 301500 }, { "epoch": 0.19, "learning_rate": 4.037463976945245e-05, "loss": 1.7732, "step": 301600 }, { "epoch": 0.19, "learning_rate": 4.037143772014089e-05, "loss": 1.7813, "step": 301700 }, { "epoch": 0.19, "learning_rate": 4.036823567082934e-05, "loss": 1.7982, "step": 301800 }, { "epoch": 0.19, "learning_rate": 4.036503362151777e-05, "loss": 1.7934, "step": 301900 }, { "epoch": 0.19, "learning_rate": 4.036183157220622e-05, "loss": 1.7751, "step": 302000 }, { "epoch": 0.19, "eval_loss": 1.7498928308486938, "eval_runtime": 89.2611, "eval_samples_per_second": 112.031, "eval_steps_per_second": 7.002, "step": 302000 }, { "epoch": 0.19, "learning_rate": 4.035862952289465e-05, "loss": 1.7709, "step": 302100 }, { "epoch": 0.19, "learning_rate": 4.03554274735831e-05, "loss": 1.8013, "step": 302200 }, { "epoch": 0.19, "learning_rate": 4.035222542427153e-05, "loss": 1.7979, "step": 302300 }, { "epoch": 0.19, "learning_rate": 4.034902337495998e-05, "loss": 1.7851, "step": 302400 }, { "epoch": 0.19, "learning_rate": 4.034582132564842e-05, "loss": 1.765, "step": 302500 }, { "epoch": 0.19, "learning_rate": 4.0342619276336856e-05, "loss": 1.7794, "step": 302600 }, { "epoch": 0.19, "learning_rate": 4.03394172270253e-05, "loss": 1.7841, "step": 302700 }, { "epoch": 0.19, "learning_rate": 4.0336215177713736e-05, "loss": 1.7897, "step": 302800 }, { "epoch": 0.19, "learning_rate": 4.033301312840218e-05, "loss": 1.7727, "step": 302900 }, { "epoch": 0.19, "learning_rate": 4.0329811079090615e-05, "loss": 1.775, "step": 303000 }, { "epoch": 0.19, "eval_loss": 1.7509160041809082, "eval_runtime": 91.9131, "eval_samples_per_second": 108.798, "eval_steps_per_second": 6.8, "step": 303000 }, { "epoch": 0.19, "learning_rate": 4.032660902977906e-05, "loss": 1.7865, "step": 303100 }, { "epoch": 0.19, "learning_rate": 4.03234069804675e-05, "loss": 1.7865, "step": 303200 }, { "epoch": 0.19, "learning_rate": 4.032020493115594e-05, "loss": 1.7755, "step": 303300 }, { "epoch": 0.19, "learning_rate": 4.031700288184439e-05, "loss": 1.7845, "step": 303400 }, { "epoch": 0.19, "learning_rate": 4.031380083253282e-05, "loss": 1.7879, "step": 303500 }, { "epoch": 0.19, "learning_rate": 4.031059878322127e-05, "loss": 1.7722, "step": 303600 }, { "epoch": 0.19, "learning_rate": 4.03073967339097e-05, "loss": 1.7727, "step": 303700 }, { "epoch": 0.19, "learning_rate": 4.030419468459815e-05, "loss": 1.786, "step": 303800 }, { "epoch": 0.19, "learning_rate": 4.030099263528658e-05, "loss": 1.7755, "step": 303900 }, { "epoch": 0.19, "learning_rate": 4.0297790585975026e-05, "loss": 1.7742, "step": 304000 }, { "epoch": 0.19, "eval_loss": 1.7516933679580688, "eval_runtime": 92.1351, "eval_samples_per_second": 108.536, "eval_steps_per_second": 6.784, "step": 304000 }, { "epoch": 0.19, "learning_rate": 4.0294588536663466e-05, "loss": 1.7845, "step": 304100 }, { "epoch": 0.19, "learning_rate": 4.0291386487351906e-05, "loss": 1.7852, "step": 304200 }, { "epoch": 0.19, "learning_rate": 4.028818443804035e-05, "loss": 1.7915, "step": 304300 }, { "epoch": 0.19, "learning_rate": 4.0284982388728785e-05, "loss": 1.7646, "step": 304400 }, { "epoch": 0.19, "learning_rate": 4.028178033941723e-05, "loss": 1.798, "step": 304500 }, { "epoch": 0.19, "learning_rate": 4.0278578290105665e-05, "loss": 1.7763, "step": 304600 }, { "epoch": 0.2, "learning_rate": 4.027537624079411e-05, "loss": 1.7724, "step": 304700 }, { "epoch": 0.2, "learning_rate": 4.027217419148255e-05, "loss": 1.7822, "step": 304800 }, { "epoch": 0.2, "learning_rate": 4.026897214217099e-05, "loss": 1.7769, "step": 304900 }, { "epoch": 0.2, "learning_rate": 4.026577009285944e-05, "loss": 1.7853, "step": 305000 }, { "epoch": 0.2, "eval_loss": 1.749717354774475, "eval_runtime": 92.8891, "eval_samples_per_second": 107.655, "eval_steps_per_second": 6.728, "step": 305000 }, { "epoch": 0.2, "learning_rate": 4.026256804354787e-05, "loss": 1.7743, "step": 305100 }, { "epoch": 0.2, "learning_rate": 4.025936599423632e-05, "loss": 1.7604, "step": 305200 }, { "epoch": 0.2, "learning_rate": 4.025616394492475e-05, "loss": 1.7952, "step": 305300 }, { "epoch": 0.2, "learning_rate": 4.0252961895613196e-05, "loss": 1.7705, "step": 305400 }, { "epoch": 0.2, "learning_rate": 4.024975984630163e-05, "loss": 1.7689, "step": 305500 }, { "epoch": 0.2, "learning_rate": 4.0246557796990076e-05, "loss": 1.7823, "step": 305600 }, { "epoch": 0.2, "learning_rate": 4.0243355747678515e-05, "loss": 1.7845, "step": 305700 }, { "epoch": 0.2, "learning_rate": 4.0240153698366955e-05, "loss": 1.7702, "step": 305800 }, { "epoch": 0.2, "learning_rate": 4.02369516490554e-05, "loss": 1.7876, "step": 305900 }, { "epoch": 0.2, "learning_rate": 4.0233749599743835e-05, "loss": 1.7736, "step": 306000 }, { "epoch": 0.2, "eval_loss": 1.7490158081054688, "eval_runtime": 92.6563, "eval_samples_per_second": 107.926, "eval_steps_per_second": 6.745, "step": 306000 }, { "epoch": 0.2, "learning_rate": 4.023054755043228e-05, "loss": 1.8147, "step": 306100 }, { "epoch": 0.2, "learning_rate": 4.0227345501120714e-05, "loss": 1.7749, "step": 306200 }, { "epoch": 0.2, "learning_rate": 4.022414345180916e-05, "loss": 1.7903, "step": 306300 }, { "epoch": 0.2, "learning_rate": 4.02209414024976e-05, "loss": 1.7719, "step": 306400 }, { "epoch": 0.2, "learning_rate": 4.021773935318604e-05, "loss": 1.7823, "step": 306500 }, { "epoch": 0.2, "learning_rate": 4.0214537303874487e-05, "loss": 1.7566, "step": 306600 }, { "epoch": 0.2, "learning_rate": 4.021133525456292e-05, "loss": 1.8082, "step": 306700 }, { "epoch": 0.2, "learning_rate": 4.0208133205251366e-05, "loss": 1.759, "step": 306800 }, { "epoch": 0.2, "learning_rate": 4.02049311559398e-05, "loss": 1.7576, "step": 306900 }, { "epoch": 0.2, "learning_rate": 4.0201729106628246e-05, "loss": 1.7752, "step": 307000 }, { "epoch": 0.2, "eval_loss": 1.7512949705123901, "eval_runtime": 96.8711, "eval_samples_per_second": 103.23, "eval_steps_per_second": 6.452, "step": 307000 }, { "epoch": 0.2, "learning_rate": 4.0198527057316685e-05, "loss": 1.7848, "step": 307100 }, { "epoch": 0.2, "learning_rate": 4.0195325008005125e-05, "loss": 1.7714, "step": 307200 }, { "epoch": 0.2, "learning_rate": 4.0192122958693565e-05, "loss": 1.7978, "step": 307300 }, { "epoch": 0.2, "learning_rate": 4.0188920909382004e-05, "loss": 1.758, "step": 307400 }, { "epoch": 0.2, "learning_rate": 4.018571886007045e-05, "loss": 1.7893, "step": 307500 }, { "epoch": 0.2, "learning_rate": 4.0182516810758884e-05, "loss": 1.765, "step": 307600 }, { "epoch": 0.2, "learning_rate": 4.017931476144733e-05, "loss": 1.7806, "step": 307700 }, { "epoch": 0.2, "learning_rate": 4.0176112712135763e-05, "loss": 1.762, "step": 307800 }, { "epoch": 0.2, "learning_rate": 4.017291066282421e-05, "loss": 1.7956, "step": 307900 }, { "epoch": 0.2, "learning_rate": 4.016970861351265e-05, "loss": 1.8048, "step": 308000 }, { "epoch": 0.2, "eval_loss": 1.7500168085098267, "eval_runtime": 94.3603, "eval_samples_per_second": 105.977, "eval_steps_per_second": 6.624, "step": 308000 }, { "epoch": 0.2, "learning_rate": 4.016650656420109e-05, "loss": 1.7868, "step": 308100 }, { "epoch": 0.2, "learning_rate": 4.0163304514889536e-05, "loss": 1.7794, "step": 308200 }, { "epoch": 0.2, "learning_rate": 4.016010246557797e-05, "loss": 1.8, "step": 308300 }, { "epoch": 0.2, "learning_rate": 4.0156900416266415e-05, "loss": 1.7765, "step": 308400 }, { "epoch": 0.2, "learning_rate": 4.015369836695485e-05, "loss": 1.7661, "step": 308500 }, { "epoch": 0.2, "learning_rate": 4.0150496317643295e-05, "loss": 1.7923, "step": 308600 }, { "epoch": 0.2, "learning_rate": 4.0147294268331735e-05, "loss": 1.781, "step": 308700 }, { "epoch": 0.2, "learning_rate": 4.0144092219020174e-05, "loss": 1.7667, "step": 308800 }, { "epoch": 0.2, "learning_rate": 4.0140890169708614e-05, "loss": 1.7895, "step": 308900 }, { "epoch": 0.2, "learning_rate": 4.0137688120397054e-05, "loss": 1.7894, "step": 309000 }, { "epoch": 0.2, "eval_loss": 1.751098871231079, "eval_runtime": 92.6613, "eval_samples_per_second": 107.92, "eval_steps_per_second": 6.745, "step": 309000 }, { "epoch": 0.2, "learning_rate": 4.01344860710855e-05, "loss": 1.7731, "step": 309100 }, { "epoch": 0.2, "learning_rate": 4.013128402177393e-05, "loss": 1.7898, "step": 309200 }, { "epoch": 0.2, "learning_rate": 4.012808197246238e-05, "loss": 1.7683, "step": 309300 }, { "epoch": 0.2, "learning_rate": 4.012487992315082e-05, "loss": 1.7761, "step": 309400 }, { "epoch": 0.2, "learning_rate": 4.012167787383926e-05, "loss": 1.7779, "step": 309500 }, { "epoch": 0.2, "learning_rate": 4.01184758245277e-05, "loss": 1.7725, "step": 309600 }, { "epoch": 0.2, "learning_rate": 4.011527377521614e-05, "loss": 1.8004, "step": 309700 }, { "epoch": 0.2, "learning_rate": 4.0112071725904585e-05, "loss": 1.7833, "step": 309800 }, { "epoch": 0.2, "learning_rate": 4.010886967659302e-05, "loss": 1.7682, "step": 309900 }, { "epoch": 0.2, "learning_rate": 4.0105667627281465e-05, "loss": 1.7774, "step": 310000 }, { "epoch": 0.2, "eval_loss": 1.7513691186904907, "eval_runtime": 92.9341, "eval_samples_per_second": 107.603, "eval_steps_per_second": 6.725, "step": 310000 }, { "epoch": 0.2, "learning_rate": 4.01024655779699e-05, "loss": 1.7763, "step": 310100 }, { "epoch": 0.2, "learning_rate": 4.0099263528658344e-05, "loss": 1.7835, "step": 310200 }, { "epoch": 0.2, "learning_rate": 4.0096061479346784e-05, "loss": 1.7718, "step": 310300 }, { "epoch": 0.2, "learning_rate": 4.0092859430035224e-05, "loss": 1.7964, "step": 310400 }, { "epoch": 0.2, "learning_rate": 4.0089657380723663e-05, "loss": 1.7671, "step": 310500 }, { "epoch": 0.2, "learning_rate": 4.00864553314121e-05, "loss": 1.7877, "step": 310600 }, { "epoch": 0.2, "learning_rate": 4.008325328210055e-05, "loss": 1.7844, "step": 310700 }, { "epoch": 0.2, "learning_rate": 4.008005123278898e-05, "loss": 1.7598, "step": 310800 }, { "epoch": 0.2, "learning_rate": 4.007684918347743e-05, "loss": 1.7699, "step": 310900 }, { "epoch": 0.2, "learning_rate": 4.007364713416587e-05, "loss": 1.7897, "step": 311000 }, { "epoch": 0.2, "eval_loss": 1.7487189769744873, "eval_runtime": 92.385, "eval_samples_per_second": 108.243, "eval_steps_per_second": 6.765, "step": 311000 }, { "epoch": 0.2, "learning_rate": 4.007044508485431e-05, "loss": 1.7854, "step": 311100 }, { "epoch": 0.2, "learning_rate": 4.006724303554275e-05, "loss": 1.7623, "step": 311200 }, { "epoch": 0.2, "learning_rate": 4.006404098623119e-05, "loss": 1.7933, "step": 311300 }, { "epoch": 0.2, "learning_rate": 4.0060838936919635e-05, "loss": 1.7823, "step": 311400 }, { "epoch": 0.2, "learning_rate": 4.005763688760807e-05, "loss": 1.7826, "step": 311500 }, { "epoch": 0.2, "learning_rate": 4.0054434838296514e-05, "loss": 1.7614, "step": 311600 }, { "epoch": 0.2, "learning_rate": 4.0051232788984954e-05, "loss": 1.7797, "step": 311700 }, { "epoch": 0.2, "learning_rate": 4.0048030739673394e-05, "loss": 1.7744, "step": 311800 }, { "epoch": 0.2, "learning_rate": 4.004482869036183e-05, "loss": 1.7872, "step": 311900 }, { "epoch": 0.2, "learning_rate": 4.004162664105027e-05, "loss": 1.7845, "step": 312000 }, { "epoch": 0.2, "eval_loss": 1.7495882511138916, "eval_runtime": 94.0395, "eval_samples_per_second": 106.338, "eval_steps_per_second": 6.646, "step": 312000 }, { "epoch": 0.2, "learning_rate": 4.003842459173871e-05, "loss": 1.7913, "step": 312100 }, { "epoch": 0.2, "learning_rate": 4.003522254242715e-05, "loss": 1.7629, "step": 312200 }, { "epoch": 0.2, "learning_rate": 4.00320204931156e-05, "loss": 1.7649, "step": 312300 }, { "epoch": 0.2, "learning_rate": 4.002881844380404e-05, "loss": 1.7651, "step": 312400 }, { "epoch": 0.2, "learning_rate": 4.002561639449248e-05, "loss": 1.7643, "step": 312500 }, { "epoch": 0.2, "learning_rate": 4.002241434518092e-05, "loss": 1.7626, "step": 312600 }, { "epoch": 0.2, "learning_rate": 4.001921229586936e-05, "loss": 1.7628, "step": 312700 }, { "epoch": 0.2, "learning_rate": 4.00160102465578e-05, "loss": 1.7701, "step": 312800 }, { "epoch": 0.2, "learning_rate": 4.001280819724624e-05, "loss": 1.7677, "step": 312900 }, { "epoch": 0.2, "learning_rate": 4.000960614793468e-05, "loss": 1.7632, "step": 313000 }, { "epoch": 0.2, "eval_loss": 1.7518659830093384, "eval_runtime": 95.701, "eval_samples_per_second": 104.492, "eval_steps_per_second": 6.531, "step": 313000 }, { "epoch": 0.2, "learning_rate": 4.000640409862312e-05, "loss": 1.772, "step": 313100 }, { "epoch": 0.2, "learning_rate": 4.0003202049311564e-05, "loss": 1.7963, "step": 313200 }, { "epoch": 0.2, "learning_rate": 4e-05, "loss": 1.7766, "step": 313300 }, { "epoch": 0.2, "learning_rate": 3.999679795068844e-05, "loss": 1.7917, "step": 313400 }, { "epoch": 0.2, "learning_rate": 3.999359590137688e-05, "loss": 1.7911, "step": 313500 }, { "epoch": 0.2, "learning_rate": 3.999039385206532e-05, "loss": 1.7783, "step": 313600 }, { "epoch": 0.2, "learning_rate": 3.998719180275376e-05, "loss": 1.7837, "step": 313700 }, { "epoch": 0.2, "learning_rate": 3.99839897534422e-05, "loss": 1.766, "step": 313800 }, { "epoch": 0.2, "learning_rate": 3.998078770413065e-05, "loss": 1.7536, "step": 313900 }, { "epoch": 0.2, "learning_rate": 3.997758565481909e-05, "loss": 1.7626, "step": 314000 }, { "epoch": 0.2, "eval_loss": 1.7520837783813477, "eval_runtime": 93.7036, "eval_samples_per_second": 106.719, "eval_steps_per_second": 6.67, "step": 314000 }, { "epoch": 0.2, "learning_rate": 3.997438360550753e-05, "loss": 1.7835, "step": 314100 }, { "epoch": 0.2, "learning_rate": 3.997118155619597e-05, "loss": 1.7508, "step": 314200 }, { "epoch": 0.2, "learning_rate": 3.996797950688441e-05, "loss": 1.7865, "step": 314300 }, { "epoch": 0.2, "learning_rate": 3.996477745757285e-05, "loss": 1.7813, "step": 314400 }, { "epoch": 0.2, "learning_rate": 3.996157540826129e-05, "loss": 1.7704, "step": 314500 }, { "epoch": 0.2, "learning_rate": 3.995837335894973e-05, "loss": 1.7869, "step": 314600 }, { "epoch": 0.2, "learning_rate": 3.995517130963817e-05, "loss": 1.7742, "step": 314700 }, { "epoch": 0.2, "learning_rate": 3.995196926032661e-05, "loss": 1.7635, "step": 314800 }, { "epoch": 0.2, "learning_rate": 3.994876721101505e-05, "loss": 1.7604, "step": 314900 }, { "epoch": 0.2, "learning_rate": 3.994556516170349e-05, "loss": 1.7686, "step": 315000 }, { "epoch": 0.2, "eval_loss": 1.7505390644073486, "eval_runtime": 91.8609, "eval_samples_per_second": 108.86, "eval_steps_per_second": 6.804, "step": 315000 }, { "epoch": 0.2, "learning_rate": 3.994236311239193e-05, "loss": 1.7647, "step": 315100 }, { "epoch": 0.2, "learning_rate": 3.993916106308037e-05, "loss": 1.7773, "step": 315200 }, { "epoch": 0.2, "learning_rate": 3.993595901376881e-05, "loss": 1.787, "step": 315300 }, { "epoch": 0.2, "learning_rate": 3.993275696445725e-05, "loss": 1.7767, "step": 315400 }, { "epoch": 0.2, "learning_rate": 3.99295549151457e-05, "loss": 1.785, "step": 315500 }, { "epoch": 0.2, "learning_rate": 3.992635286583414e-05, "loss": 1.7955, "step": 315600 }, { "epoch": 0.2, "learning_rate": 3.992315081652258e-05, "loss": 1.7627, "step": 315700 }, { "epoch": 0.2, "learning_rate": 3.991994876721102e-05, "loss": 1.7872, "step": 315800 }, { "epoch": 0.2, "learning_rate": 3.991674671789946e-05, "loss": 1.7793, "step": 315900 }, { "epoch": 0.2, "learning_rate": 3.9913544668587897e-05, "loss": 1.7672, "step": 316000 }, { "epoch": 0.2, "eval_loss": 1.7540873289108276, "eval_runtime": 92.3082, "eval_samples_per_second": 108.333, "eval_steps_per_second": 6.771, "step": 316000 }, { "epoch": 0.2, "learning_rate": 3.9910342619276336e-05, "loss": 1.7696, "step": 316100 }, { "epoch": 0.2, "learning_rate": 3.9907140569964776e-05, "loss": 1.7833, "step": 316200 }, { "epoch": 0.2, "learning_rate": 3.990393852065322e-05, "loss": 1.767, "step": 316300 }, { "epoch": 0.2, "learning_rate": 3.990073647134166e-05, "loss": 1.7806, "step": 316400 }, { "epoch": 0.2, "learning_rate": 3.98975344220301e-05, "loss": 1.7817, "step": 316500 }, { "epoch": 0.2, "learning_rate": 3.989433237271854e-05, "loss": 1.7804, "step": 316600 }, { "epoch": 0.2, "learning_rate": 3.989113032340698e-05, "loss": 1.7755, "step": 316700 }, { "epoch": 0.2, "learning_rate": 3.988792827409542e-05, "loss": 1.7681, "step": 316800 }, { "epoch": 0.2, "learning_rate": 3.988472622478386e-05, "loss": 1.785, "step": 316900 }, { "epoch": 0.2, "learning_rate": 3.988152417547231e-05, "loss": 1.7754, "step": 317000 }, { "epoch": 0.2, "eval_loss": 1.7521624565124512, "eval_runtime": 92.8959, "eval_samples_per_second": 107.647, "eval_steps_per_second": 6.728, "step": 317000 }, { "epoch": 0.2, "learning_rate": 3.987832212616075e-05, "loss": 1.7869, "step": 317100 }, { "epoch": 0.2, "learning_rate": 3.987512007684919e-05, "loss": 1.7816, "step": 317200 }, { "epoch": 0.2, "learning_rate": 3.987191802753763e-05, "loss": 1.7715, "step": 317300 }, { "epoch": 0.2, "learning_rate": 3.9868715978226066e-05, "loss": 1.7637, "step": 317400 }, { "epoch": 0.2, "learning_rate": 3.9865513928914506e-05, "loss": 1.7734, "step": 317500 }, { "epoch": 0.2, "learning_rate": 3.9862311879602946e-05, "loss": 1.786, "step": 317600 }, { "epoch": 0.2, "learning_rate": 3.9859109830291386e-05, "loss": 1.7871, "step": 317700 }, { "epoch": 0.2, "learning_rate": 3.9855907780979825e-05, "loss": 1.7679, "step": 317800 }, { "epoch": 0.2, "learning_rate": 3.985270573166827e-05, "loss": 1.7685, "step": 317900 }, { "epoch": 0.2, "learning_rate": 3.984950368235671e-05, "loss": 1.7756, "step": 318000 }, { "epoch": 0.2, "eval_loss": 1.7512810230255127, "eval_runtime": 93.3973, "eval_samples_per_second": 107.069, "eval_steps_per_second": 6.692, "step": 318000 }, { "epoch": 0.2, "learning_rate": 3.984630163304515e-05, "loss": 1.7515, "step": 318100 }, { "epoch": 0.2, "learning_rate": 3.984309958373359e-05, "loss": 1.7987, "step": 318200 }, { "epoch": 0.2, "learning_rate": 3.983989753442203e-05, "loss": 1.7913, "step": 318300 }, { "epoch": 0.2, "learning_rate": 3.983669548511047e-05, "loss": 1.7963, "step": 318400 }, { "epoch": 0.2, "learning_rate": 3.983349343579891e-05, "loss": 1.7715, "step": 318500 }, { "epoch": 0.2, "learning_rate": 3.983029138648736e-05, "loss": 1.7684, "step": 318600 }, { "epoch": 0.2, "learning_rate": 3.9827089337175797e-05, "loss": 1.7654, "step": 318700 }, { "epoch": 0.2, "learning_rate": 3.9823887287864236e-05, "loss": 1.7491, "step": 318800 }, { "epoch": 0.2, "learning_rate": 3.9820685238552676e-05, "loss": 1.7789, "step": 318900 }, { "epoch": 0.2, "learning_rate": 3.9817483189241116e-05, "loss": 1.7911, "step": 319000 }, { "epoch": 0.2, "eval_loss": 1.7479933500289917, "eval_runtime": 94.1621, "eval_samples_per_second": 106.2, "eval_steps_per_second": 6.637, "step": 319000 }, { "epoch": 0.2, "learning_rate": 3.9814281139929556e-05, "loss": 1.7704, "step": 319100 }, { "epoch": 0.2, "learning_rate": 3.9811079090617995e-05, "loss": 1.7801, "step": 319200 }, { "epoch": 0.2, "learning_rate": 3.980787704130644e-05, "loss": 1.7677, "step": 319300 }, { "epoch": 0.2, "learning_rate": 3.9804674991994875e-05, "loss": 1.7686, "step": 319400 }, { "epoch": 0.2, "learning_rate": 3.980147294268332e-05, "loss": 1.7527, "step": 319500 }, { "epoch": 0.2, "learning_rate": 3.979827089337176e-05, "loss": 1.7891, "step": 319600 }, { "epoch": 0.2, "learning_rate": 3.97950688440602e-05, "loss": 1.7846, "step": 319700 }, { "epoch": 0.2, "learning_rate": 3.979186679474864e-05, "loss": 1.7596, "step": 319800 }, { "epoch": 0.2, "learning_rate": 3.978866474543708e-05, "loss": 1.7947, "step": 319900 }, { "epoch": 0.2, "learning_rate": 3.978546269612553e-05, "loss": 1.7842, "step": 320000 }, { "epoch": 0.2, "eval_loss": 1.7528327703475952, "eval_runtime": 93.2457, "eval_samples_per_second": 107.244, "eval_steps_per_second": 6.703, "step": 320000 }, { "epoch": 0.2, "learning_rate": 3.978226064681396e-05, "loss": 1.7659, "step": 320100 }, { "epoch": 0.2, "learning_rate": 3.9779058597502406e-05, "loss": 1.7753, "step": 320200 }, { "epoch": 0.2, "learning_rate": 3.9775856548190846e-05, "loss": 1.7969, "step": 320300 }, { "epoch": 0.21, "learning_rate": 3.9772654498879286e-05, "loss": 1.786, "step": 320400 }, { "epoch": 0.21, "learning_rate": 3.9769452449567725e-05, "loss": 1.7912, "step": 320500 }, { "epoch": 0.21, "learning_rate": 3.9766250400256165e-05, "loss": 1.776, "step": 320600 }, { "epoch": 0.21, "learning_rate": 3.9763048350944605e-05, "loss": 1.7609, "step": 320700 }, { "epoch": 0.21, "learning_rate": 3.9759846301633045e-05, "loss": 1.7783, "step": 320800 }, { "epoch": 0.21, "learning_rate": 3.975664425232149e-05, "loss": 1.7755, "step": 320900 }, { "epoch": 0.21, "learning_rate": 3.9753442203009924e-05, "loss": 1.7649, "step": 321000 }, { "epoch": 0.21, "eval_loss": 1.7503031492233276, "eval_runtime": 92.7731, "eval_samples_per_second": 107.79, "eval_steps_per_second": 6.737, "step": 321000 }, { "epoch": 0.21, "learning_rate": 3.975024015369837e-05, "loss": 1.7725, "step": 321100 }, { "epoch": 0.21, "learning_rate": 3.974703810438681e-05, "loss": 1.7924, "step": 321200 }, { "epoch": 0.21, "learning_rate": 3.974383605507525e-05, "loss": 1.7764, "step": 321300 }, { "epoch": 0.21, "learning_rate": 3.974063400576369e-05, "loss": 1.7649, "step": 321400 }, { "epoch": 0.21, "learning_rate": 3.973743195645213e-05, "loss": 1.7678, "step": 321500 }, { "epoch": 0.21, "learning_rate": 3.9734229907140576e-05, "loss": 1.7622, "step": 321600 }, { "epoch": 0.21, "learning_rate": 3.973102785782901e-05, "loss": 1.7641, "step": 321700 }, { "epoch": 0.21, "learning_rate": 3.9727825808517456e-05, "loss": 1.779, "step": 321800 }, { "epoch": 0.21, "learning_rate": 3.9724623759205895e-05, "loss": 1.7901, "step": 321900 }, { "epoch": 0.21, "learning_rate": 3.9721421709894335e-05, "loss": 1.759, "step": 322000 }, { "epoch": 0.21, "eval_loss": 1.749916434288025, "eval_runtime": 92.3376, "eval_samples_per_second": 108.298, "eval_steps_per_second": 6.769, "step": 322000 }, { "epoch": 0.21, "learning_rate": 3.9718219660582775e-05, "loss": 1.7753, "step": 322100 }, { "epoch": 0.21, "learning_rate": 3.9715017611271215e-05, "loss": 1.7824, "step": 322200 }, { "epoch": 0.21, "learning_rate": 3.971181556195966e-05, "loss": 1.7803, "step": 322300 }, { "epoch": 0.21, "learning_rate": 3.9708613512648094e-05, "loss": 1.7742, "step": 322400 }, { "epoch": 0.21, "learning_rate": 3.970541146333654e-05, "loss": 1.7655, "step": 322500 }, { "epoch": 0.21, "learning_rate": 3.9702209414024974e-05, "loss": 1.7946, "step": 322600 }, { "epoch": 0.21, "learning_rate": 3.969900736471342e-05, "loss": 1.7617, "step": 322700 }, { "epoch": 0.21, "learning_rate": 3.969580531540186e-05, "loss": 1.7792, "step": 322800 }, { "epoch": 0.21, "learning_rate": 3.96926032660903e-05, "loss": 1.7837, "step": 322900 }, { "epoch": 0.21, "learning_rate": 3.968940121677874e-05, "loss": 1.7549, "step": 323000 }, { "epoch": 0.21, "eval_loss": 1.7493877410888672, "eval_runtime": 93.9672, "eval_samples_per_second": 106.42, "eval_steps_per_second": 6.651, "step": 323000 }, { "epoch": 0.21, "learning_rate": 3.968619916746718e-05, "loss": 1.7566, "step": 323100 }, { "epoch": 0.21, "learning_rate": 3.9682997118155625e-05, "loss": 1.7861, "step": 323200 }, { "epoch": 0.21, "learning_rate": 3.967979506884406e-05, "loss": 1.7753, "step": 323300 }, { "epoch": 0.21, "learning_rate": 3.9676593019532505e-05, "loss": 1.7773, "step": 323400 }, { "epoch": 0.21, "learning_rate": 3.9673390970220945e-05, "loss": 1.7513, "step": 323500 }, { "epoch": 0.21, "learning_rate": 3.9670188920909384e-05, "loss": 1.7805, "step": 323600 }, { "epoch": 0.21, "learning_rate": 3.9666986871597824e-05, "loss": 1.7701, "step": 323700 }, { "epoch": 0.21, "learning_rate": 3.9663784822286264e-05, "loss": 1.7878, "step": 323800 }, { "epoch": 0.21, "learning_rate": 3.966058277297471e-05, "loss": 1.7839, "step": 323900 }, { "epoch": 0.21, "learning_rate": 3.9657380723663143e-05, "loss": 1.7638, "step": 324000 }, { "epoch": 0.21, "eval_loss": 1.7498164176940918, "eval_runtime": 92.6184, "eval_samples_per_second": 107.97, "eval_steps_per_second": 6.748, "step": 324000 }, { "epoch": 0.21, "learning_rate": 3.965417867435159e-05, "loss": 1.7585, "step": 324100 }, { "epoch": 0.21, "learning_rate": 3.965097662504002e-05, "loss": 1.7641, "step": 324200 }, { "epoch": 0.21, "learning_rate": 3.964777457572847e-05, "loss": 1.7727, "step": 324300 }, { "epoch": 0.21, "learning_rate": 3.964457252641691e-05, "loss": 1.7789, "step": 324400 }, { "epoch": 0.21, "learning_rate": 3.964137047710535e-05, "loss": 1.778, "step": 324500 }, { "epoch": 0.21, "learning_rate": 3.9638168427793795e-05, "loss": 1.7834, "step": 324600 }, { "epoch": 0.21, "learning_rate": 3.963496637848223e-05, "loss": 1.7706, "step": 324700 }, { "epoch": 0.21, "learning_rate": 3.9631764329170675e-05, "loss": 1.7731, "step": 324800 }, { "epoch": 0.21, "learning_rate": 3.962856227985911e-05, "loss": 1.7771, "step": 324900 }, { "epoch": 0.21, "learning_rate": 3.9625360230547554e-05, "loss": 1.7682, "step": 325000 }, { "epoch": 0.21, "eval_loss": 1.7491081953048706, "eval_runtime": 94.9524, "eval_samples_per_second": 105.316, "eval_steps_per_second": 6.582, "step": 325000 }, { "epoch": 0.21, "learning_rate": 3.9622158181235994e-05, "loss": 1.7514, "step": 325100 }, { "epoch": 0.21, "learning_rate": 3.9618956131924434e-05, "loss": 1.7598, "step": 325200 }, { "epoch": 0.21, "learning_rate": 3.9615754082612874e-05, "loss": 1.7677, "step": 325300 }, { "epoch": 0.21, "learning_rate": 3.961255203330131e-05, "loss": 1.746, "step": 325400 }, { "epoch": 0.21, "learning_rate": 3.960934998398976e-05, "loss": 1.764, "step": 325500 }, { "epoch": 0.21, "learning_rate": 3.960614793467819e-05, "loss": 1.7676, "step": 325600 }, { "epoch": 0.21, "learning_rate": 3.960294588536664e-05, "loss": 1.7564, "step": 325700 }, { "epoch": 0.21, "learning_rate": 3.959974383605507e-05, "loss": 1.77, "step": 325800 }, { "epoch": 0.21, "learning_rate": 3.959654178674352e-05, "loss": 1.7881, "step": 325900 }, { "epoch": 0.21, "learning_rate": 3.959333973743196e-05, "loss": 1.7829, "step": 326000 }, { "epoch": 0.21, "eval_loss": 1.7522125244140625, "eval_runtime": 94.916, "eval_samples_per_second": 105.356, "eval_steps_per_second": 6.585, "step": 326000 }, { "epoch": 0.21, "learning_rate": 3.95901376881204e-05, "loss": 1.7615, "step": 326100 }, { "epoch": 0.21, "learning_rate": 3.9586935638808845e-05, "loss": 1.7555, "step": 326200 }, { "epoch": 0.21, "learning_rate": 3.958373358949728e-05, "loss": 1.7764, "step": 326300 }, { "epoch": 0.21, "learning_rate": 3.9580531540185724e-05, "loss": 1.7837, "step": 326400 }, { "epoch": 0.21, "learning_rate": 3.957732949087416e-05, "loss": 1.7888, "step": 326500 }, { "epoch": 0.21, "learning_rate": 3.9574127441562604e-05, "loss": 1.7741, "step": 326600 }, { "epoch": 0.21, "learning_rate": 3.9570925392251043e-05, "loss": 1.7825, "step": 326700 }, { "epoch": 0.21, "learning_rate": 3.956772334293948e-05, "loss": 1.7787, "step": 326800 }, { "epoch": 0.21, "learning_rate": 3.956452129362793e-05, "loss": 1.7731, "step": 326900 }, { "epoch": 0.21, "learning_rate": 3.956131924431636e-05, "loss": 1.7635, "step": 327000 }, { "epoch": 0.21, "eval_loss": 1.7480334043502808, "eval_runtime": 94.9057, "eval_samples_per_second": 105.368, "eval_steps_per_second": 6.585, "step": 327000 }, { "epoch": 0.21, "learning_rate": 3.955811719500481e-05, "loss": 1.7813, "step": 327100 }, { "epoch": 0.21, "learning_rate": 3.955491514569324e-05, "loss": 1.7741, "step": 327200 }, { "epoch": 0.21, "learning_rate": 3.955171309638169e-05, "loss": 1.7746, "step": 327300 }, { "epoch": 0.21, "learning_rate": 3.954851104707012e-05, "loss": 1.7654, "step": 327400 }, { "epoch": 0.21, "learning_rate": 3.954530899775857e-05, "loss": 1.7671, "step": 327500 }, { "epoch": 0.21, "learning_rate": 3.954210694844701e-05, "loss": 1.7691, "step": 327600 }, { "epoch": 0.21, "learning_rate": 3.953890489913545e-05, "loss": 1.7727, "step": 327700 }, { "epoch": 0.21, "learning_rate": 3.9535702849823894e-05, "loss": 1.7543, "step": 327800 }, { "epoch": 0.21, "learning_rate": 3.953250080051233e-05, "loss": 1.758, "step": 327900 }, { "epoch": 0.21, "learning_rate": 3.9529298751200774e-05, "loss": 1.7591, "step": 328000 }, { "epoch": 0.21, "eval_loss": 1.7475228309631348, "eval_runtime": 93.9161, "eval_samples_per_second": 106.478, "eval_steps_per_second": 6.655, "step": 328000 }, { "epoch": 0.21, "learning_rate": 3.9526096701889207e-05, "loss": 1.7628, "step": 328100 }, { "epoch": 0.21, "learning_rate": 3.952289465257765e-05, "loss": 1.7659, "step": 328200 }, { "epoch": 0.21, "learning_rate": 3.951969260326609e-05, "loss": 1.794, "step": 328300 }, { "epoch": 0.21, "learning_rate": 3.951649055395453e-05, "loss": 1.7681, "step": 328400 }, { "epoch": 0.21, "learning_rate": 3.951328850464298e-05, "loss": 1.7731, "step": 328500 }, { "epoch": 0.21, "learning_rate": 3.951008645533141e-05, "loss": 1.7786, "step": 328600 }, { "epoch": 0.21, "learning_rate": 3.950688440601986e-05, "loss": 1.776, "step": 328700 }, { "epoch": 0.21, "learning_rate": 3.950368235670829e-05, "loss": 1.7663, "step": 328800 }, { "epoch": 0.21, "learning_rate": 3.950048030739674e-05, "loss": 1.7727, "step": 328900 }, { "epoch": 0.21, "learning_rate": 3.949727825808517e-05, "loss": 1.7661, "step": 329000 }, { "epoch": 0.21, "eval_loss": 1.749069333076477, "eval_runtime": 92.2665, "eval_samples_per_second": 108.382, "eval_steps_per_second": 6.774, "step": 329000 }, { "epoch": 0.21, "learning_rate": 3.949407620877362e-05, "loss": 1.7585, "step": 329100 }, { "epoch": 0.21, "learning_rate": 3.949087415946206e-05, "loss": 1.7768, "step": 329200 }, { "epoch": 0.21, "learning_rate": 3.94876721101505e-05, "loss": 1.772, "step": 329300 }, { "epoch": 0.21, "learning_rate": 3.9484470060838944e-05, "loss": 1.7891, "step": 329400 }, { "epoch": 0.21, "learning_rate": 3.9481268011527376e-05, "loss": 1.7857, "step": 329500 }, { "epoch": 0.21, "learning_rate": 3.947806596221582e-05, "loss": 1.779, "step": 329600 }, { "epoch": 0.21, "learning_rate": 3.9474863912904256e-05, "loss": 1.7555, "step": 329700 }, { "epoch": 0.21, "learning_rate": 3.94716618635927e-05, "loss": 1.7748, "step": 329800 }, { "epoch": 0.21, "learning_rate": 3.946845981428114e-05, "loss": 1.782, "step": 329900 }, { "epoch": 0.21, "learning_rate": 3.946525776496958e-05, "loss": 1.7645, "step": 330000 }, { "epoch": 0.21, "eval_loss": 1.7463654279708862, "eval_runtime": 91.8329, "eval_samples_per_second": 108.893, "eval_steps_per_second": 6.806, "step": 330000 }, { "epoch": 0.21, "learning_rate": 3.946205571565802e-05, "loss": 1.7571, "step": 330100 }, { "epoch": 0.21, "learning_rate": 3.945885366634646e-05, "loss": 1.7579, "step": 330200 }, { "epoch": 0.21, "learning_rate": 3.945565161703491e-05, "loss": 1.766, "step": 330300 }, { "epoch": 0.21, "learning_rate": 3.945244956772334e-05, "loss": 1.7562, "step": 330400 }, { "epoch": 0.21, "learning_rate": 3.944924751841179e-05, "loss": 1.7667, "step": 330500 }, { "epoch": 0.21, "learning_rate": 3.944604546910022e-05, "loss": 1.7513, "step": 330600 }, { "epoch": 0.21, "learning_rate": 3.944284341978867e-05, "loss": 1.7668, "step": 330700 }, { "epoch": 0.21, "learning_rate": 3.943964137047711e-05, "loss": 1.7644, "step": 330800 }, { "epoch": 0.21, "learning_rate": 3.9436439321165546e-05, "loss": 1.7757, "step": 330900 }, { "epoch": 0.21, "learning_rate": 3.943323727185399e-05, "loss": 1.7826, "step": 331000 }, { "epoch": 0.21, "eval_loss": 1.7462236881256104, "eval_runtime": 93.2834, "eval_samples_per_second": 107.2, "eval_steps_per_second": 6.7, "step": 331000 }, { "epoch": 0.21, "learning_rate": 3.9430035222542426e-05, "loss": 1.772, "step": 331100 }, { "epoch": 0.21, "learning_rate": 3.942683317323087e-05, "loss": 1.7568, "step": 331200 }, { "epoch": 0.21, "learning_rate": 3.9423631123919305e-05, "loss": 1.7665, "step": 331300 }, { "epoch": 0.21, "learning_rate": 3.942042907460775e-05, "loss": 1.7623, "step": 331400 }, { "epoch": 0.21, "learning_rate": 3.941722702529619e-05, "loss": 1.7645, "step": 331500 }, { "epoch": 0.21, "learning_rate": 3.941402497598463e-05, "loss": 1.759, "step": 331600 }, { "epoch": 0.21, "learning_rate": 3.941082292667307e-05, "loss": 1.7673, "step": 331700 }, { "epoch": 0.21, "learning_rate": 3.940762087736151e-05, "loss": 1.7699, "step": 331800 }, { "epoch": 0.21, "learning_rate": 3.940441882804996e-05, "loss": 1.767, "step": 331900 }, { "epoch": 0.21, "learning_rate": 3.940121677873839e-05, "loss": 1.7807, "step": 332000 }, { "epoch": 0.21, "eval_loss": 1.7480707168579102, "eval_runtime": 94.9743, "eval_samples_per_second": 105.292, "eval_steps_per_second": 6.581, "step": 332000 }, { "epoch": 0.21, "learning_rate": 3.939801472942684e-05, "loss": 1.7292, "step": 332100 }, { "epoch": 0.21, "learning_rate": 3.9394812680115277e-05, "loss": 1.7434, "step": 332200 }, { "epoch": 0.21, "learning_rate": 3.9391610630803716e-05, "loss": 1.7475, "step": 332300 }, { "epoch": 0.21, "learning_rate": 3.9388408581492156e-05, "loss": 1.7641, "step": 332400 }, { "epoch": 0.21, "learning_rate": 3.9385206532180596e-05, "loss": 1.7454, "step": 332500 }, { "epoch": 0.21, "learning_rate": 3.938200448286904e-05, "loss": 1.7492, "step": 332600 }, { "epoch": 0.21, "learning_rate": 3.9378802433557475e-05, "loss": 1.7701, "step": 332700 }, { "epoch": 0.21, "learning_rate": 3.937560038424592e-05, "loss": 1.7908, "step": 332800 }, { "epoch": 0.21, "learning_rate": 3.9372398334934355e-05, "loss": 1.7766, "step": 332900 }, { "epoch": 0.21, "learning_rate": 3.93691962856228e-05, "loss": 1.7612, "step": 333000 }, { "epoch": 0.21, "eval_loss": 1.7460668087005615, "eval_runtime": 95.7447, "eval_samples_per_second": 104.444, "eval_steps_per_second": 6.528, "step": 333000 }, { "epoch": 0.21, "learning_rate": 3.936599423631124e-05, "loss": 1.7717, "step": 333100 }, { "epoch": 0.21, "learning_rate": 3.936279218699968e-05, "loss": 1.7284, "step": 333200 }, { "epoch": 0.21, "learning_rate": 3.935959013768812e-05, "loss": 1.7757, "step": 333300 }, { "epoch": 0.21, "learning_rate": 3.935638808837656e-05, "loss": 1.7704, "step": 333400 }, { "epoch": 0.21, "learning_rate": 3.935318603906501e-05, "loss": 1.7899, "step": 333500 }, { "epoch": 0.21, "learning_rate": 3.934998398975344e-05, "loss": 1.758, "step": 333600 }, { "epoch": 0.21, "learning_rate": 3.9346781940441886e-05, "loss": 1.7665, "step": 333700 }, { "epoch": 0.21, "learning_rate": 3.9343579891130326e-05, "loss": 1.7591, "step": 333800 }, { "epoch": 0.21, "learning_rate": 3.9340377841818766e-05, "loss": 1.7506, "step": 333900 }, { "epoch": 0.21, "learning_rate": 3.9337175792507205e-05, "loss": 1.7646, "step": 334000 }, { "epoch": 0.21, "eval_loss": 1.7470133304595947, "eval_runtime": 94.4624, "eval_samples_per_second": 105.862, "eval_steps_per_second": 6.616, "step": 334000 }, { "epoch": 0.21, "learning_rate": 3.9333973743195645e-05, "loss": 1.7774, "step": 334100 }, { "epoch": 0.21, "learning_rate": 3.933077169388409e-05, "loss": 1.7586, "step": 334200 }, { "epoch": 0.21, "learning_rate": 3.9327569644572525e-05, "loss": 1.7456, "step": 334300 }, { "epoch": 0.21, "learning_rate": 3.932436759526097e-05, "loss": 1.7643, "step": 334400 }, { "epoch": 0.21, "learning_rate": 3.932116554594941e-05, "loss": 1.7803, "step": 334500 }, { "epoch": 0.21, "learning_rate": 3.931796349663785e-05, "loss": 1.7578, "step": 334600 }, { "epoch": 0.21, "learning_rate": 3.931476144732629e-05, "loss": 1.7656, "step": 334700 }, { "epoch": 0.21, "learning_rate": 3.931155939801473e-05, "loss": 1.7526, "step": 334800 }, { "epoch": 0.21, "learning_rate": 3.930835734870317e-05, "loss": 1.7879, "step": 334900 }, { "epoch": 0.21, "learning_rate": 3.930515529939161e-05, "loss": 1.7811, "step": 335000 }, { "epoch": 0.21, "eval_loss": 1.7459101676940918, "eval_runtime": 95.292, "eval_samples_per_second": 104.941, "eval_steps_per_second": 6.559, "step": 335000 }, { "epoch": 0.21, "learning_rate": 3.9301953250080056e-05, "loss": 1.7521, "step": 335100 }, { "epoch": 0.21, "learning_rate": 3.929875120076849e-05, "loss": 1.7588, "step": 335200 }, { "epoch": 0.21, "learning_rate": 3.9295549151456936e-05, "loss": 1.7676, "step": 335300 }, { "epoch": 0.21, "learning_rate": 3.9292347102145375e-05, "loss": 1.761, "step": 335400 }, { "epoch": 0.21, "learning_rate": 3.9289145052833815e-05, "loss": 1.74, "step": 335500 }, { "epoch": 0.21, "learning_rate": 3.9285943003522255e-05, "loss": 1.76, "step": 335600 }, { "epoch": 0.21, "learning_rate": 3.9282740954210694e-05, "loss": 1.7585, "step": 335700 }, { "epoch": 0.21, "learning_rate": 3.927953890489914e-05, "loss": 1.7654, "step": 335800 }, { "epoch": 0.21, "learning_rate": 3.9276336855587574e-05, "loss": 1.7663, "step": 335900 }, { "epoch": 0.22, "learning_rate": 3.927313480627602e-05, "loss": 1.7659, "step": 336000 }, { "epoch": 0.22, "eval_loss": 1.7486605644226074, "eval_runtime": 93.0139, "eval_samples_per_second": 107.511, "eval_steps_per_second": 6.719, "step": 336000 }, { "epoch": 0.22, "learning_rate": 3.926993275696446e-05, "loss": 1.7621, "step": 336100 }, { "epoch": 0.22, "learning_rate": 3.92667307076529e-05, "loss": 1.7682, "step": 336200 }, { "epoch": 0.22, "learning_rate": 3.926352865834134e-05, "loss": 1.7527, "step": 336300 }, { "epoch": 0.22, "learning_rate": 3.926032660902978e-05, "loss": 1.7782, "step": 336400 }, { "epoch": 0.22, "learning_rate": 3.925712455971822e-05, "loss": 1.7778, "step": 336500 }, { "epoch": 0.22, "learning_rate": 3.925392251040666e-05, "loss": 1.7453, "step": 336600 }, { "epoch": 0.22, "learning_rate": 3.9250720461095105e-05, "loss": 1.747, "step": 336700 }, { "epoch": 0.22, "learning_rate": 3.9247518411783545e-05, "loss": 1.773, "step": 336800 }, { "epoch": 0.22, "learning_rate": 3.9244316362471985e-05, "loss": 1.7691, "step": 336900 }, { "epoch": 0.22, "learning_rate": 3.9241114313160425e-05, "loss": 1.7554, "step": 337000 }, { "epoch": 0.22, "eval_loss": 1.746351957321167, "eval_runtime": 92.3367, "eval_samples_per_second": 108.299, "eval_steps_per_second": 6.769, "step": 337000 }, { "epoch": 0.22, "learning_rate": 3.9237912263848864e-05, "loss": 1.7546, "step": 337100 }, { "epoch": 0.22, "learning_rate": 3.9234710214537304e-05, "loss": 1.7642, "step": 337200 }, { "epoch": 0.22, "learning_rate": 3.9231508165225744e-05, "loss": 1.7632, "step": 337300 }, { "epoch": 0.22, "learning_rate": 3.922830611591419e-05, "loss": 1.772, "step": 337400 }, { "epoch": 0.22, "learning_rate": 3.922510406660262e-05, "loss": 1.748, "step": 337500 }, { "epoch": 0.22, "learning_rate": 3.922190201729107e-05, "loss": 1.7582, "step": 337600 }, { "epoch": 0.22, "learning_rate": 3.921869996797951e-05, "loss": 1.7467, "step": 337700 }, { "epoch": 0.22, "learning_rate": 3.921549791866795e-05, "loss": 1.7577, "step": 337800 }, { "epoch": 0.22, "learning_rate": 3.921229586935639e-05, "loss": 1.7831, "step": 337900 }, { "epoch": 0.22, "learning_rate": 3.920909382004483e-05, "loss": 1.7671, "step": 338000 }, { "epoch": 0.22, "eval_loss": 1.7471439838409424, "eval_runtime": 94.2759, "eval_samples_per_second": 106.072, "eval_steps_per_second": 6.629, "step": 338000 }, { "epoch": 0.22, "learning_rate": 3.920589177073327e-05, "loss": 1.779, "step": 338100 }, { "epoch": 0.22, "learning_rate": 3.920268972142171e-05, "loss": 1.7609, "step": 338200 }, { "epoch": 0.22, "learning_rate": 3.9199487672110155e-05, "loss": 1.7465, "step": 338300 }, { "epoch": 0.22, "learning_rate": 3.9196285622798595e-05, "loss": 1.7725, "step": 338400 }, { "epoch": 0.22, "learning_rate": 3.9193083573487034e-05, "loss": 1.7768, "step": 338500 }, { "epoch": 0.22, "learning_rate": 3.9189881524175474e-05, "loss": 1.7636, "step": 338600 }, { "epoch": 0.22, "learning_rate": 3.9186679474863914e-05, "loss": 1.7462, "step": 338700 }, { "epoch": 0.22, "learning_rate": 3.9183477425552353e-05, "loss": 1.7889, "step": 338800 }, { "epoch": 0.22, "learning_rate": 3.918027537624079e-05, "loss": 1.7515, "step": 338900 }, { "epoch": 0.22, "learning_rate": 3.917707332692924e-05, "loss": 1.7581, "step": 339000 }, { "epoch": 0.22, "eval_loss": 1.7508786916732788, "eval_runtime": 90.7678, "eval_samples_per_second": 110.171, "eval_steps_per_second": 6.886, "step": 339000 }, { "epoch": 0.22, "learning_rate": 3.917387127761768e-05, "loss": 1.763, "step": 339100 }, { "epoch": 0.22, "learning_rate": 3.917066922830612e-05, "loss": 1.7806, "step": 339200 }, { "epoch": 0.22, "learning_rate": 3.916746717899456e-05, "loss": 1.7669, "step": 339300 }, { "epoch": 0.22, "learning_rate": 3.9164265129683e-05, "loss": 1.764, "step": 339400 }, { "epoch": 0.22, "learning_rate": 3.916106308037144e-05, "loss": 1.7796, "step": 339500 }, { "epoch": 0.22, "learning_rate": 3.915786103105988e-05, "loss": 1.7586, "step": 339600 }, { "epoch": 0.22, "learning_rate": 3.915465898174832e-05, "loss": 1.7739, "step": 339700 }, { "epoch": 0.22, "learning_rate": 3.9151456932436764e-05, "loss": 1.7581, "step": 339800 }, { "epoch": 0.22, "learning_rate": 3.9148254883125204e-05, "loss": 1.7503, "step": 339900 }, { "epoch": 0.22, "learning_rate": 3.9145052833813644e-05, "loss": 1.7801, "step": 340000 }, { "epoch": 0.22, "eval_loss": 1.7464314699172974, "eval_runtime": 94.2324, "eval_samples_per_second": 106.121, "eval_steps_per_second": 6.633, "step": 340000 }, { "epoch": 0.22, "learning_rate": 3.9141850784502084e-05, "loss": 1.7808, "step": 340100 }, { "epoch": 0.22, "learning_rate": 3.913864873519052e-05, "loss": 1.7636, "step": 340200 }, { "epoch": 0.22, "learning_rate": 3.913544668587896e-05, "loss": 1.7726, "step": 340300 }, { "epoch": 0.22, "learning_rate": 3.91322446365674e-05, "loss": 1.7667, "step": 340400 }, { "epoch": 0.22, "learning_rate": 3.912904258725584e-05, "loss": 1.7531, "step": 340500 }, { "epoch": 0.22, "learning_rate": 3.912584053794429e-05, "loss": 1.7512, "step": 340600 }, { "epoch": 0.22, "learning_rate": 3.912263848863273e-05, "loss": 1.7847, "step": 340700 }, { "epoch": 0.22, "learning_rate": 3.911943643932117e-05, "loss": 1.7418, "step": 340800 }, { "epoch": 0.22, "learning_rate": 3.911623439000961e-05, "loss": 1.7603, "step": 340900 }, { "epoch": 0.22, "learning_rate": 3.911303234069805e-05, "loss": 1.7421, "step": 341000 }, { "epoch": 0.22, "eval_loss": 1.7501707077026367, "eval_runtime": 91.927, "eval_samples_per_second": 108.782, "eval_steps_per_second": 6.799, "step": 341000 }, { "epoch": 0.22, "learning_rate": 3.910983029138649e-05, "loss": 1.781, "step": 341100 }, { "epoch": 0.22, "learning_rate": 3.910662824207493e-05, "loss": 1.7496, "step": 341200 }, { "epoch": 0.22, "learning_rate": 3.910342619276337e-05, "loss": 1.7554, "step": 341300 }, { "epoch": 0.22, "learning_rate": 3.9100224143451814e-05, "loss": 1.7751, "step": 341400 }, { "epoch": 0.22, "learning_rate": 3.9097022094140254e-05, "loss": 1.7659, "step": 341500 }, { "epoch": 0.22, "learning_rate": 3.909382004482869e-05, "loss": 1.7673, "step": 341600 }, { "epoch": 0.22, "learning_rate": 3.909061799551713e-05, "loss": 1.7371, "step": 341700 }, { "epoch": 0.22, "learning_rate": 3.908741594620557e-05, "loss": 1.7458, "step": 341800 }, { "epoch": 0.22, "learning_rate": 3.908421389689401e-05, "loss": 1.7561, "step": 341900 }, { "epoch": 0.22, "learning_rate": 3.908101184758245e-05, "loss": 1.7619, "step": 342000 }, { "epoch": 0.22, "eval_loss": 1.7497646808624268, "eval_runtime": 90.9275, "eval_samples_per_second": 109.978, "eval_steps_per_second": 6.874, "step": 342000 }, { "epoch": 0.22, "learning_rate": 3.90778097982709e-05, "loss": 1.7732, "step": 342100 }, { "epoch": 0.22, "learning_rate": 3.907460774895934e-05, "loss": 1.7768, "step": 342200 }, { "epoch": 0.22, "learning_rate": 3.907140569964778e-05, "loss": 1.7569, "step": 342300 }, { "epoch": 0.22, "learning_rate": 3.906820365033622e-05, "loss": 1.7492, "step": 342400 }, { "epoch": 0.22, "learning_rate": 3.906500160102466e-05, "loss": 1.7771, "step": 342500 }, { "epoch": 0.22, "learning_rate": 3.90617995517131e-05, "loss": 1.7419, "step": 342600 }, { "epoch": 0.22, "learning_rate": 3.905859750240154e-05, "loss": 1.7498, "step": 342700 }, { "epoch": 0.22, "learning_rate": 3.905539545308998e-05, "loss": 1.7538, "step": 342800 }, { "epoch": 0.22, "learning_rate": 3.905219340377842e-05, "loss": 1.7653, "step": 342900 }, { "epoch": 0.22, "learning_rate": 3.904899135446686e-05, "loss": 1.7746, "step": 343000 }, { "epoch": 0.22, "eval_loss": 1.7465687990188599, "eval_runtime": 89.7628, "eval_samples_per_second": 111.405, "eval_steps_per_second": 6.963, "step": 343000 }, { "epoch": 0.22, "learning_rate": 3.90457893051553e-05, "loss": 1.7536, "step": 343100 }, { "epoch": 0.22, "learning_rate": 3.904258725584374e-05, "loss": 1.7908, "step": 343200 }, { "epoch": 0.22, "learning_rate": 3.903938520653218e-05, "loss": 1.7527, "step": 343300 }, { "epoch": 0.22, "learning_rate": 3.903618315722062e-05, "loss": 1.7554, "step": 343400 }, { "epoch": 0.22, "learning_rate": 3.903298110790906e-05, "loss": 1.7806, "step": 343500 }, { "epoch": 0.22, "learning_rate": 3.90297790585975e-05, "loss": 1.7774, "step": 343600 }, { "epoch": 0.22, "learning_rate": 3.902657700928595e-05, "loss": 1.7556, "step": 343700 }, { "epoch": 0.22, "learning_rate": 3.902337495997439e-05, "loss": 1.7741, "step": 343800 }, { "epoch": 0.22, "learning_rate": 3.902017291066283e-05, "loss": 1.7484, "step": 343900 }, { "epoch": 0.22, "learning_rate": 3.901697086135127e-05, "loss": 1.7651, "step": 344000 }, { "epoch": 0.22, "eval_loss": 1.748967170715332, "eval_runtime": 91.3505, "eval_samples_per_second": 109.468, "eval_steps_per_second": 6.842, "step": 344000 }, { "epoch": 0.22, "learning_rate": 3.901376881203971e-05, "loss": 1.7732, "step": 344100 }, { "epoch": 0.22, "learning_rate": 3.901056676272815e-05, "loss": 1.7476, "step": 344200 }, { "epoch": 0.22, "learning_rate": 3.9007364713416587e-05, "loss": 1.7672, "step": 344300 }, { "epoch": 0.22, "learning_rate": 3.900416266410503e-05, "loss": 1.7794, "step": 344400 }, { "epoch": 0.22, "learning_rate": 3.9000960614793466e-05, "loss": 1.7833, "step": 344500 }, { "epoch": 0.22, "learning_rate": 3.899775856548191e-05, "loss": 1.7662, "step": 344600 }, { "epoch": 0.22, "learning_rate": 3.899455651617035e-05, "loss": 1.7468, "step": 344700 }, { "epoch": 0.22, "learning_rate": 3.899135446685879e-05, "loss": 1.7591, "step": 344800 }, { "epoch": 0.22, "learning_rate": 3.898815241754723e-05, "loss": 1.7846, "step": 344900 }, { "epoch": 0.22, "learning_rate": 3.898495036823567e-05, "loss": 1.744, "step": 345000 }, { "epoch": 0.22, "eval_loss": 1.74796724319458, "eval_runtime": 89.6288, "eval_samples_per_second": 111.571, "eval_steps_per_second": 6.973, "step": 345000 }, { "epoch": 0.22, "learning_rate": 3.898174831892411e-05, "loss": 1.7755, "step": 345100 }, { "epoch": 0.22, "learning_rate": 3.897854626961255e-05, "loss": 1.7446, "step": 345200 }, { "epoch": 0.22, "learning_rate": 3.8975344220301e-05, "loss": 1.7631, "step": 345300 }, { "epoch": 0.22, "learning_rate": 3.897214217098944e-05, "loss": 1.7574, "step": 345400 }, { "epoch": 0.22, "learning_rate": 3.896894012167788e-05, "loss": 1.7725, "step": 345500 }, { "epoch": 0.22, "learning_rate": 3.896573807236632e-05, "loss": 1.7481, "step": 345600 }, { "epoch": 0.22, "learning_rate": 3.8962536023054756e-05, "loss": 1.7601, "step": 345700 }, { "epoch": 0.22, "learning_rate": 3.8959333973743196e-05, "loss": 1.7559, "step": 345800 }, { "epoch": 0.22, "learning_rate": 3.8956131924431636e-05, "loss": 1.7528, "step": 345900 }, { "epoch": 0.22, "learning_rate": 3.895292987512008e-05, "loss": 1.789, "step": 346000 }, { "epoch": 0.22, "eval_loss": 1.7467913627624512, "eval_runtime": 88.9868, "eval_samples_per_second": 112.376, "eval_steps_per_second": 7.024, "step": 346000 }, { "epoch": 0.22, "learning_rate": 3.8949727825808515e-05, "loss": 1.7427, "step": 346100 }, { "epoch": 0.22, "learning_rate": 3.894652577649696e-05, "loss": 1.7639, "step": 346200 }, { "epoch": 0.22, "learning_rate": 3.89433237271854e-05, "loss": 1.7419, "step": 346300 }, { "epoch": 0.22, "learning_rate": 3.894012167787384e-05, "loss": 1.7424, "step": 346400 }, { "epoch": 0.22, "learning_rate": 3.893691962856228e-05, "loss": 1.7539, "step": 346500 }, { "epoch": 0.22, "learning_rate": 3.893371757925072e-05, "loss": 1.7633, "step": 346600 }, { "epoch": 0.22, "learning_rate": 3.893051552993917e-05, "loss": 1.7436, "step": 346700 }, { "epoch": 0.22, "learning_rate": 3.89273134806276e-05, "loss": 1.7577, "step": 346800 }, { "epoch": 0.22, "learning_rate": 3.892411143131605e-05, "loss": 1.7633, "step": 346900 }, { "epoch": 0.22, "learning_rate": 3.8920909382004487e-05, "loss": 1.7457, "step": 347000 }, { "epoch": 0.22, "eval_loss": 1.7491931915283203, "eval_runtime": 93.2981, "eval_samples_per_second": 107.183, "eval_steps_per_second": 6.699, "step": 347000 }, { "epoch": 0.22, "learning_rate": 3.8917707332692926e-05, "loss": 1.774, "step": 347100 }, { "epoch": 0.22, "learning_rate": 3.8914505283381366e-05, "loss": 1.7361, "step": 347200 }, { "epoch": 0.22, "learning_rate": 3.8911303234069806e-05, "loss": 1.7685, "step": 347300 }, { "epoch": 0.22, "learning_rate": 3.8908101184758246e-05, "loss": 1.7534, "step": 347400 }, { "epoch": 0.22, "learning_rate": 3.8904899135446685e-05, "loss": 1.7605, "step": 347500 }, { "epoch": 0.22, "learning_rate": 3.890169708613513e-05, "loss": 1.771, "step": 347600 }, { "epoch": 0.22, "learning_rate": 3.8898495036823565e-05, "loss": 1.7412, "step": 347700 }, { "epoch": 0.22, "learning_rate": 3.889529298751201e-05, "loss": 1.7425, "step": 347800 }, { "epoch": 0.22, "learning_rate": 3.889209093820045e-05, "loss": 1.7453, "step": 347900 }, { "epoch": 0.22, "learning_rate": 3.888888888888889e-05, "loss": 1.7556, "step": 348000 }, { "epoch": 0.22, "eval_loss": 1.7481998205184937, "eval_runtime": 97.146, "eval_samples_per_second": 102.938, "eval_steps_per_second": 6.434, "step": 348000 }, { "epoch": 0.22, "learning_rate": 3.888568683957733e-05, "loss": 1.7464, "step": 348100 }, { "epoch": 0.22, "learning_rate": 3.888248479026577e-05, "loss": 1.7222, "step": 348200 }, { "epoch": 0.22, "learning_rate": 3.887928274095422e-05, "loss": 1.7555, "step": 348300 }, { "epoch": 0.22, "learning_rate": 3.887608069164265e-05, "loss": 1.752, "step": 348400 }, { "epoch": 0.22, "learning_rate": 3.8872878642331096e-05, "loss": 1.7452, "step": 348500 }, { "epoch": 0.22, "learning_rate": 3.8869676593019536e-05, "loss": 1.75, "step": 348600 }, { "epoch": 0.22, "learning_rate": 3.8866474543707976e-05, "loss": 1.7617, "step": 348700 }, { "epoch": 0.22, "learning_rate": 3.8863272494396415e-05, "loss": 1.7677, "step": 348800 }, { "epoch": 0.22, "learning_rate": 3.8860070445084855e-05, "loss": 1.7533, "step": 348900 }, { "epoch": 0.22, "learning_rate": 3.88568683957733e-05, "loss": 1.7603, "step": 349000 }, { "epoch": 0.22, "eval_loss": 1.7509477138519287, "eval_runtime": 94.741, "eval_samples_per_second": 105.551, "eval_steps_per_second": 6.597, "step": 349000 }, { "epoch": 0.22, "learning_rate": 3.8853666346461735e-05, "loss": 1.7621, "step": 349100 }, { "epoch": 0.22, "learning_rate": 3.885046429715018e-05, "loss": 1.7423, "step": 349200 }, { "epoch": 0.22, "learning_rate": 3.8847262247838614e-05, "loss": 1.7685, "step": 349300 }, { "epoch": 0.22, "learning_rate": 3.884406019852706e-05, "loss": 1.7662, "step": 349400 }, { "epoch": 0.22, "learning_rate": 3.88408581492155e-05, "loss": 1.7524, "step": 349500 }, { "epoch": 0.22, "learning_rate": 3.883765609990394e-05, "loss": 1.7581, "step": 349600 }, { "epoch": 0.22, "learning_rate": 3.883445405059239e-05, "loss": 1.7846, "step": 349700 }, { "epoch": 0.22, "learning_rate": 3.883125200128082e-05, "loss": 1.7831, "step": 349800 }, { "epoch": 0.22, "learning_rate": 3.8828049951969266e-05, "loss": 1.754, "step": 349900 }, { "epoch": 0.22, "learning_rate": 3.88248479026577e-05, "loss": 1.7431, "step": 350000 }, { "epoch": 0.22, "eval_loss": 1.747276782989502, "eval_runtime": 92.4928, "eval_samples_per_second": 108.116, "eval_steps_per_second": 6.757, "step": 350000 }, { "epoch": 0.22, "learning_rate": 3.8821645853346146e-05, "loss": 1.7599, "step": 350100 }, { "epoch": 0.22, "learning_rate": 3.8818443804034585e-05, "loss": 1.7624, "step": 350200 }, { "epoch": 0.22, "learning_rate": 3.8815241754723025e-05, "loss": 1.7713, "step": 350300 }, { "epoch": 0.22, "learning_rate": 3.8812039705411465e-05, "loss": 1.7433, "step": 350400 }, { "epoch": 0.22, "learning_rate": 3.8808837656099905e-05, "loss": 1.7436, "step": 350500 }, { "epoch": 0.22, "learning_rate": 3.880563560678835e-05, "loss": 1.7492, "step": 350600 }, { "epoch": 0.22, "learning_rate": 3.8802433557476784e-05, "loss": 1.7577, "step": 350700 }, { "epoch": 0.22, "learning_rate": 3.879923150816523e-05, "loss": 1.7506, "step": 350800 }, { "epoch": 0.22, "learning_rate": 3.8796029458853664e-05, "loss": 1.7587, "step": 350900 }, { "epoch": 0.22, "learning_rate": 3.879282740954211e-05, "loss": 1.745, "step": 351000 }, { "epoch": 0.22, "eval_loss": 1.747901201248169, "eval_runtime": 91.9322, "eval_samples_per_second": 108.776, "eval_steps_per_second": 6.798, "step": 351000 }, { "epoch": 0.22, "learning_rate": 3.878962536023055e-05, "loss": 1.7656, "step": 351100 }, { "epoch": 0.22, "learning_rate": 3.878642331091899e-05, "loss": 1.7508, "step": 351200 }, { "epoch": 0.22, "learning_rate": 3.8783221261607436e-05, "loss": 1.7569, "step": 351300 }, { "epoch": 0.22, "learning_rate": 3.878001921229587e-05, "loss": 1.7624, "step": 351400 }, { "epoch": 0.22, "learning_rate": 3.8776817162984315e-05, "loss": 1.7707, "step": 351500 }, { "epoch": 0.23, "learning_rate": 3.877361511367275e-05, "loss": 1.756, "step": 351600 }, { "epoch": 0.23, "learning_rate": 3.8770413064361195e-05, "loss": 1.7584, "step": 351700 }, { "epoch": 0.23, "learning_rate": 3.8767211015049635e-05, "loss": 1.7394, "step": 351800 }, { "epoch": 0.23, "learning_rate": 3.8764008965738074e-05, "loss": 1.7679, "step": 351900 }, { "epoch": 0.23, "learning_rate": 3.8760806916426514e-05, "loss": 1.7499, "step": 352000 }, { "epoch": 0.23, "eval_loss": 1.7471060752868652, "eval_runtime": 90.0259, "eval_samples_per_second": 111.079, "eval_steps_per_second": 6.942, "step": 352000 }, { "epoch": 0.23, "learning_rate": 3.8757604867114954e-05, "loss": 1.7626, "step": 352100 }, { "epoch": 0.23, "learning_rate": 3.87544028178034e-05, "loss": 1.7558, "step": 352200 }, { "epoch": 0.23, "learning_rate": 3.8751200768491833e-05, "loss": 1.7656, "step": 352300 }, { "epoch": 0.23, "learning_rate": 3.874799871918028e-05, "loss": 1.7589, "step": 352400 }, { "epoch": 0.23, "learning_rate": 3.874479666986871e-05, "loss": 1.7434, "step": 352500 }, { "epoch": 0.23, "learning_rate": 3.874159462055716e-05, "loss": 1.7561, "step": 352600 }, { "epoch": 0.23, "learning_rate": 3.87383925712456e-05, "loss": 1.7618, "step": 352700 }, { "epoch": 0.23, "learning_rate": 3.873519052193404e-05, "loss": 1.7498, "step": 352800 }, { "epoch": 0.23, "learning_rate": 3.8731988472622485e-05, "loss": 1.7686, "step": 352900 }, { "epoch": 0.23, "learning_rate": 3.872878642331092e-05, "loss": 1.7663, "step": 353000 }, { "epoch": 0.23, "eval_loss": 1.7479254007339478, "eval_runtime": 90.211, "eval_samples_per_second": 110.851, "eval_steps_per_second": 6.928, "step": 353000 }, { "epoch": 0.23, "learning_rate": 3.8725584373999365e-05, "loss": 1.7567, "step": 353100 }, { "epoch": 0.23, "learning_rate": 3.87223823246878e-05, "loss": 1.7779, "step": 353200 }, { "epoch": 0.23, "learning_rate": 3.8719180275376244e-05, "loss": 1.7633, "step": 353300 }, { "epoch": 0.23, "learning_rate": 3.8715978226064684e-05, "loss": 1.7458, "step": 353400 }, { "epoch": 0.23, "learning_rate": 3.8712776176753124e-05, "loss": 1.7553, "step": 353500 }, { "epoch": 0.23, "learning_rate": 3.8709574127441564e-05, "loss": 1.7533, "step": 353600 }, { "epoch": 0.23, "learning_rate": 3.870637207813e-05, "loss": 1.7508, "step": 353700 }, { "epoch": 0.23, "learning_rate": 3.870317002881845e-05, "loss": 1.7551, "step": 353800 }, { "epoch": 0.23, "learning_rate": 3.869996797950688e-05, "loss": 1.7607, "step": 353900 }, { "epoch": 0.23, "learning_rate": 3.869676593019533e-05, "loss": 1.7739, "step": 354000 }, { "epoch": 0.23, "eval_loss": 1.7445493936538696, "eval_runtime": 91.8265, "eval_samples_per_second": 108.901, "eval_steps_per_second": 6.806, "step": 354000 }, { "epoch": 0.23, "learning_rate": 3.869356388088376e-05, "loss": 1.7618, "step": 354100 }, { "epoch": 0.23, "learning_rate": 3.869036183157221e-05, "loss": 1.7605, "step": 354200 }, { "epoch": 0.23, "learning_rate": 3.868715978226065e-05, "loss": 1.7852, "step": 354300 }, { "epoch": 0.23, "learning_rate": 3.868395773294909e-05, "loss": 1.7572, "step": 354400 }, { "epoch": 0.23, "learning_rate": 3.8680755683637535e-05, "loss": 1.7535, "step": 354500 }, { "epoch": 0.23, "learning_rate": 3.867755363432597e-05, "loss": 1.7473, "step": 354600 }, { "epoch": 0.23, "learning_rate": 3.8674351585014414e-05, "loss": 1.7497, "step": 354700 }, { "epoch": 0.23, "learning_rate": 3.867114953570285e-05, "loss": 1.7697, "step": 354800 }, { "epoch": 0.23, "learning_rate": 3.8667947486391294e-05, "loss": 1.7493, "step": 354900 }, { "epoch": 0.23, "learning_rate": 3.8664745437079733e-05, "loss": 1.7574, "step": 355000 }, { "epoch": 0.23, "eval_loss": 1.7460416555404663, "eval_runtime": 91.3239, "eval_samples_per_second": 109.5, "eval_steps_per_second": 6.844, "step": 355000 }, { "epoch": 0.23, "learning_rate": 3.866154338776817e-05, "loss": 1.7592, "step": 355100 }, { "epoch": 0.23, "learning_rate": 3.865834133845661e-05, "loss": 1.7442, "step": 355200 }, { "epoch": 0.23, "learning_rate": 3.865513928914505e-05, "loss": 1.776, "step": 355300 }, { "epoch": 0.23, "learning_rate": 3.86519372398335e-05, "loss": 1.7755, "step": 355400 }, { "epoch": 0.23, "learning_rate": 3.864873519052193e-05, "loss": 1.7562, "step": 355500 }, { "epoch": 0.23, "learning_rate": 3.864553314121038e-05, "loss": 1.7498, "step": 355600 }, { "epoch": 0.23, "learning_rate": 3.864233109189881e-05, "loss": 1.7444, "step": 355700 }, { "epoch": 0.23, "learning_rate": 3.863912904258726e-05, "loss": 1.7578, "step": 355800 }, { "epoch": 0.23, "learning_rate": 3.86359269932757e-05, "loss": 1.7466, "step": 355900 }, { "epoch": 0.23, "learning_rate": 3.863272494396414e-05, "loss": 1.7421, "step": 356000 }, { "epoch": 0.23, "eval_loss": 1.7470468282699585, "eval_runtime": 89.5605, "eval_samples_per_second": 111.656, "eval_steps_per_second": 6.979, "step": 356000 }, { "epoch": 0.23, "learning_rate": 3.8629522894652584e-05, "loss": 1.7323, "step": 356100 }, { "epoch": 0.23, "learning_rate": 3.862632084534102e-05, "loss": 1.7631, "step": 356200 }, { "epoch": 0.23, "learning_rate": 3.8623118796029464e-05, "loss": 1.7742, "step": 356300 }, { "epoch": 0.23, "learning_rate": 3.8619916746717897e-05, "loss": 1.7585, "step": 356400 }, { "epoch": 0.23, "learning_rate": 3.861671469740634e-05, "loss": 1.7588, "step": 356500 }, { "epoch": 0.23, "learning_rate": 3.861351264809478e-05, "loss": 1.7652, "step": 356600 }, { "epoch": 0.23, "learning_rate": 3.861031059878322e-05, "loss": 1.7711, "step": 356700 }, { "epoch": 0.23, "learning_rate": 3.860710854947166e-05, "loss": 1.7696, "step": 356800 }, { "epoch": 0.23, "learning_rate": 3.86039065001601e-05, "loss": 1.7679, "step": 356900 }, { "epoch": 0.23, "learning_rate": 3.860070445084855e-05, "loss": 1.769, "step": 357000 }, { "epoch": 0.23, "eval_loss": 1.7463366985321045, "eval_runtime": 88.6469, "eval_samples_per_second": 112.807, "eval_steps_per_second": 7.05, "step": 357000 }, { "epoch": 0.23, "learning_rate": 3.859750240153698e-05, "loss": 1.757, "step": 357100 }, { "epoch": 0.23, "learning_rate": 3.859430035222543e-05, "loss": 1.7667, "step": 357200 }, { "epoch": 0.23, "learning_rate": 3.859109830291386e-05, "loss": 1.7661, "step": 357300 }, { "epoch": 0.23, "learning_rate": 3.858789625360231e-05, "loss": 1.7433, "step": 357400 }, { "epoch": 0.23, "learning_rate": 3.858469420429075e-05, "loss": 1.7559, "step": 357500 }, { "epoch": 0.23, "learning_rate": 3.858149215497919e-05, "loss": 1.7482, "step": 357600 }, { "epoch": 0.23, "learning_rate": 3.8578290105667633e-05, "loss": 1.7515, "step": 357700 }, { "epoch": 0.23, "learning_rate": 3.8575088056356066e-05, "loss": 1.74, "step": 357800 }, { "epoch": 0.23, "learning_rate": 3.857188600704451e-05, "loss": 1.7412, "step": 357900 }, { "epoch": 0.23, "learning_rate": 3.8568683957732946e-05, "loss": 1.7499, "step": 358000 }, { "epoch": 0.23, "eval_loss": 1.748681664466858, "eval_runtime": 94.1553, "eval_samples_per_second": 106.207, "eval_steps_per_second": 6.638, "step": 358000 }, { "epoch": 0.23, "learning_rate": 3.856548190842139e-05, "loss": 1.7587, "step": 358100 }, { "epoch": 0.23, "learning_rate": 3.856227985910983e-05, "loss": 1.7569, "step": 358200 }, { "epoch": 0.23, "learning_rate": 3.855907780979827e-05, "loss": 1.7505, "step": 358300 }, { "epoch": 0.23, "learning_rate": 3.855587576048671e-05, "loss": 1.7434, "step": 358400 }, { "epoch": 0.23, "learning_rate": 3.855267371117515e-05, "loss": 1.7607, "step": 358500 }, { "epoch": 0.23, "learning_rate": 3.85494716618636e-05, "loss": 1.747, "step": 358600 }, { "epoch": 0.23, "learning_rate": 3.854626961255203e-05, "loss": 1.7586, "step": 358700 }, { "epoch": 0.23, "learning_rate": 3.854306756324048e-05, "loss": 1.7476, "step": 358800 }, { "epoch": 0.23, "learning_rate": 3.853986551392892e-05, "loss": 1.7472, "step": 358900 }, { "epoch": 0.23, "learning_rate": 3.853666346461736e-05, "loss": 1.7345, "step": 359000 }, { "epoch": 0.23, "eval_loss": 1.7465978860855103, "eval_runtime": 94.5834, "eval_samples_per_second": 105.727, "eval_steps_per_second": 6.608, "step": 359000 }, { "epoch": 0.23, "learning_rate": 3.85334614153058e-05, "loss": 1.7697, "step": 359100 }, { "epoch": 0.23, "learning_rate": 3.8530259365994236e-05, "loss": 1.7569, "step": 359200 }, { "epoch": 0.23, "learning_rate": 3.852705731668268e-05, "loss": 1.7475, "step": 359300 }, { "epoch": 0.23, "learning_rate": 3.8523855267371116e-05, "loss": 1.7414, "step": 359400 }, { "epoch": 0.23, "learning_rate": 3.852065321805956e-05, "loss": 1.7539, "step": 359500 }, { "epoch": 0.23, "learning_rate": 3.8517451168748e-05, "loss": 1.7616, "step": 359600 }, { "epoch": 0.23, "learning_rate": 3.851424911943644e-05, "loss": 1.7453, "step": 359700 }, { "epoch": 0.23, "learning_rate": 3.851104707012488e-05, "loss": 1.7515, "step": 359800 }, { "epoch": 0.23, "learning_rate": 3.850784502081332e-05, "loss": 1.7394, "step": 359900 }, { "epoch": 0.23, "learning_rate": 3.850464297150176e-05, "loss": 1.7323, "step": 360000 }, { "epoch": 0.23, "eval_loss": 1.7468518018722534, "eval_runtime": 97.5622, "eval_samples_per_second": 102.499, "eval_steps_per_second": 6.406, "step": 360000 }, { "epoch": 0.23, "learning_rate": 3.85014409221902e-05, "loss": 1.7485, "step": 360100 }, { "epoch": 0.23, "learning_rate": 3.849823887287865e-05, "loss": 1.7474, "step": 360200 }, { "epoch": 0.23, "learning_rate": 3.849503682356708e-05, "loss": 1.7375, "step": 360300 }, { "epoch": 0.23, "learning_rate": 3.849183477425553e-05, "loss": 1.7606, "step": 360400 }, { "epoch": 0.23, "learning_rate": 3.8488632724943967e-05, "loss": 1.7573, "step": 360500 }, { "epoch": 0.23, "learning_rate": 3.8485430675632406e-05, "loss": 1.7477, "step": 360600 }, { "epoch": 0.23, "learning_rate": 3.8482228626320846e-05, "loss": 1.7437, "step": 360700 }, { "epoch": 0.23, "learning_rate": 3.8479026577009286e-05, "loss": 1.7464, "step": 360800 }, { "epoch": 0.23, "learning_rate": 3.847582452769773e-05, "loss": 1.7453, "step": 360900 }, { "epoch": 0.23, "learning_rate": 3.8472622478386165e-05, "loss": 1.7561, "step": 361000 }, { "epoch": 0.23, "eval_loss": 1.743094563484192, "eval_runtime": 97.6349, "eval_samples_per_second": 102.422, "eval_steps_per_second": 6.401, "step": 361000 }, { "epoch": 0.23, "learning_rate": 3.846942042907461e-05, "loss": 1.7308, "step": 361100 }, { "epoch": 0.23, "learning_rate": 3.846621837976305e-05, "loss": 1.7398, "step": 361200 }, { "epoch": 0.23, "learning_rate": 3.846301633045149e-05, "loss": 1.7625, "step": 361300 }, { "epoch": 0.23, "learning_rate": 3.845981428113993e-05, "loss": 1.7672, "step": 361400 }, { "epoch": 0.23, "learning_rate": 3.845661223182837e-05, "loss": 1.7568, "step": 361500 }, { "epoch": 0.23, "learning_rate": 3.845341018251681e-05, "loss": 1.7535, "step": 361600 }, { "epoch": 0.23, "learning_rate": 3.845020813320525e-05, "loss": 1.7585, "step": 361700 }, { "epoch": 0.23, "learning_rate": 3.84470060838937e-05, "loss": 1.749, "step": 361800 }, { "epoch": 0.23, "learning_rate": 3.8443804034582136e-05, "loss": 1.7469, "step": 361900 }, { "epoch": 0.23, "learning_rate": 3.8440601985270576e-05, "loss": 1.7497, "step": 362000 }, { "epoch": 0.23, "eval_loss": 1.7448327541351318, "eval_runtime": 91.8362, "eval_samples_per_second": 108.89, "eval_steps_per_second": 6.806, "step": 362000 }, { "epoch": 0.23, "learning_rate": 3.8437399935959016e-05, "loss": 1.7349, "step": 362100 }, { "epoch": 0.23, "learning_rate": 3.8434197886647456e-05, "loss": 1.7338, "step": 362200 }, { "epoch": 0.23, "learning_rate": 3.8430995837335895e-05, "loss": 1.7491, "step": 362300 }, { "epoch": 0.23, "learning_rate": 3.8427793788024335e-05, "loss": 1.7589, "step": 362400 }, { "epoch": 0.23, "learning_rate": 3.842459173871278e-05, "loss": 1.7415, "step": 362500 }, { "epoch": 0.23, "learning_rate": 3.8421389689401215e-05, "loss": 1.7516, "step": 362600 }, { "epoch": 0.23, "learning_rate": 3.841818764008966e-05, "loss": 1.7668, "step": 362700 }, { "epoch": 0.23, "learning_rate": 3.84149855907781e-05, "loss": 1.7474, "step": 362800 }, { "epoch": 0.23, "learning_rate": 3.841178354146654e-05, "loss": 1.7677, "step": 362900 }, { "epoch": 0.23, "learning_rate": 3.840858149215498e-05, "loss": 1.7656, "step": 363000 }, { "epoch": 0.23, "eval_loss": 1.7445811033248901, "eval_runtime": 92.6675, "eval_samples_per_second": 107.913, "eval_steps_per_second": 6.745, "step": 363000 }, { "epoch": 0.23, "learning_rate": 3.840537944284342e-05, "loss": 1.7448, "step": 363100 }, { "epoch": 0.23, "learning_rate": 3.840217739353186e-05, "loss": 1.7542, "step": 363200 }, { "epoch": 0.23, "learning_rate": 3.83989753442203e-05, "loss": 1.7706, "step": 363300 }, { "epoch": 0.23, "learning_rate": 3.8395773294908746e-05, "loss": 1.7423, "step": 363400 }, { "epoch": 0.23, "learning_rate": 3.8392571245597186e-05, "loss": 1.7518, "step": 363500 }, { "epoch": 0.23, "learning_rate": 3.8389369196285626e-05, "loss": 1.7616, "step": 363600 }, { "epoch": 0.23, "learning_rate": 3.8386167146974065e-05, "loss": 1.7393, "step": 363700 }, { "epoch": 0.23, "learning_rate": 3.8382965097662505e-05, "loss": 1.7442, "step": 363800 }, { "epoch": 0.23, "learning_rate": 3.8379763048350945e-05, "loss": 1.7383, "step": 363900 }, { "epoch": 0.23, "learning_rate": 3.8376560999039384e-05, "loss": 1.7563, "step": 364000 }, { "epoch": 0.23, "eval_loss": 1.746949553489685, "eval_runtime": 94.0383, "eval_samples_per_second": 106.34, "eval_steps_per_second": 6.646, "step": 364000 }, { "epoch": 0.23, "learning_rate": 3.837335894972783e-05, "loss": 1.7538, "step": 364100 }, { "epoch": 0.23, "learning_rate": 3.837015690041627e-05, "loss": 1.752, "step": 364200 }, { "epoch": 0.23, "learning_rate": 3.836695485110471e-05, "loss": 1.7387, "step": 364300 }, { "epoch": 0.23, "learning_rate": 3.836375280179315e-05, "loss": 1.7547, "step": 364400 }, { "epoch": 0.23, "learning_rate": 3.836055075248159e-05, "loss": 1.7409, "step": 364500 }, { "epoch": 0.23, "learning_rate": 3.835734870317003e-05, "loss": 1.7451, "step": 364600 }, { "epoch": 0.23, "learning_rate": 3.835414665385847e-05, "loss": 1.746, "step": 364700 }, { "epoch": 0.23, "learning_rate": 3.835094460454691e-05, "loss": 1.7424, "step": 364800 }, { "epoch": 0.23, "learning_rate": 3.834774255523535e-05, "loss": 1.7748, "step": 364900 }, { "epoch": 0.23, "learning_rate": 3.8344540505923795e-05, "loss": 1.7542, "step": 365000 }, { "epoch": 0.23, "eval_loss": 1.7440320253372192, "eval_runtime": 91.9571, "eval_samples_per_second": 108.746, "eval_steps_per_second": 6.797, "step": 365000 }, { "epoch": 0.23, "learning_rate": 3.8341338456612235e-05, "loss": 1.7559, "step": 365100 }, { "epoch": 0.23, "learning_rate": 3.8338136407300675e-05, "loss": 1.7222, "step": 365200 }, { "epoch": 0.23, "learning_rate": 3.8334934357989115e-05, "loss": 1.7406, "step": 365300 }, { "epoch": 0.23, "learning_rate": 3.8331732308677554e-05, "loss": 1.7698, "step": 365400 }, { "epoch": 0.23, "learning_rate": 3.8328530259365994e-05, "loss": 1.7412, "step": 365500 }, { "epoch": 0.23, "learning_rate": 3.8325328210054434e-05, "loss": 1.743, "step": 365600 }, { "epoch": 0.23, "learning_rate": 3.832212616074288e-05, "loss": 1.765, "step": 365700 }, { "epoch": 0.23, "learning_rate": 3.831892411143132e-05, "loss": 1.733, "step": 365800 }, { "epoch": 0.23, "learning_rate": 3.831572206211976e-05, "loss": 1.7647, "step": 365900 }, { "epoch": 0.23, "learning_rate": 3.83125200128082e-05, "loss": 1.7652, "step": 366000 }, { "epoch": 0.23, "eval_loss": 1.7457003593444824, "eval_runtime": 97.6854, "eval_samples_per_second": 102.369, "eval_steps_per_second": 6.398, "step": 366000 }, { "epoch": 0.23, "learning_rate": 3.830931796349664e-05, "loss": 1.7494, "step": 366100 }, { "epoch": 0.23, "learning_rate": 3.830611591418508e-05, "loss": 1.7493, "step": 366200 }, { "epoch": 0.23, "learning_rate": 3.830291386487352e-05, "loss": 1.7289, "step": 366300 }, { "epoch": 0.23, "learning_rate": 3.829971181556196e-05, "loss": 1.7565, "step": 366400 }, { "epoch": 0.23, "learning_rate": 3.8296509766250405e-05, "loss": 1.7362, "step": 366500 }, { "epoch": 0.23, "learning_rate": 3.8293307716938845e-05, "loss": 1.7324, "step": 366600 }, { "epoch": 0.23, "learning_rate": 3.8290105667627285e-05, "loss": 1.7224, "step": 366700 }, { "epoch": 0.23, "learning_rate": 3.8286903618315724e-05, "loss": 1.765, "step": 366800 }, { "epoch": 0.23, "learning_rate": 3.8283701569004164e-05, "loss": 1.7478, "step": 366900 }, { "epoch": 0.23, "learning_rate": 3.8280499519692604e-05, "loss": 1.7446, "step": 367000 }, { "epoch": 0.23, "eval_loss": 1.7456002235412598, "eval_runtime": 98.6837, "eval_samples_per_second": 101.334, "eval_steps_per_second": 6.333, "step": 367000 }, { "epoch": 0.23, "learning_rate": 3.8277297470381043e-05, "loss": 1.7303, "step": 367100 }, { "epoch": 0.24, "learning_rate": 3.827409542106948e-05, "loss": 1.7423, "step": 367200 }, { "epoch": 0.24, "learning_rate": 3.827089337175793e-05, "loss": 1.75, "step": 367300 }, { "epoch": 0.24, "learning_rate": 3.826769132244637e-05, "loss": 1.7584, "step": 367400 }, { "epoch": 0.24, "learning_rate": 3.826448927313481e-05, "loss": 1.7418, "step": 367500 }, { "epoch": 0.24, "learning_rate": 3.826128722382325e-05, "loss": 1.7443, "step": 367600 }, { "epoch": 0.24, "learning_rate": 3.825808517451169e-05, "loss": 1.78, "step": 367700 }, { "epoch": 0.24, "learning_rate": 3.825488312520013e-05, "loss": 1.7431, "step": 367800 }, { "epoch": 0.24, "learning_rate": 3.825168107588857e-05, "loss": 1.7582, "step": 367900 }, { "epoch": 0.24, "learning_rate": 3.824847902657701e-05, "loss": 1.7303, "step": 368000 }, { "epoch": 0.24, "eval_loss": 1.7463531494140625, "eval_runtime": 95.0092, "eval_samples_per_second": 105.253, "eval_steps_per_second": 6.578, "step": 368000 }, { "epoch": 0.24, "learning_rate": 3.8245276977265454e-05, "loss": 1.7552, "step": 368100 }, { "epoch": 0.24, "learning_rate": 3.8242074927953894e-05, "loss": 1.7621, "step": 368200 }, { "epoch": 0.24, "learning_rate": 3.8238872878642334e-05, "loss": 1.7479, "step": 368300 }, { "epoch": 0.24, "learning_rate": 3.8235670829330774e-05, "loss": 1.7451, "step": 368400 }, { "epoch": 0.24, "learning_rate": 3.823246878001921e-05, "loss": 1.7519, "step": 368500 }, { "epoch": 0.24, "learning_rate": 3.822926673070765e-05, "loss": 1.7642, "step": 368600 }, { "epoch": 0.24, "learning_rate": 3.822606468139609e-05, "loss": 1.7549, "step": 368700 }, { "epoch": 0.24, "learning_rate": 3.822286263208454e-05, "loss": 1.7503, "step": 368800 }, { "epoch": 0.24, "learning_rate": 3.821966058277298e-05, "loss": 1.7476, "step": 368900 }, { "epoch": 0.24, "learning_rate": 3.821645853346142e-05, "loss": 1.7699, "step": 369000 }, { "epoch": 0.24, "eval_loss": 1.7444673776626587, "eval_runtime": 96.4842, "eval_samples_per_second": 103.644, "eval_steps_per_second": 6.478, "step": 369000 }, { "epoch": 0.24, "learning_rate": 3.821325648414986e-05, "loss": 1.7538, "step": 369100 }, { "epoch": 0.24, "learning_rate": 3.82100544348383e-05, "loss": 1.7351, "step": 369200 }, { "epoch": 0.24, "learning_rate": 3.820685238552674e-05, "loss": 1.7569, "step": 369300 }, { "epoch": 0.24, "learning_rate": 3.820365033621518e-05, "loss": 1.7299, "step": 369400 }, { "epoch": 0.24, "learning_rate": 3.8200448286903624e-05, "loss": 1.7463, "step": 369500 }, { "epoch": 0.24, "learning_rate": 3.819724623759206e-05, "loss": 1.746, "step": 369600 }, { "epoch": 0.24, "learning_rate": 3.8194044188280504e-05, "loss": 1.7472, "step": 369700 }, { "epoch": 0.24, "learning_rate": 3.8190842138968944e-05, "loss": 1.7458, "step": 369800 }, { "epoch": 0.24, "learning_rate": 3.818764008965738e-05, "loss": 1.7526, "step": 369900 }, { "epoch": 0.24, "learning_rate": 3.818443804034582e-05, "loss": 1.7412, "step": 370000 }, { "epoch": 0.24, "eval_loss": 1.7468894720077515, "eval_runtime": 94.2349, "eval_samples_per_second": 106.118, "eval_steps_per_second": 6.632, "step": 370000 }, { "epoch": 0.24, "learning_rate": 3.818123599103426e-05, "loss": 1.7489, "step": 370100 }, { "epoch": 0.24, "learning_rate": 3.81780339417227e-05, "loss": 1.744, "step": 370200 }, { "epoch": 0.24, "learning_rate": 3.817483189241114e-05, "loss": 1.7635, "step": 370300 }, { "epoch": 0.24, "learning_rate": 3.817162984309959e-05, "loss": 1.7501, "step": 370400 }, { "epoch": 0.24, "learning_rate": 3.816842779378803e-05, "loss": 1.7471, "step": 370500 }, { "epoch": 0.24, "learning_rate": 3.816522574447647e-05, "loss": 1.7567, "step": 370600 }, { "epoch": 0.24, "learning_rate": 3.816202369516491e-05, "loss": 1.7339, "step": 370700 }, { "epoch": 0.24, "learning_rate": 3.815882164585335e-05, "loss": 1.7504, "step": 370800 }, { "epoch": 0.24, "learning_rate": 3.815561959654179e-05, "loss": 1.7236, "step": 370900 }, { "epoch": 0.24, "learning_rate": 3.815241754723023e-05, "loss": 1.7587, "step": 371000 }, { "epoch": 0.24, "eval_loss": 1.7457302808761597, "eval_runtime": 91.8805, "eval_samples_per_second": 108.837, "eval_steps_per_second": 6.802, "step": 371000 }, { "epoch": 0.24, "learning_rate": 3.8149215497918674e-05, "loss": 1.7761, "step": 371100 }, { "epoch": 0.24, "learning_rate": 3.814601344860711e-05, "loss": 1.7387, "step": 371200 }, { "epoch": 0.24, "learning_rate": 3.814281139929555e-05, "loss": 1.7434, "step": 371300 }, { "epoch": 0.24, "learning_rate": 3.813960934998399e-05, "loss": 1.7273, "step": 371400 }, { "epoch": 0.24, "learning_rate": 3.813640730067243e-05, "loss": 1.7339, "step": 371500 }, { "epoch": 0.24, "learning_rate": 3.813320525136087e-05, "loss": 1.7524, "step": 371600 }, { "epoch": 0.24, "learning_rate": 3.813000320204931e-05, "loss": 1.761, "step": 371700 }, { "epoch": 0.24, "learning_rate": 3.812680115273776e-05, "loss": 1.7409, "step": 371800 }, { "epoch": 0.24, "learning_rate": 3.812359910342619e-05, "loss": 1.7568, "step": 371900 }, { "epoch": 0.24, "learning_rate": 3.812039705411464e-05, "loss": 1.7527, "step": 372000 }, { "epoch": 0.24, "eval_loss": 1.7434207201004028, "eval_runtime": 92.411, "eval_samples_per_second": 108.212, "eval_steps_per_second": 6.763, "step": 372000 }, { "epoch": 0.24, "learning_rate": 3.811719500480308e-05, "loss": 1.7608, "step": 372100 }, { "epoch": 0.24, "learning_rate": 3.811399295549152e-05, "loss": 1.7539, "step": 372200 }, { "epoch": 0.24, "learning_rate": 3.811079090617996e-05, "loss": 1.7383, "step": 372300 }, { "epoch": 0.24, "learning_rate": 3.81075888568684e-05, "loss": 1.7581, "step": 372400 }, { "epoch": 0.24, "learning_rate": 3.810438680755684e-05, "loss": 1.7499, "step": 372500 }, { "epoch": 0.24, "learning_rate": 3.8101184758245277e-05, "loss": 1.7356, "step": 372600 }, { "epoch": 0.24, "learning_rate": 3.809798270893372e-05, "loss": 1.7437, "step": 372700 }, { "epoch": 0.24, "learning_rate": 3.8094780659622156e-05, "loss": 1.7502, "step": 372800 }, { "epoch": 0.24, "learning_rate": 3.80915786103106e-05, "loss": 1.7646, "step": 372900 }, { "epoch": 0.24, "learning_rate": 3.808837656099904e-05, "loss": 1.742, "step": 373000 }, { "epoch": 0.24, "eval_loss": 1.7445658445358276, "eval_runtime": 96.071, "eval_samples_per_second": 104.09, "eval_steps_per_second": 6.506, "step": 373000 }, { "epoch": 0.24, "learning_rate": 3.808517451168748e-05, "loss": 1.7432, "step": 373100 }, { "epoch": 0.24, "learning_rate": 3.808197246237592e-05, "loss": 1.7439, "step": 373200 }, { "epoch": 0.24, "learning_rate": 3.807877041306436e-05, "loss": 1.7643, "step": 373300 }, { "epoch": 0.24, "learning_rate": 3.807556836375281e-05, "loss": 1.756, "step": 373400 }, { "epoch": 0.24, "learning_rate": 3.807236631444124e-05, "loss": 1.756, "step": 373500 }, { "epoch": 0.24, "learning_rate": 3.806916426512969e-05, "loss": 1.7479, "step": 373600 }, { "epoch": 0.24, "learning_rate": 3.806596221581813e-05, "loss": 1.7522, "step": 373700 }, { "epoch": 0.24, "learning_rate": 3.806276016650657e-05, "loss": 1.7381, "step": 373800 }, { "epoch": 0.24, "learning_rate": 3.805955811719501e-05, "loss": 1.7493, "step": 373900 }, { "epoch": 0.24, "learning_rate": 3.8056356067883446e-05, "loss": 1.748, "step": 374000 }, { "epoch": 0.24, "eval_loss": 1.7483439445495605, "eval_runtime": 97.7375, "eval_samples_per_second": 102.315, "eval_steps_per_second": 6.395, "step": 374000 }, { "epoch": 0.24, "learning_rate": 3.805315401857189e-05, "loss": 1.7541, "step": 374100 }, { "epoch": 0.24, "learning_rate": 3.8049951969260326e-05, "loss": 1.7553, "step": 374200 }, { "epoch": 0.24, "learning_rate": 3.804674991994877e-05, "loss": 1.7473, "step": 374300 }, { "epoch": 0.24, "learning_rate": 3.8043547870637205e-05, "loss": 1.745, "step": 374400 }, { "epoch": 0.24, "learning_rate": 3.804034582132565e-05, "loss": 1.7168, "step": 374500 }, { "epoch": 0.24, "learning_rate": 3.803714377201409e-05, "loss": 1.7579, "step": 374600 }, { "epoch": 0.24, "learning_rate": 3.803394172270253e-05, "loss": 1.746, "step": 374700 }, { "epoch": 0.24, "learning_rate": 3.803073967339097e-05, "loss": 1.7618, "step": 374800 }, { "epoch": 0.24, "learning_rate": 3.802753762407941e-05, "loss": 1.76, "step": 374900 }, { "epoch": 0.24, "learning_rate": 3.802433557476786e-05, "loss": 1.7566, "step": 375000 }, { "epoch": 0.24, "eval_loss": 1.7437068223953247, "eval_runtime": 95.6045, "eval_samples_per_second": 104.598, "eval_steps_per_second": 6.537, "step": 375000 }, { "epoch": 0.24, "learning_rate": 3.802113352545629e-05, "loss": 1.7291, "step": 375100 }, { "epoch": 0.24, "learning_rate": 3.801793147614474e-05, "loss": 1.7554, "step": 375200 }, { "epoch": 0.24, "learning_rate": 3.8014729426833177e-05, "loss": 1.7495, "step": 375300 }, { "epoch": 0.24, "learning_rate": 3.8011527377521616e-05, "loss": 1.7492, "step": 375400 }, { "epoch": 0.24, "learning_rate": 3.8008325328210056e-05, "loss": 1.7521, "step": 375500 }, { "epoch": 0.24, "learning_rate": 3.8005123278898496e-05, "loss": 1.7454, "step": 375600 }, { "epoch": 0.24, "learning_rate": 3.800192122958694e-05, "loss": 1.7258, "step": 375700 }, { "epoch": 0.24, "learning_rate": 3.7998719180275375e-05, "loss": 1.7401, "step": 375800 }, { "epoch": 0.24, "learning_rate": 3.799551713096382e-05, "loss": 1.7621, "step": 375900 }, { "epoch": 0.24, "learning_rate": 3.7992315081652255e-05, "loss": 1.7574, "step": 376000 }, { "epoch": 0.24, "eval_loss": 1.743612289428711, "eval_runtime": 91.8455, "eval_samples_per_second": 108.878, "eval_steps_per_second": 6.805, "step": 376000 }, { "epoch": 0.24, "learning_rate": 3.79891130323407e-05, "loss": 1.7517, "step": 376100 }, { "epoch": 0.24, "learning_rate": 3.798591098302914e-05, "loss": 1.749, "step": 376200 }, { "epoch": 0.24, "learning_rate": 3.798270893371758e-05, "loss": 1.755, "step": 376300 }, { "epoch": 0.24, "learning_rate": 3.797950688440603e-05, "loss": 1.7403, "step": 376400 }, { "epoch": 0.24, "learning_rate": 3.797630483509446e-05, "loss": 1.7457, "step": 376500 }, { "epoch": 0.24, "learning_rate": 3.797310278578291e-05, "loss": 1.7431, "step": 376600 }, { "epoch": 0.24, "learning_rate": 3.796990073647134e-05, "loss": 1.7347, "step": 376700 }, { "epoch": 0.24, "learning_rate": 3.7966698687159786e-05, "loss": 1.7423, "step": 376800 }, { "epoch": 0.24, "learning_rate": 3.7963496637848226e-05, "loss": 1.7438, "step": 376900 }, { "epoch": 0.24, "learning_rate": 3.7960294588536666e-05, "loss": 1.7627, "step": 377000 }, { "epoch": 0.24, "eval_loss": 1.74214506149292, "eval_runtime": 94.4176, "eval_samples_per_second": 105.912, "eval_steps_per_second": 6.62, "step": 377000 }, { "epoch": 0.24, "learning_rate": 3.7957092539225105e-05, "loss": 1.7354, "step": 377100 }, { "epoch": 0.24, "learning_rate": 3.7953890489913545e-05, "loss": 1.7365, "step": 377200 }, { "epoch": 0.24, "learning_rate": 3.795068844060199e-05, "loss": 1.7313, "step": 377300 }, { "epoch": 0.24, "learning_rate": 3.7947486391290425e-05, "loss": 1.7145, "step": 377400 }, { "epoch": 0.24, "learning_rate": 3.794428434197887e-05, "loss": 1.7181, "step": 377500 }, { "epoch": 0.24, "learning_rate": 3.7941082292667304e-05, "loss": 1.7524, "step": 377600 }, { "epoch": 0.24, "learning_rate": 3.793788024335575e-05, "loss": 1.7453, "step": 377700 }, { "epoch": 0.24, "learning_rate": 3.793467819404419e-05, "loss": 1.7271, "step": 377800 }, { "epoch": 0.24, "learning_rate": 3.793147614473263e-05, "loss": 1.7344, "step": 377900 }, { "epoch": 0.24, "learning_rate": 3.792827409542108e-05, "loss": 1.737, "step": 378000 }, { "epoch": 0.24, "eval_loss": 1.7458417415618896, "eval_runtime": 93.0403, "eval_samples_per_second": 107.48, "eval_steps_per_second": 6.718, "step": 378000 }, { "epoch": 0.24, "learning_rate": 3.792507204610951e-05, "loss": 1.7427, "step": 378100 }, { "epoch": 0.24, "learning_rate": 3.7921869996797956e-05, "loss": 1.7294, "step": 378200 }, { "epoch": 0.24, "learning_rate": 3.791866794748639e-05, "loss": 1.7465, "step": 378300 }, { "epoch": 0.24, "learning_rate": 3.7915465898174836e-05, "loss": 1.7428, "step": 378400 }, { "epoch": 0.24, "learning_rate": 3.7912263848863275e-05, "loss": 1.7457, "step": 378500 }, { "epoch": 0.24, "learning_rate": 3.7909061799551715e-05, "loss": 1.7507, "step": 378600 }, { "epoch": 0.24, "learning_rate": 3.7905859750240155e-05, "loss": 1.7427, "step": 378700 }, { "epoch": 0.24, "learning_rate": 3.7902657700928595e-05, "loss": 1.759, "step": 378800 }, { "epoch": 0.24, "learning_rate": 3.789945565161704e-05, "loss": 1.7326, "step": 378900 }, { "epoch": 0.24, "learning_rate": 3.7896253602305474e-05, "loss": 1.7527, "step": 379000 }, { "epoch": 0.24, "eval_loss": 1.7435646057128906, "eval_runtime": 93.0831, "eval_samples_per_second": 107.431, "eval_steps_per_second": 6.714, "step": 379000 }, { "epoch": 0.24, "learning_rate": 3.789305155299392e-05, "loss": 1.7258, "step": 379100 }, { "epoch": 0.24, "learning_rate": 3.7889849503682353e-05, "loss": 1.7563, "step": 379200 }, { "epoch": 0.24, "learning_rate": 3.78866474543708e-05, "loss": 1.7459, "step": 379300 }, { "epoch": 0.24, "learning_rate": 3.788344540505924e-05, "loss": 1.7414, "step": 379400 }, { "epoch": 0.24, "learning_rate": 3.788024335574768e-05, "loss": 1.7428, "step": 379500 }, { "epoch": 0.24, "learning_rate": 3.7877041306436126e-05, "loss": 1.7181, "step": 379600 }, { "epoch": 0.24, "learning_rate": 3.787383925712456e-05, "loss": 1.7498, "step": 379700 }, { "epoch": 0.24, "learning_rate": 3.7870637207813005e-05, "loss": 1.754, "step": 379800 }, { "epoch": 0.24, "learning_rate": 3.786743515850144e-05, "loss": 1.7607, "step": 379900 }, { "epoch": 0.24, "learning_rate": 3.7864233109189885e-05, "loss": 1.7234, "step": 380000 }, { "epoch": 0.24, "eval_loss": 1.7444578409194946, "eval_runtime": 96.3552, "eval_samples_per_second": 103.783, "eval_steps_per_second": 6.486, "step": 380000 }, { "epoch": 0.24, "learning_rate": 3.7861031059878325e-05, "loss": 1.7418, "step": 380100 }, { "epoch": 0.24, "learning_rate": 3.7857829010566764e-05, "loss": 1.7355, "step": 380200 }, { "epoch": 0.24, "learning_rate": 3.7854626961255204e-05, "loss": 1.7415, "step": 380300 }, { "epoch": 0.24, "learning_rate": 3.7851424911943644e-05, "loss": 1.7392, "step": 380400 }, { "epoch": 0.24, "learning_rate": 3.784822286263209e-05, "loss": 1.7482, "step": 380500 }, { "epoch": 0.24, "learning_rate": 3.7845020813320523e-05, "loss": 1.7389, "step": 380600 }, { "epoch": 0.24, "learning_rate": 3.784181876400897e-05, "loss": 1.765, "step": 380700 }, { "epoch": 0.24, "learning_rate": 3.78386167146974e-05, "loss": 1.7402, "step": 380800 }, { "epoch": 0.24, "learning_rate": 3.783541466538585e-05, "loss": 1.7395, "step": 380900 }, { "epoch": 0.24, "learning_rate": 3.783221261607429e-05, "loss": 1.7306, "step": 381000 }, { "epoch": 0.24, "eval_loss": 1.7445309162139893, "eval_runtime": 93.5333, "eval_samples_per_second": 106.914, "eval_steps_per_second": 6.682, "step": 381000 }, { "epoch": 0.24, "learning_rate": 3.782901056676273e-05, "loss": 1.7489, "step": 381100 }, { "epoch": 0.24, "learning_rate": 3.7825808517451175e-05, "loss": 1.7298, "step": 381200 }, { "epoch": 0.24, "learning_rate": 3.782260646813961e-05, "loss": 1.7416, "step": 381300 }, { "epoch": 0.24, "learning_rate": 3.7819404418828055e-05, "loss": 1.7507, "step": 381400 }, { "epoch": 0.24, "learning_rate": 3.781620236951649e-05, "loss": 1.749, "step": 381500 }, { "epoch": 0.24, "learning_rate": 3.7813000320204934e-05, "loss": 1.7461, "step": 381600 }, { "epoch": 0.24, "learning_rate": 3.7809798270893374e-05, "loss": 1.7277, "step": 381700 }, { "epoch": 0.24, "learning_rate": 3.7806596221581814e-05, "loss": 1.7529, "step": 381800 }, { "epoch": 0.24, "learning_rate": 3.7803394172270254e-05, "loss": 1.7508, "step": 381900 }, { "epoch": 0.24, "learning_rate": 3.780019212295869e-05, "loss": 1.7392, "step": 382000 }, { "epoch": 0.24, "eval_loss": 1.7485840320587158, "eval_runtime": 89.9909, "eval_samples_per_second": 111.122, "eval_steps_per_second": 6.945, "step": 382000 }, { "epoch": 0.24, "learning_rate": 3.779699007364714e-05, "loss": 1.7316, "step": 382100 }, { "epoch": 0.24, "learning_rate": 3.779378802433557e-05, "loss": 1.7455, "step": 382200 }, { "epoch": 0.24, "learning_rate": 3.779058597502402e-05, "loss": 1.7581, "step": 382300 }, { "epoch": 0.24, "learning_rate": 3.778738392571245e-05, "loss": 1.7492, "step": 382400 }, { "epoch": 0.24, "learning_rate": 3.77841818764009e-05, "loss": 1.751, "step": 382500 }, { "epoch": 0.24, "learning_rate": 3.778097982708934e-05, "loss": 1.7423, "step": 382600 }, { "epoch": 0.24, "learning_rate": 3.777777777777778e-05, "loss": 1.736, "step": 382700 }, { "epoch": 0.24, "learning_rate": 3.7774575728466225e-05, "loss": 1.7685, "step": 382800 }, { "epoch": 0.25, "learning_rate": 3.777137367915466e-05, "loss": 1.7185, "step": 382900 }, { "epoch": 0.25, "learning_rate": 3.7768171629843104e-05, "loss": 1.7274, "step": 383000 }, { "epoch": 0.25, "eval_loss": 1.7461029291152954, "eval_runtime": 92.0696, "eval_samples_per_second": 108.614, "eval_steps_per_second": 6.788, "step": 383000 }, { "epoch": 0.25, "learning_rate": 3.776496958053154e-05, "loss": 1.7098, "step": 383100 }, { "epoch": 0.25, "learning_rate": 3.7761767531219984e-05, "loss": 1.7517, "step": 383200 }, { "epoch": 0.25, "learning_rate": 3.7758565481908423e-05, "loss": 1.7471, "step": 383300 }, { "epoch": 0.25, "learning_rate": 3.775536343259686e-05, "loss": 1.7435, "step": 383400 }, { "epoch": 0.25, "learning_rate": 3.77521613832853e-05, "loss": 1.7475, "step": 383500 }, { "epoch": 0.25, "learning_rate": 3.774895933397374e-05, "loss": 1.7402, "step": 383600 }, { "epoch": 0.25, "learning_rate": 3.774575728466219e-05, "loss": 1.7311, "step": 383700 }, { "epoch": 0.25, "learning_rate": 3.774255523535062e-05, "loss": 1.7246, "step": 383800 }, { "epoch": 0.25, "learning_rate": 3.773935318603907e-05, "loss": 1.758, "step": 383900 }, { "epoch": 0.25, "learning_rate": 3.773615113672751e-05, "loss": 1.7275, "step": 384000 }, { "epoch": 0.25, "eval_loss": 1.7488383054733276, "eval_runtime": 90.7739, "eval_samples_per_second": 110.164, "eval_steps_per_second": 6.885, "step": 384000 }, { "epoch": 0.25, "learning_rate": 3.773294908741595e-05, "loss": 1.7294, "step": 384100 }, { "epoch": 0.25, "learning_rate": 3.772974703810439e-05, "loss": 1.7408, "step": 384200 }, { "epoch": 0.25, "learning_rate": 3.772654498879283e-05, "loss": 1.7477, "step": 384300 }, { "epoch": 0.25, "learning_rate": 3.7723342939481274e-05, "loss": 1.7371, "step": 384400 }, { "epoch": 0.25, "learning_rate": 3.772014089016971e-05, "loss": 1.7341, "step": 384500 }, { "epoch": 0.25, "learning_rate": 3.7716938840858154e-05, "loss": 1.7293, "step": 384600 }, { "epoch": 0.25, "learning_rate": 3.7713736791546587e-05, "loss": 1.7574, "step": 384700 }, { "epoch": 0.25, "learning_rate": 3.771053474223503e-05, "loss": 1.7486, "step": 384800 }, { "epoch": 0.25, "learning_rate": 3.770733269292347e-05, "loss": 1.7409, "step": 384900 }, { "epoch": 0.25, "learning_rate": 3.770413064361191e-05, "loss": 1.7336, "step": 385000 }, { "epoch": 0.25, "eval_loss": 1.7428812980651855, "eval_runtime": 95.5326, "eval_samples_per_second": 104.676, "eval_steps_per_second": 6.542, "step": 385000 }, { "epoch": 0.25, "learning_rate": 3.770092859430035e-05, "loss": 1.7397, "step": 385100 }, { "epoch": 0.25, "learning_rate": 3.769772654498879e-05, "loss": 1.7361, "step": 385200 }, { "epoch": 0.25, "learning_rate": 3.769452449567724e-05, "loss": 1.7313, "step": 385300 }, { "epoch": 0.25, "learning_rate": 3.769132244636567e-05, "loss": 1.7394, "step": 385400 }, { "epoch": 0.25, "learning_rate": 3.768812039705412e-05, "loss": 1.7452, "step": 385500 }, { "epoch": 0.25, "learning_rate": 3.768491834774256e-05, "loss": 1.7222, "step": 385600 }, { "epoch": 0.25, "learning_rate": 3.7681716298431e-05, "loss": 1.7327, "step": 385700 }, { "epoch": 0.25, "learning_rate": 3.767851424911944e-05, "loss": 1.7653, "step": 385800 }, { "epoch": 0.25, "learning_rate": 3.767531219980788e-05, "loss": 1.7476, "step": 385900 }, { "epoch": 0.25, "learning_rate": 3.7672110150496323e-05, "loss": 1.7406, "step": 386000 }, { "epoch": 0.25, "eval_loss": 1.7449015378952026, "eval_runtime": 94.911, "eval_samples_per_second": 105.362, "eval_steps_per_second": 6.585, "step": 386000 }, { "epoch": 0.25, "learning_rate": 3.7668908101184756e-05, "loss": 1.7431, "step": 386100 }, { "epoch": 0.25, "learning_rate": 3.76657060518732e-05, "loss": 1.7393, "step": 386200 }, { "epoch": 0.25, "learning_rate": 3.766250400256164e-05, "loss": 1.7462, "step": 386300 }, { "epoch": 0.25, "learning_rate": 3.765930195325008e-05, "loss": 1.7449, "step": 386400 }, { "epoch": 0.25, "learning_rate": 3.765609990393852e-05, "loss": 1.7395, "step": 386500 }, { "epoch": 0.25, "learning_rate": 3.765289785462696e-05, "loss": 1.7343, "step": 386600 }, { "epoch": 0.25, "learning_rate": 3.76496958053154e-05, "loss": 1.7222, "step": 386700 }, { "epoch": 0.25, "learning_rate": 3.764649375600384e-05, "loss": 1.7314, "step": 386800 }, { "epoch": 0.25, "learning_rate": 3.764329170669229e-05, "loss": 1.7441, "step": 386900 }, { "epoch": 0.25, "learning_rate": 3.764008965738073e-05, "loss": 1.7308, "step": 387000 }, { "epoch": 0.25, "eval_loss": 1.7431215047836304, "eval_runtime": 90.9163, "eval_samples_per_second": 109.991, "eval_steps_per_second": 6.874, "step": 387000 }, { "epoch": 0.25, "learning_rate": 3.763688760806917e-05, "loss": 1.7528, "step": 387100 }, { "epoch": 0.25, "learning_rate": 3.763368555875761e-05, "loss": 1.7133, "step": 387200 }, { "epoch": 0.25, "learning_rate": 3.763048350944605e-05, "loss": 1.7297, "step": 387300 }, { "epoch": 0.25, "learning_rate": 3.7627281460134487e-05, "loss": 1.7387, "step": 387400 }, { "epoch": 0.25, "learning_rate": 3.7624079410822926e-05, "loss": 1.7391, "step": 387500 }, { "epoch": 0.25, "learning_rate": 3.762087736151137e-05, "loss": 1.7536, "step": 387600 }, { "epoch": 0.25, "learning_rate": 3.7617675312199806e-05, "loss": 1.7292, "step": 387700 }, { "epoch": 0.25, "learning_rate": 3.761447326288825e-05, "loss": 1.7337, "step": 387800 }, { "epoch": 0.25, "learning_rate": 3.761127121357669e-05, "loss": 1.7455, "step": 387900 }, { "epoch": 0.25, "learning_rate": 3.760806916426513e-05, "loss": 1.7335, "step": 388000 }, { "epoch": 0.25, "eval_loss": 1.7444692850112915, "eval_runtime": 89.867, "eval_samples_per_second": 111.276, "eval_steps_per_second": 6.955, "step": 388000 }, { "epoch": 0.25, "learning_rate": 3.760486711495357e-05, "loss": 1.7178, "step": 388100 }, { "epoch": 0.25, "learning_rate": 3.760166506564201e-05, "loss": 1.7394, "step": 388200 }, { "epoch": 0.25, "learning_rate": 3.759846301633045e-05, "loss": 1.7491, "step": 388300 }, { "epoch": 0.25, "learning_rate": 3.759526096701889e-05, "loss": 1.746, "step": 388400 }, { "epoch": 0.25, "learning_rate": 3.759205891770734e-05, "loss": 1.7475, "step": 388500 }, { "epoch": 0.25, "learning_rate": 3.758885686839578e-05, "loss": 1.7327, "step": 388600 }, { "epoch": 0.25, "learning_rate": 3.758565481908422e-05, "loss": 1.7499, "step": 388700 }, { "epoch": 0.25, "learning_rate": 3.7582452769772657e-05, "loss": 1.7479, "step": 388800 }, { "epoch": 0.25, "learning_rate": 3.7579250720461096e-05, "loss": 1.7304, "step": 388900 }, { "epoch": 0.25, "learning_rate": 3.7576048671149536e-05, "loss": 1.7412, "step": 389000 }, { "epoch": 0.25, "eval_loss": 1.74239182472229, "eval_runtime": 89.2496, "eval_samples_per_second": 112.045, "eval_steps_per_second": 7.003, "step": 389000 }, { "epoch": 0.25, "learning_rate": 3.7572846621837976e-05, "loss": 1.7188, "step": 389100 }, { "epoch": 0.25, "learning_rate": 3.756964457252642e-05, "loss": 1.7463, "step": 389200 }, { "epoch": 0.25, "learning_rate": 3.756644252321486e-05, "loss": 1.7348, "step": 389300 }, { "epoch": 0.25, "learning_rate": 3.75632404739033e-05, "loss": 1.7432, "step": 389400 }, { "epoch": 0.25, "learning_rate": 3.756003842459174e-05, "loss": 1.734, "step": 389500 }, { "epoch": 0.25, "learning_rate": 3.755683637528018e-05, "loss": 1.7118, "step": 389600 }, { "epoch": 0.25, "learning_rate": 3.755363432596862e-05, "loss": 1.7404, "step": 389700 }, { "epoch": 0.25, "learning_rate": 3.755043227665706e-05, "loss": 1.746, "step": 389800 }, { "epoch": 0.25, "learning_rate": 3.75472302273455e-05, "loss": 1.7301, "step": 389900 }, { "epoch": 0.25, "learning_rate": 3.754402817803394e-05, "loss": 1.7354, "step": 390000 }, { "epoch": 0.25, "eval_loss": 1.743432641029358, "eval_runtime": 92.5804, "eval_samples_per_second": 108.014, "eval_steps_per_second": 6.751, "step": 390000 }, { "epoch": 0.25, "learning_rate": 3.754082612872239e-05, "loss": 1.7233, "step": 390100 }, { "epoch": 0.25, "learning_rate": 3.7537624079410826e-05, "loss": 1.758, "step": 390200 }, { "epoch": 0.25, "learning_rate": 3.7534422030099266e-05, "loss": 1.762, "step": 390300 }, { "epoch": 0.25, "learning_rate": 3.7531219980787706e-05, "loss": 1.7299, "step": 390400 }, { "epoch": 0.25, "learning_rate": 3.7528017931476146e-05, "loss": 1.736, "step": 390500 }, { "epoch": 0.25, "learning_rate": 3.7524815882164585e-05, "loss": 1.756, "step": 390600 }, { "epoch": 0.25, "learning_rate": 3.7521613832853025e-05, "loss": 1.7351, "step": 390700 }, { "epoch": 0.25, "learning_rate": 3.751841178354147e-05, "loss": 1.7211, "step": 390800 }, { "epoch": 0.25, "learning_rate": 3.751520973422991e-05, "loss": 1.7299, "step": 390900 }, { "epoch": 0.25, "learning_rate": 3.751200768491835e-05, "loss": 1.743, "step": 391000 }, { "epoch": 0.25, "eval_loss": 1.742842197418213, "eval_runtime": 94.4537, "eval_samples_per_second": 105.872, "eval_steps_per_second": 6.617, "step": 391000 }, { "epoch": 0.25, "learning_rate": 3.750880563560679e-05, "loss": 1.7454, "step": 391100 }, { "epoch": 0.25, "learning_rate": 3.750560358629523e-05, "loss": 1.739, "step": 391200 }, { "epoch": 0.25, "learning_rate": 3.750240153698367e-05, "loss": 1.7459, "step": 391300 }, { "epoch": 0.25, "learning_rate": 3.749919948767211e-05, "loss": 1.7438, "step": 391400 }, { "epoch": 0.25, "learning_rate": 3.749599743836055e-05, "loss": 1.7329, "step": 391500 }, { "epoch": 0.25, "learning_rate": 3.7492795389048996e-05, "loss": 1.7336, "step": 391600 }, { "epoch": 0.25, "learning_rate": 3.7489593339737436e-05, "loss": 1.726, "step": 391700 }, { "epoch": 0.25, "learning_rate": 3.7486391290425876e-05, "loss": 1.7328, "step": 391800 }, { "epoch": 0.25, "learning_rate": 3.7483189241114316e-05, "loss": 1.759, "step": 391900 }, { "epoch": 0.25, "learning_rate": 3.7479987191802755e-05, "loss": 1.7556, "step": 392000 }, { "epoch": 0.25, "eval_loss": 1.7428531646728516, "eval_runtime": 90.917, "eval_samples_per_second": 109.99, "eval_steps_per_second": 6.874, "step": 392000 }, { "epoch": 0.25, "learning_rate": 3.7476785142491195e-05, "loss": 1.7323, "step": 392100 }, { "epoch": 0.25, "learning_rate": 3.7473583093179635e-05, "loss": 1.7414, "step": 392200 }, { "epoch": 0.25, "learning_rate": 3.7470381043868074e-05, "loss": 1.7397, "step": 392300 }, { "epoch": 0.25, "learning_rate": 3.746717899455652e-05, "loss": 1.7296, "step": 392400 }, { "epoch": 0.25, "learning_rate": 3.746397694524496e-05, "loss": 1.7414, "step": 392500 }, { "epoch": 0.25, "learning_rate": 3.74607748959334e-05, "loss": 1.722, "step": 392600 }, { "epoch": 0.25, "learning_rate": 3.745757284662184e-05, "loss": 1.7311, "step": 392700 }, { "epoch": 0.25, "learning_rate": 3.745437079731028e-05, "loss": 1.762, "step": 392800 }, { "epoch": 0.25, "learning_rate": 3.745116874799872e-05, "loss": 1.7381, "step": 392900 }, { "epoch": 0.25, "learning_rate": 3.744796669868716e-05, "loss": 1.756, "step": 393000 }, { "epoch": 0.25, "eval_loss": 1.7443095445632935, "eval_runtime": 92.2107, "eval_samples_per_second": 108.447, "eval_steps_per_second": 6.778, "step": 393000 }, { "epoch": 0.25, "learning_rate": 3.74447646493756e-05, "loss": 1.7402, "step": 393100 }, { "epoch": 0.25, "learning_rate": 3.7441562600064046e-05, "loss": 1.7324, "step": 393200 }, { "epoch": 0.25, "learning_rate": 3.7438360550752485e-05, "loss": 1.7516, "step": 393300 }, { "epoch": 0.25, "learning_rate": 3.7435158501440925e-05, "loss": 1.7381, "step": 393400 }, { "epoch": 0.25, "learning_rate": 3.7431956452129365e-05, "loss": 1.7239, "step": 393500 }, { "epoch": 0.25, "learning_rate": 3.7428754402817805e-05, "loss": 1.7362, "step": 393600 }, { "epoch": 0.25, "learning_rate": 3.7425552353506244e-05, "loss": 1.7207, "step": 393700 }, { "epoch": 0.25, "learning_rate": 3.7422350304194684e-05, "loss": 1.7265, "step": 393800 }, { "epoch": 0.25, "learning_rate": 3.741914825488313e-05, "loss": 1.7275, "step": 393900 }, { "epoch": 0.25, "learning_rate": 3.741594620557157e-05, "loss": 1.7512, "step": 394000 }, { "epoch": 0.25, "eval_loss": 1.7443969249725342, "eval_runtime": 90.1244, "eval_samples_per_second": 110.958, "eval_steps_per_second": 6.935, "step": 394000 }, { "epoch": 0.25, "learning_rate": 3.741274415626001e-05, "loss": 1.7119, "step": 394100 }, { "epoch": 0.25, "learning_rate": 3.740954210694845e-05, "loss": 1.7308, "step": 394200 }, { "epoch": 0.25, "learning_rate": 3.740634005763689e-05, "loss": 1.7439, "step": 394300 }, { "epoch": 0.25, "learning_rate": 3.740313800832533e-05, "loss": 1.7003, "step": 394400 }, { "epoch": 0.25, "learning_rate": 3.739993595901377e-05, "loss": 1.7406, "step": 394500 }, { "epoch": 0.25, "learning_rate": 3.739673390970221e-05, "loss": 1.7371, "step": 394600 }, { "epoch": 0.25, "learning_rate": 3.739353186039065e-05, "loss": 1.7185, "step": 394700 }, { "epoch": 0.25, "learning_rate": 3.7390329811079095e-05, "loss": 1.7432, "step": 394800 }, { "epoch": 0.25, "learning_rate": 3.7387127761767535e-05, "loss": 1.7411, "step": 394900 }, { "epoch": 0.25, "learning_rate": 3.7383925712455975e-05, "loss": 1.7548, "step": 395000 }, { "epoch": 0.25, "eval_loss": 1.7400634288787842, "eval_runtime": 92.4125, "eval_samples_per_second": 108.21, "eval_steps_per_second": 6.763, "step": 395000 }, { "epoch": 0.25, "learning_rate": 3.7380723663144414e-05, "loss": 1.7334, "step": 395100 }, { "epoch": 0.25, "learning_rate": 3.7377521613832854e-05, "loss": 1.7457, "step": 395200 }, { "epoch": 0.25, "learning_rate": 3.7374319564521294e-05, "loss": 1.7358, "step": 395300 }, { "epoch": 0.25, "learning_rate": 3.7371117515209733e-05, "loss": 1.7351, "step": 395400 }, { "epoch": 0.25, "learning_rate": 3.736791546589818e-05, "loss": 1.7492, "step": 395500 }, { "epoch": 0.25, "learning_rate": 3.736471341658662e-05, "loss": 1.7186, "step": 395600 }, { "epoch": 0.25, "learning_rate": 3.736151136727506e-05, "loss": 1.7364, "step": 395700 }, { "epoch": 0.25, "learning_rate": 3.73583093179635e-05, "loss": 1.7497, "step": 395800 }, { "epoch": 0.25, "learning_rate": 3.735510726865194e-05, "loss": 1.7266, "step": 395900 }, { "epoch": 0.25, "learning_rate": 3.735190521934038e-05, "loss": 1.749, "step": 396000 }, { "epoch": 0.25, "eval_loss": 1.7427273988723755, "eval_runtime": 93.3544, "eval_samples_per_second": 107.119, "eval_steps_per_second": 6.695, "step": 396000 }, { "epoch": 0.25, "learning_rate": 3.734870317002882e-05, "loss": 1.7434, "step": 396100 }, { "epoch": 0.25, "learning_rate": 3.7345501120717265e-05, "loss": 1.726, "step": 396200 }, { "epoch": 0.25, "learning_rate": 3.73422990714057e-05, "loss": 1.7147, "step": 396300 }, { "epoch": 0.25, "learning_rate": 3.7339097022094144e-05, "loss": 1.7421, "step": 396400 }, { "epoch": 0.25, "learning_rate": 3.7335894972782584e-05, "loss": 1.7326, "step": 396500 }, { "epoch": 0.25, "learning_rate": 3.7332692923471024e-05, "loss": 1.7206, "step": 396600 }, { "epoch": 0.25, "learning_rate": 3.7329490874159464e-05, "loss": 1.733, "step": 396700 }, { "epoch": 0.25, "learning_rate": 3.73262888248479e-05, "loss": 1.7289, "step": 396800 }, { "epoch": 0.25, "learning_rate": 3.732308677553635e-05, "loss": 1.7268, "step": 396900 }, { "epoch": 0.25, "learning_rate": 3.731988472622478e-05, "loss": 1.7357, "step": 397000 }, { "epoch": 0.25, "eval_loss": 1.741363286972046, "eval_runtime": 89.5022, "eval_samples_per_second": 111.729, "eval_steps_per_second": 6.983, "step": 397000 }, { "epoch": 0.25, "learning_rate": 3.731668267691323e-05, "loss": 1.7248, "step": 397100 }, { "epoch": 0.25, "learning_rate": 3.731348062760167e-05, "loss": 1.7313, "step": 397200 }, { "epoch": 0.25, "learning_rate": 3.731027857829011e-05, "loss": 1.7255, "step": 397300 }, { "epoch": 0.25, "learning_rate": 3.730707652897855e-05, "loss": 1.7389, "step": 397400 }, { "epoch": 0.25, "learning_rate": 3.730387447966699e-05, "loss": 1.7359, "step": 397500 }, { "epoch": 0.25, "learning_rate": 3.730067243035543e-05, "loss": 1.7185, "step": 397600 }, { "epoch": 0.25, "learning_rate": 3.729747038104387e-05, "loss": 1.7129, "step": 397700 }, { "epoch": 0.25, "learning_rate": 3.7294268331732314e-05, "loss": 1.7236, "step": 397800 }, { "epoch": 0.25, "learning_rate": 3.729106628242075e-05, "loss": 1.7413, "step": 397900 }, { "epoch": 0.25, "learning_rate": 3.7287864233109194e-05, "loss": 1.7184, "step": 398000 }, { "epoch": 0.25, "eval_loss": 1.7437692880630493, "eval_runtime": 89.9476, "eval_samples_per_second": 111.176, "eval_steps_per_second": 6.948, "step": 398000 }, { "epoch": 0.25, "learning_rate": 3.7284662183797634e-05, "loss": 1.7396, "step": 398100 }, { "epoch": 0.25, "learning_rate": 3.728146013448607e-05, "loss": 1.7515, "step": 398200 }, { "epoch": 0.25, "learning_rate": 3.727825808517451e-05, "loss": 1.7242, "step": 398300 }, { "epoch": 0.25, "learning_rate": 3.727505603586295e-05, "loss": 1.748, "step": 398400 }, { "epoch": 0.26, "learning_rate": 3.72718539865514e-05, "loss": 1.7284, "step": 398500 }, { "epoch": 0.26, "learning_rate": 3.726865193723983e-05, "loss": 1.7356, "step": 398600 }, { "epoch": 0.26, "learning_rate": 3.726544988792828e-05, "loss": 1.7452, "step": 398700 }, { "epoch": 0.26, "learning_rate": 3.726224783861672e-05, "loss": 1.7173, "step": 398800 }, { "epoch": 0.26, "learning_rate": 3.725904578930516e-05, "loss": 1.7324, "step": 398900 }, { "epoch": 0.26, "learning_rate": 3.72558437399936e-05, "loss": 1.7219, "step": 399000 }, { "epoch": 0.26, "eval_loss": 1.7428277730941772, "eval_runtime": 92.6594, "eval_samples_per_second": 107.922, "eval_steps_per_second": 6.745, "step": 399000 }, { "epoch": 0.26, "learning_rate": 3.725264169068204e-05, "loss": 1.746, "step": 399100 }, { "epoch": 0.26, "learning_rate": 3.7249439641370484e-05, "loss": 1.7429, "step": 399200 }, { "epoch": 0.26, "learning_rate": 3.724623759205892e-05, "loss": 1.7294, "step": 399300 }, { "epoch": 0.26, "learning_rate": 3.7243035542747364e-05, "loss": 1.7204, "step": 399400 }, { "epoch": 0.26, "learning_rate": 3.72398334934358e-05, "loss": 1.7207, "step": 399500 }, { "epoch": 0.26, "learning_rate": 3.723663144412424e-05, "loss": 1.7343, "step": 399600 }, { "epoch": 0.26, "learning_rate": 3.723342939481268e-05, "loss": 1.7166, "step": 399700 }, { "epoch": 0.26, "learning_rate": 3.723022734550112e-05, "loss": 1.7328, "step": 399800 }, { "epoch": 0.26, "learning_rate": 3.722702529618956e-05, "loss": 1.7325, "step": 399900 }, { "epoch": 0.26, "learning_rate": 3.7223823246878e-05, "loss": 1.7201, "step": 400000 }, { "epoch": 0.26, "eval_loss": 1.7424557209014893, "eval_runtime": 96.5057, "eval_samples_per_second": 103.621, "eval_steps_per_second": 6.476, "step": 400000 }, { "epoch": 0.26, "learning_rate": 3.722062119756645e-05, "loss": 1.7357, "step": 400100 }, { "epoch": 0.26, "learning_rate": 3.721741914825488e-05, "loss": 1.6981, "step": 400200 }, { "epoch": 0.26, "learning_rate": 3.721421709894333e-05, "loss": 1.7111, "step": 400300 }, { "epoch": 0.26, "learning_rate": 3.721101504963176e-05, "loss": 1.7339, "step": 400400 }, { "epoch": 0.26, "learning_rate": 3.720781300032021e-05, "loss": 1.7355, "step": 400500 }, { "epoch": 0.26, "learning_rate": 3.720461095100865e-05, "loss": 1.7334, "step": 400600 }, { "epoch": 0.26, "learning_rate": 3.720140890169709e-05, "loss": 1.74, "step": 400700 }, { "epoch": 0.26, "learning_rate": 3.7198206852385534e-05, "loss": 1.7281, "step": 400800 }, { "epoch": 0.26, "learning_rate": 3.7195004803073967e-05, "loss": 1.7323, "step": 400900 }, { "epoch": 0.26, "learning_rate": 3.719180275376241e-05, "loss": 1.7262, "step": 401000 }, { "epoch": 0.26, "eval_loss": 1.745850920677185, "eval_runtime": 93.7063, "eval_samples_per_second": 106.716, "eval_steps_per_second": 6.67, "step": 401000 }, { "epoch": 0.26, "learning_rate": 3.7188600704450846e-05, "loss": 1.7449, "step": 401100 }, { "epoch": 0.26, "learning_rate": 3.718539865513929e-05, "loss": 1.7219, "step": 401200 }, { "epoch": 0.26, "learning_rate": 3.718219660582773e-05, "loss": 1.7354, "step": 401300 }, { "epoch": 0.26, "learning_rate": 3.717899455651617e-05, "loss": 1.724, "step": 401400 }, { "epoch": 0.26, "learning_rate": 3.717579250720462e-05, "loss": 1.735, "step": 401500 }, { "epoch": 0.26, "learning_rate": 3.717259045789305e-05, "loss": 1.7212, "step": 401600 }, { "epoch": 0.26, "learning_rate": 3.71693884085815e-05, "loss": 1.7332, "step": 401700 }, { "epoch": 0.26, "learning_rate": 3.716618635926993e-05, "loss": 1.7294, "step": 401800 }, { "epoch": 0.26, "learning_rate": 3.716298430995838e-05, "loss": 1.7494, "step": 401900 }, { "epoch": 0.26, "learning_rate": 3.715978226064681e-05, "loss": 1.7343, "step": 402000 }, { "epoch": 0.26, "eval_loss": 1.7432432174682617, "eval_runtime": 91.1398, "eval_samples_per_second": 109.721, "eval_steps_per_second": 6.858, "step": 402000 }, { "epoch": 0.26, "learning_rate": 3.715658021133526e-05, "loss": 1.7078, "step": 402100 }, { "epoch": 0.26, "learning_rate": 3.71533781620237e-05, "loss": 1.7351, "step": 402200 }, { "epoch": 0.26, "learning_rate": 3.7150176112712136e-05, "loss": 1.7318, "step": 402300 }, { "epoch": 0.26, "learning_rate": 3.714697406340058e-05, "loss": 1.739, "step": 402400 }, { "epoch": 0.26, "learning_rate": 3.7143772014089016e-05, "loss": 1.7354, "step": 402500 }, { "epoch": 0.26, "learning_rate": 3.714056996477746e-05, "loss": 1.7219, "step": 402600 }, { "epoch": 0.26, "learning_rate": 3.7137367915465895e-05, "loss": 1.7321, "step": 402700 }, { "epoch": 0.26, "learning_rate": 3.713416586615434e-05, "loss": 1.7344, "step": 402800 }, { "epoch": 0.26, "learning_rate": 3.713096381684278e-05, "loss": 1.7372, "step": 402900 }, { "epoch": 0.26, "learning_rate": 3.712776176753122e-05, "loss": 1.7521, "step": 403000 }, { "epoch": 0.26, "eval_loss": 1.742187738418579, "eval_runtime": 90.0512, "eval_samples_per_second": 111.048, "eval_steps_per_second": 6.94, "step": 403000 }, { "epoch": 0.26, "learning_rate": 3.712455971821967e-05, "loss": 1.7317, "step": 403100 }, { "epoch": 0.26, "learning_rate": 3.71213576689081e-05, "loss": 1.7331, "step": 403200 }, { "epoch": 0.26, "learning_rate": 3.711815561959655e-05, "loss": 1.7575, "step": 403300 }, { "epoch": 0.26, "learning_rate": 3.711495357028498e-05, "loss": 1.7216, "step": 403400 }, { "epoch": 0.26, "learning_rate": 3.711175152097343e-05, "loss": 1.7242, "step": 403500 }, { "epoch": 0.26, "learning_rate": 3.710854947166186e-05, "loss": 1.7285, "step": 403600 }, { "epoch": 0.26, "learning_rate": 3.7105347422350306e-05, "loss": 1.7174, "step": 403700 }, { "epoch": 0.26, "learning_rate": 3.7102145373038746e-05, "loss": 1.7156, "step": 403800 }, { "epoch": 0.26, "learning_rate": 3.7098943323727186e-05, "loss": 1.7502, "step": 403900 }, { "epoch": 0.26, "learning_rate": 3.709574127441563e-05, "loss": 1.7457, "step": 404000 }, { "epoch": 0.26, "eval_loss": 1.7431570291519165, "eval_runtime": 95.2097, "eval_samples_per_second": 105.031, "eval_steps_per_second": 6.564, "step": 404000 }, { "epoch": 0.26, "learning_rate": 3.7092539225104065e-05, "loss": 1.7283, "step": 404100 }, { "epoch": 0.26, "learning_rate": 3.708933717579251e-05, "loss": 1.7391, "step": 404200 }, { "epoch": 0.26, "learning_rate": 3.7086135126480945e-05, "loss": 1.7409, "step": 404300 }, { "epoch": 0.26, "learning_rate": 3.708293307716939e-05, "loss": 1.7373, "step": 404400 }, { "epoch": 0.26, "learning_rate": 3.707973102785783e-05, "loss": 1.7354, "step": 404500 }, { "epoch": 0.26, "learning_rate": 3.707652897854627e-05, "loss": 1.7605, "step": 404600 }, { "epoch": 0.26, "learning_rate": 3.707332692923472e-05, "loss": 1.724, "step": 404700 }, { "epoch": 0.26, "learning_rate": 3.707012487992315e-05, "loss": 1.7044, "step": 404800 }, { "epoch": 0.26, "learning_rate": 3.70669228306116e-05, "loss": 1.743, "step": 404900 }, { "epoch": 0.26, "learning_rate": 3.706372078130003e-05, "loss": 1.7315, "step": 405000 }, { "epoch": 0.26, "eval_loss": 1.7403249740600586, "eval_runtime": 95.1027, "eval_samples_per_second": 105.149, "eval_steps_per_second": 6.572, "step": 405000 }, { "epoch": 0.26, "learning_rate": 3.7060518731988476e-05, "loss": 1.7598, "step": 405100 }, { "epoch": 0.26, "learning_rate": 3.705731668267691e-05, "loss": 1.7207, "step": 405200 }, { "epoch": 0.26, "learning_rate": 3.7054114633365356e-05, "loss": 1.7435, "step": 405300 }, { "epoch": 0.26, "learning_rate": 3.7050912584053795e-05, "loss": 1.7328, "step": 405400 }, { "epoch": 0.26, "learning_rate": 3.7047710534742235e-05, "loss": 1.7344, "step": 405500 }, { "epoch": 0.26, "learning_rate": 3.704450848543068e-05, "loss": 1.7305, "step": 405600 }, { "epoch": 0.26, "learning_rate": 3.7041306436119115e-05, "loss": 1.7217, "step": 405700 }, { "epoch": 0.26, "learning_rate": 3.703810438680756e-05, "loss": 1.7329, "step": 405800 }, { "epoch": 0.26, "learning_rate": 3.7034902337495994e-05, "loss": 1.7111, "step": 405900 }, { "epoch": 0.26, "learning_rate": 3.703170028818444e-05, "loss": 1.7314, "step": 406000 }, { "epoch": 0.26, "eval_loss": 1.7425638437271118, "eval_runtime": 92.6817, "eval_samples_per_second": 107.896, "eval_steps_per_second": 6.744, "step": 406000 }, { "epoch": 0.26, "learning_rate": 3.702849823887288e-05, "loss": 1.7112, "step": 406100 }, { "epoch": 0.26, "learning_rate": 3.702529618956132e-05, "loss": 1.7539, "step": 406200 }, { "epoch": 0.26, "learning_rate": 3.702209414024977e-05, "loss": 1.7443, "step": 406300 }, { "epoch": 0.26, "learning_rate": 3.70188920909382e-05, "loss": 1.7299, "step": 406400 }, { "epoch": 0.26, "learning_rate": 3.7015690041626646e-05, "loss": 1.7378, "step": 406500 }, { "epoch": 0.26, "learning_rate": 3.701248799231508e-05, "loss": 1.7321, "step": 406600 }, { "epoch": 0.26, "learning_rate": 3.7009285943003526e-05, "loss": 1.7215, "step": 406700 }, { "epoch": 0.26, "learning_rate": 3.7006083893691965e-05, "loss": 1.7301, "step": 406800 }, { "epoch": 0.26, "learning_rate": 3.7002881844380405e-05, "loss": 1.7252, "step": 406900 }, { "epoch": 0.26, "learning_rate": 3.6999679795068845e-05, "loss": 1.7379, "step": 407000 }, { "epoch": 0.26, "eval_loss": 1.7437089681625366, "eval_runtime": 91.1659, "eval_samples_per_second": 109.69, "eval_steps_per_second": 6.856, "step": 407000 }, { "epoch": 0.26, "learning_rate": 3.6996477745757285e-05, "loss": 1.7231, "step": 407100 }, { "epoch": 0.26, "learning_rate": 3.699327569644573e-05, "loss": 1.7281, "step": 407200 }, { "epoch": 0.26, "learning_rate": 3.6990073647134164e-05, "loss": 1.7325, "step": 407300 }, { "epoch": 0.26, "learning_rate": 3.698687159782261e-05, "loss": 1.7326, "step": 407400 }, { "epoch": 0.26, "learning_rate": 3.6983669548511043e-05, "loss": 1.7254, "step": 407500 }, { "epoch": 0.26, "learning_rate": 3.698046749919949e-05, "loss": 1.7413, "step": 407600 }, { "epoch": 0.26, "learning_rate": 3.697726544988793e-05, "loss": 1.743, "step": 407700 }, { "epoch": 0.26, "learning_rate": 3.697406340057637e-05, "loss": 1.7276, "step": 407800 }, { "epoch": 0.26, "learning_rate": 3.6970861351264816e-05, "loss": 1.7463, "step": 407900 }, { "epoch": 0.26, "learning_rate": 3.696765930195325e-05, "loss": 1.7342, "step": 408000 }, { "epoch": 0.26, "eval_loss": 1.738871455192566, "eval_runtime": 91.6715, "eval_samples_per_second": 109.085, "eval_steps_per_second": 6.818, "step": 408000 }, { "epoch": 0.26, "learning_rate": 3.6964457252641695e-05, "loss": 1.7212, "step": 408100 }, { "epoch": 0.26, "learning_rate": 3.696125520333013e-05, "loss": 1.7274, "step": 408200 }, { "epoch": 0.26, "learning_rate": 3.6958053154018575e-05, "loss": 1.724, "step": 408300 }, { "epoch": 0.26, "learning_rate": 3.6954851104707015e-05, "loss": 1.7299, "step": 408400 }, { "epoch": 0.26, "learning_rate": 3.6951649055395454e-05, "loss": 1.7187, "step": 408500 }, { "epoch": 0.26, "learning_rate": 3.6948447006083894e-05, "loss": 1.7476, "step": 408600 }, { "epoch": 0.26, "learning_rate": 3.6945244956772334e-05, "loss": 1.7071, "step": 408700 }, { "epoch": 0.26, "learning_rate": 3.694204290746078e-05, "loss": 1.7314, "step": 408800 }, { "epoch": 0.26, "learning_rate": 3.693884085814921e-05, "loss": 1.748, "step": 408900 }, { "epoch": 0.26, "learning_rate": 3.693563880883766e-05, "loss": 1.74, "step": 409000 }, { "epoch": 0.26, "eval_loss": 1.7422600984573364, "eval_runtime": 92.129, "eval_samples_per_second": 108.543, "eval_steps_per_second": 6.784, "step": 409000 }, { "epoch": 0.26, "learning_rate": 3.69324367595261e-05, "loss": 1.7317, "step": 409100 }, { "epoch": 0.26, "learning_rate": 3.692923471021454e-05, "loss": 1.7172, "step": 409200 }, { "epoch": 0.26, "learning_rate": 3.692603266090298e-05, "loss": 1.7431, "step": 409300 }, { "epoch": 0.26, "learning_rate": 3.692283061159142e-05, "loss": 1.707, "step": 409400 }, { "epoch": 0.26, "learning_rate": 3.6919628562279865e-05, "loss": 1.7491, "step": 409500 }, { "epoch": 0.26, "learning_rate": 3.69164265129683e-05, "loss": 1.7183, "step": 409600 }, { "epoch": 0.26, "learning_rate": 3.6913224463656745e-05, "loss": 1.7259, "step": 409700 }, { "epoch": 0.26, "learning_rate": 3.691002241434518e-05, "loss": 1.7272, "step": 409800 }, { "epoch": 0.26, "learning_rate": 3.6906820365033624e-05, "loss": 1.7271, "step": 409900 }, { "epoch": 0.26, "learning_rate": 3.6903618315722064e-05, "loss": 1.7484, "step": 410000 }, { "epoch": 0.26, "eval_loss": 1.7387560606002808, "eval_runtime": 89.3958, "eval_samples_per_second": 111.862, "eval_steps_per_second": 6.991, "step": 410000 }, { "epoch": 0.26, "learning_rate": 3.6900416266410504e-05, "loss": 1.7123, "step": 410100 }, { "epoch": 0.26, "learning_rate": 3.6897214217098944e-05, "loss": 1.7276, "step": 410200 }, { "epoch": 0.26, "learning_rate": 3.689401216778738e-05, "loss": 1.7325, "step": 410300 }, { "epoch": 0.26, "learning_rate": 3.689081011847583e-05, "loss": 1.7404, "step": 410400 }, { "epoch": 0.26, "learning_rate": 3.688760806916426e-05, "loss": 1.7427, "step": 410500 }, { "epoch": 0.26, "learning_rate": 3.688440601985271e-05, "loss": 1.7247, "step": 410600 }, { "epoch": 0.26, "learning_rate": 3.688120397054115e-05, "loss": 1.7143, "step": 410700 }, { "epoch": 0.26, "learning_rate": 3.687800192122959e-05, "loss": 1.7447, "step": 410800 }, { "epoch": 0.26, "learning_rate": 3.687479987191803e-05, "loss": 1.7136, "step": 410900 }, { "epoch": 0.26, "learning_rate": 3.687159782260647e-05, "loss": 1.7131, "step": 411000 }, { "epoch": 0.26, "eval_loss": 1.742553472518921, "eval_runtime": 88.7956, "eval_samples_per_second": 112.618, "eval_steps_per_second": 7.039, "step": 411000 }, { "epoch": 0.26, "learning_rate": 3.6868395773294915e-05, "loss": 1.7252, "step": 411100 }, { "epoch": 0.26, "learning_rate": 3.686519372398335e-05, "loss": 1.729, "step": 411200 }, { "epoch": 0.26, "learning_rate": 3.6861991674671794e-05, "loss": 1.7309, "step": 411300 }, { "epoch": 0.26, "learning_rate": 3.6858789625360234e-05, "loss": 1.7375, "step": 411400 }, { "epoch": 0.26, "learning_rate": 3.6855587576048674e-05, "loss": 1.7313, "step": 411500 }, { "epoch": 0.26, "learning_rate": 3.6852385526737113e-05, "loss": 1.7173, "step": 411600 }, { "epoch": 0.26, "learning_rate": 3.684918347742555e-05, "loss": 1.7433, "step": 411700 }, { "epoch": 0.26, "learning_rate": 3.684598142811399e-05, "loss": 1.7754, "step": 411800 }, { "epoch": 0.26, "learning_rate": 3.684277937880243e-05, "loss": 1.7253, "step": 411900 }, { "epoch": 0.26, "learning_rate": 3.683957732949088e-05, "loss": 1.7407, "step": 412000 }, { "epoch": 0.26, "eval_loss": 1.7423832416534424, "eval_runtime": 98.7259, "eval_samples_per_second": 101.291, "eval_steps_per_second": 6.331, "step": 412000 }, { "epoch": 0.26, "learning_rate": 3.683637528017931e-05, "loss": 1.7271, "step": 412100 }, { "epoch": 0.26, "learning_rate": 3.683317323086776e-05, "loss": 1.7233, "step": 412200 }, { "epoch": 0.26, "learning_rate": 3.68299711815562e-05, "loss": 1.7436, "step": 412300 }, { "epoch": 0.26, "learning_rate": 3.682676913224464e-05, "loss": 1.7, "step": 412400 }, { "epoch": 0.26, "learning_rate": 3.682356708293308e-05, "loss": 1.7351, "step": 412500 }, { "epoch": 0.26, "learning_rate": 3.682036503362152e-05, "loss": 1.7259, "step": 412600 }, { "epoch": 0.26, "learning_rate": 3.6817162984309964e-05, "loss": 1.7189, "step": 412700 }, { "epoch": 0.26, "learning_rate": 3.68139609349984e-05, "loss": 1.74, "step": 412800 }, { "epoch": 0.26, "learning_rate": 3.6810758885686844e-05, "loss": 1.7195, "step": 412900 }, { "epoch": 0.26, "learning_rate": 3.680755683637528e-05, "loss": 1.7042, "step": 413000 }, { "epoch": 0.26, "eval_loss": 1.7432442903518677, "eval_runtime": 94.8613, "eval_samples_per_second": 105.417, "eval_steps_per_second": 6.589, "step": 413000 }, { "epoch": 0.26, "learning_rate": 3.680435478706372e-05, "loss": 1.7363, "step": 413100 }, { "epoch": 0.26, "learning_rate": 3.680115273775216e-05, "loss": 1.7261, "step": 413200 }, { "epoch": 0.26, "learning_rate": 3.67979506884406e-05, "loss": 1.7407, "step": 413300 }, { "epoch": 0.26, "learning_rate": 3.679474863912904e-05, "loss": 1.7278, "step": 413400 }, { "epoch": 0.26, "learning_rate": 3.679154658981748e-05, "loss": 1.7256, "step": 413500 }, { "epoch": 0.26, "learning_rate": 3.678834454050593e-05, "loss": 1.732, "step": 413600 }, { "epoch": 0.26, "learning_rate": 3.678514249119437e-05, "loss": 1.7112, "step": 413700 }, { "epoch": 0.26, "learning_rate": 3.678194044188281e-05, "loss": 1.7223, "step": 413800 }, { "epoch": 0.26, "learning_rate": 3.677873839257125e-05, "loss": 1.7474, "step": 413900 }, { "epoch": 0.26, "learning_rate": 3.677553634325969e-05, "loss": 1.7387, "step": 414000 }, { "epoch": 0.26, "eval_loss": 1.7414915561676025, "eval_runtime": 90.3687, "eval_samples_per_second": 110.658, "eval_steps_per_second": 6.916, "step": 414000 }, { "epoch": 0.27, "learning_rate": 3.677233429394813e-05, "loss": 1.7191, "step": 414100 }, { "epoch": 0.27, "learning_rate": 3.676913224463657e-05, "loss": 1.7377, "step": 414200 }, { "epoch": 0.27, "learning_rate": 3.6765930195325013e-05, "loss": 1.7297, "step": 414300 }, { "epoch": 0.27, "learning_rate": 3.6762728146013446e-05, "loss": 1.7261, "step": 414400 }, { "epoch": 0.27, "learning_rate": 3.675952609670189e-05, "loss": 1.7337, "step": 414500 }, { "epoch": 0.27, "learning_rate": 3.675632404739033e-05, "loss": 1.737, "step": 414600 }, { "epoch": 0.27, "learning_rate": 3.675312199807877e-05, "loss": 1.7199, "step": 414700 }, { "epoch": 0.27, "learning_rate": 3.674991994876721e-05, "loss": 1.7403, "step": 414800 }, { "epoch": 0.27, "learning_rate": 3.674671789945565e-05, "loss": 1.7183, "step": 414900 }, { "epoch": 0.27, "learning_rate": 3.674351585014409e-05, "loss": 1.7233, "step": 415000 }, { "epoch": 0.27, "eval_loss": 1.742467999458313, "eval_runtime": 89.2091, "eval_samples_per_second": 112.096, "eval_steps_per_second": 7.006, "step": 415000 }, { "epoch": 0.27, "learning_rate": 3.674031380083253e-05, "loss": 1.74, "step": 415100 }, { "epoch": 0.27, "learning_rate": 3.673711175152098e-05, "loss": 1.7173, "step": 415200 }, { "epoch": 0.27, "learning_rate": 3.673390970220942e-05, "loss": 1.7518, "step": 415300 }, { "epoch": 0.27, "learning_rate": 3.673070765289786e-05, "loss": 1.7414, "step": 415400 }, { "epoch": 0.27, "learning_rate": 3.67275056035863e-05, "loss": 1.7358, "step": 415500 }, { "epoch": 0.27, "learning_rate": 3.672430355427474e-05, "loss": 1.7159, "step": 415600 }, { "epoch": 0.27, "learning_rate": 3.6721101504963177e-05, "loss": 1.7167, "step": 415700 }, { "epoch": 0.27, "learning_rate": 3.6717899455651616e-05, "loss": 1.7256, "step": 415800 }, { "epoch": 0.27, "learning_rate": 3.671469740634006e-05, "loss": 1.7164, "step": 415900 }, { "epoch": 0.27, "learning_rate": 3.67114953570285e-05, "loss": 1.7191, "step": 416000 }, { "epoch": 0.27, "eval_loss": 1.7406439781188965, "eval_runtime": 92.3048, "eval_samples_per_second": 108.337, "eval_steps_per_second": 6.771, "step": 416000 }, { "epoch": 0.27, "learning_rate": 3.670829330771694e-05, "loss": 1.7419, "step": 416100 }, { "epoch": 0.27, "learning_rate": 3.670509125840538e-05, "loss": 1.734, "step": 416200 }, { "epoch": 0.27, "learning_rate": 3.670188920909382e-05, "loss": 1.7173, "step": 416300 }, { "epoch": 0.27, "learning_rate": 3.669868715978226e-05, "loss": 1.7236, "step": 416400 }, { "epoch": 0.27, "learning_rate": 3.66954851104707e-05, "loss": 1.7285, "step": 416500 }, { "epoch": 0.27, "learning_rate": 3.669228306115914e-05, "loss": 1.7335, "step": 416600 }, { "epoch": 0.27, "learning_rate": 3.668908101184759e-05, "loss": 1.7006, "step": 416700 }, { "epoch": 0.27, "learning_rate": 3.668587896253603e-05, "loss": 1.7281, "step": 416800 }, { "epoch": 0.27, "learning_rate": 3.668267691322447e-05, "loss": 1.725, "step": 416900 }, { "epoch": 0.27, "learning_rate": 3.667947486391291e-05, "loss": 1.7293, "step": 417000 }, { "epoch": 0.27, "eval_loss": 1.742385745048523, "eval_runtime": 89.1056, "eval_samples_per_second": 112.226, "eval_steps_per_second": 7.014, "step": 417000 }, { "epoch": 0.27, "learning_rate": 3.6676272814601346e-05, "loss": 1.7365, "step": 417100 }, { "epoch": 0.27, "learning_rate": 3.6673070765289786e-05, "loss": 1.7144, "step": 417200 }, { "epoch": 0.27, "learning_rate": 3.6669868715978226e-05, "loss": 1.7211, "step": 417300 }, { "epoch": 0.27, "learning_rate": 3.6666666666666666e-05, "loss": 1.7188, "step": 417400 }, { "epoch": 0.27, "learning_rate": 3.666346461735511e-05, "loss": 1.7256, "step": 417500 }, { "epoch": 0.27, "learning_rate": 3.666026256804355e-05, "loss": 1.699, "step": 417600 }, { "epoch": 0.27, "learning_rate": 3.665706051873199e-05, "loss": 1.7269, "step": 417700 }, { "epoch": 0.27, "learning_rate": 3.665385846942043e-05, "loss": 1.7215, "step": 417800 }, { "epoch": 0.27, "learning_rate": 3.665065642010887e-05, "loss": 1.7379, "step": 417900 }, { "epoch": 0.27, "learning_rate": 3.664745437079731e-05, "loss": 1.7214, "step": 418000 }, { "epoch": 0.27, "eval_loss": 1.7417508363723755, "eval_runtime": 90.4412, "eval_samples_per_second": 110.569, "eval_steps_per_second": 6.911, "step": 418000 }, { "epoch": 0.27, "learning_rate": 3.664425232148575e-05, "loss": 1.7381, "step": 418100 }, { "epoch": 0.27, "learning_rate": 3.664105027217419e-05, "loss": 1.7162, "step": 418200 }, { "epoch": 0.27, "learning_rate": 3.663784822286264e-05, "loss": 1.7366, "step": 418300 }, { "epoch": 0.27, "learning_rate": 3.663464617355108e-05, "loss": 1.722, "step": 418400 }, { "epoch": 0.27, "learning_rate": 3.6631444124239516e-05, "loss": 1.7456, "step": 418500 }, { "epoch": 0.27, "learning_rate": 3.6628242074927956e-05, "loss": 1.7174, "step": 418600 }, { "epoch": 0.27, "learning_rate": 3.6625040025616396e-05, "loss": 1.7315, "step": 418700 }, { "epoch": 0.27, "learning_rate": 3.6621837976304836e-05, "loss": 1.7071, "step": 418800 }, { "epoch": 0.27, "learning_rate": 3.6618635926993275e-05, "loss": 1.7283, "step": 418900 }, { "epoch": 0.27, "learning_rate": 3.661543387768172e-05, "loss": 1.7145, "step": 419000 }, { "epoch": 0.27, "eval_loss": 1.7427804470062256, "eval_runtime": 90.7943, "eval_samples_per_second": 110.139, "eval_steps_per_second": 6.884, "step": 419000 }, { "epoch": 0.27, "learning_rate": 3.6612231828370155e-05, "loss": 1.7479, "step": 419100 }, { "epoch": 0.27, "learning_rate": 3.66090297790586e-05, "loss": 1.7191, "step": 419200 }, { "epoch": 0.27, "learning_rate": 3.660582772974704e-05, "loss": 1.72, "step": 419300 }, { "epoch": 0.27, "learning_rate": 3.660262568043548e-05, "loss": 1.7043, "step": 419400 }, { "epoch": 0.27, "learning_rate": 3.659942363112392e-05, "loss": 1.733, "step": 419500 }, { "epoch": 0.27, "learning_rate": 3.659622158181236e-05, "loss": 1.7391, "step": 419600 }, { "epoch": 0.27, "learning_rate": 3.65930195325008e-05, "loss": 1.7357, "step": 419700 }, { "epoch": 0.27, "learning_rate": 3.658981748318924e-05, "loss": 1.7186, "step": 419800 }, { "epoch": 0.27, "learning_rate": 3.6586615433877686e-05, "loss": 1.7261, "step": 419900 }, { "epoch": 0.27, "learning_rate": 3.6583413384566126e-05, "loss": 1.7196, "step": 420000 }, { "epoch": 0.27, "eval_loss": 1.7431228160858154, "eval_runtime": 93.1613, "eval_samples_per_second": 107.341, "eval_steps_per_second": 6.709, "step": 420000 }, { "epoch": 0.27, "learning_rate": 3.6580211335254566e-05, "loss": 1.7123, "step": 420100 }, { "epoch": 0.27, "learning_rate": 3.6577009285943006e-05, "loss": 1.7102, "step": 420200 }, { "epoch": 0.27, "learning_rate": 3.6573807236631445e-05, "loss": 1.7449, "step": 420300 }, { "epoch": 0.27, "learning_rate": 3.6570605187319885e-05, "loss": 1.7168, "step": 420400 }, { "epoch": 0.27, "learning_rate": 3.6567403138008325e-05, "loss": 1.7135, "step": 420500 }, { "epoch": 0.27, "learning_rate": 3.656420108869677e-05, "loss": 1.7235, "step": 420600 }, { "epoch": 0.27, "learning_rate": 3.6560999039385204e-05, "loss": 1.7311, "step": 420700 }, { "epoch": 0.27, "learning_rate": 3.655779699007365e-05, "loss": 1.7196, "step": 420800 }, { "epoch": 0.27, "learning_rate": 3.655459494076209e-05, "loss": 1.7131, "step": 420900 }, { "epoch": 0.27, "learning_rate": 3.655139289145053e-05, "loss": 1.7431, "step": 421000 }, { "epoch": 0.27, "eval_loss": 1.7413402795791626, "eval_runtime": 91.1239, "eval_samples_per_second": 109.741, "eval_steps_per_second": 6.859, "step": 421000 }, { "epoch": 0.27, "learning_rate": 3.654819084213897e-05, "loss": 1.7215, "step": 421100 }, { "epoch": 0.27, "learning_rate": 3.654498879282741e-05, "loss": 1.7183, "step": 421200 }, { "epoch": 0.27, "learning_rate": 3.6541786743515856e-05, "loss": 1.7348, "step": 421300 }, { "epoch": 0.27, "learning_rate": 3.653858469420429e-05, "loss": 1.7097, "step": 421400 }, { "epoch": 0.27, "learning_rate": 3.6535382644892736e-05, "loss": 1.7314, "step": 421500 }, { "epoch": 0.27, "learning_rate": 3.6532180595581175e-05, "loss": 1.7167, "step": 421600 }, { "epoch": 0.27, "learning_rate": 3.6528978546269615e-05, "loss": 1.7326, "step": 421700 }, { "epoch": 0.27, "learning_rate": 3.6525776496958055e-05, "loss": 1.7184, "step": 421800 }, { "epoch": 0.27, "learning_rate": 3.6522574447646495e-05, "loss": 1.7201, "step": 421900 }, { "epoch": 0.27, "learning_rate": 3.6519372398334934e-05, "loss": 1.7199, "step": 422000 }, { "epoch": 0.27, "eval_loss": 1.7434797286987305, "eval_runtime": 89.6594, "eval_samples_per_second": 111.533, "eval_steps_per_second": 6.971, "step": 422000 }, { "epoch": 0.27, "learning_rate": 3.6516170349023374e-05, "loss": 1.7287, "step": 422100 }, { "epoch": 0.27, "learning_rate": 3.651296829971182e-05, "loss": 1.7107, "step": 422200 }, { "epoch": 0.27, "learning_rate": 3.6509766250400254e-05, "loss": 1.7168, "step": 422300 }, { "epoch": 0.27, "learning_rate": 3.65065642010887e-05, "loss": 1.7166, "step": 422400 }, { "epoch": 0.27, "learning_rate": 3.650336215177714e-05, "loss": 1.7074, "step": 422500 }, { "epoch": 0.27, "learning_rate": 3.650016010246558e-05, "loss": 1.6925, "step": 422600 }, { "epoch": 0.27, "learning_rate": 3.649695805315402e-05, "loss": 1.7346, "step": 422700 }, { "epoch": 0.27, "learning_rate": 3.649375600384246e-05, "loss": 1.7353, "step": 422800 }, { "epoch": 0.27, "learning_rate": 3.6490553954530906e-05, "loss": 1.7107, "step": 422900 }, { "epoch": 0.27, "learning_rate": 3.648735190521934e-05, "loss": 1.7144, "step": 423000 }, { "epoch": 0.27, "eval_loss": 1.742095947265625, "eval_runtime": 92.0209, "eval_samples_per_second": 108.671, "eval_steps_per_second": 6.792, "step": 423000 }, { "epoch": 0.27, "learning_rate": 3.6484149855907785e-05, "loss": 1.7159, "step": 423100 }, { "epoch": 0.27, "learning_rate": 3.6480947806596225e-05, "loss": 1.7207, "step": 423200 }, { "epoch": 0.27, "learning_rate": 3.6477745757284665e-05, "loss": 1.7099, "step": 423300 }, { "epoch": 0.27, "learning_rate": 3.6474543707973104e-05, "loss": 1.7371, "step": 423400 }, { "epoch": 0.27, "learning_rate": 3.6471341658661544e-05, "loss": 1.7313, "step": 423500 }, { "epoch": 0.27, "learning_rate": 3.646813960934999e-05, "loss": 1.7167, "step": 423600 }, { "epoch": 0.27, "learning_rate": 3.6464937560038423e-05, "loss": 1.7277, "step": 423700 }, { "epoch": 0.27, "learning_rate": 3.646173551072687e-05, "loss": 1.7267, "step": 423800 }, { "epoch": 0.27, "learning_rate": 3.64585334614153e-05, "loss": 1.7166, "step": 423900 }, { "epoch": 0.27, "learning_rate": 3.645533141210375e-05, "loss": 1.7078, "step": 424000 }, { "epoch": 0.27, "eval_loss": 1.7427494525909424, "eval_runtime": 92.211, "eval_samples_per_second": 108.447, "eval_steps_per_second": 6.778, "step": 424000 }, { "epoch": 0.27, "learning_rate": 3.645212936279219e-05, "loss": 1.7069, "step": 424100 }, { "epoch": 0.27, "learning_rate": 3.644892731348063e-05, "loss": 1.7267, "step": 424200 }, { "epoch": 0.27, "learning_rate": 3.644572526416907e-05, "loss": 1.7257, "step": 424300 }, { "epoch": 0.27, "learning_rate": 3.644252321485751e-05, "loss": 1.6998, "step": 424400 }, { "epoch": 0.27, "learning_rate": 3.6439321165545955e-05, "loss": 1.7248, "step": 424500 }, { "epoch": 0.27, "learning_rate": 3.643611911623439e-05, "loss": 1.7105, "step": 424600 }, { "epoch": 0.27, "learning_rate": 3.6432917066922834e-05, "loss": 1.7382, "step": 424700 }, { "epoch": 0.27, "learning_rate": 3.6429715017611274e-05, "loss": 1.7165, "step": 424800 }, { "epoch": 0.27, "learning_rate": 3.6426512968299714e-05, "loss": 1.7103, "step": 424900 }, { "epoch": 0.27, "learning_rate": 3.6423310918988154e-05, "loss": 1.7159, "step": 425000 }, { "epoch": 0.27, "eval_loss": 1.742501974105835, "eval_runtime": 91.0843, "eval_samples_per_second": 109.788, "eval_steps_per_second": 6.862, "step": 425000 }, { "epoch": 0.27, "learning_rate": 3.642010886967659e-05, "loss": 1.7286, "step": 425100 }, { "epoch": 0.27, "learning_rate": 3.641690682036504e-05, "loss": 1.7375, "step": 425200 }, { "epoch": 0.27, "learning_rate": 3.641370477105347e-05, "loss": 1.708, "step": 425300 }, { "epoch": 0.27, "learning_rate": 3.641050272174192e-05, "loss": 1.719, "step": 425400 }, { "epoch": 0.27, "learning_rate": 3.640730067243035e-05, "loss": 1.7283, "step": 425500 }, { "epoch": 0.27, "learning_rate": 3.64040986231188e-05, "loss": 1.6792, "step": 425600 }, { "epoch": 0.27, "learning_rate": 3.640089657380724e-05, "loss": 1.7081, "step": 425700 }, { "epoch": 0.27, "learning_rate": 3.639769452449568e-05, "loss": 1.7144, "step": 425800 }, { "epoch": 0.27, "learning_rate": 3.6394492475184125e-05, "loss": 1.7359, "step": 425900 }, { "epoch": 0.27, "learning_rate": 3.639129042587256e-05, "loss": 1.731, "step": 426000 }, { "epoch": 0.27, "eval_loss": 1.7394205331802368, "eval_runtime": 93.3082, "eval_samples_per_second": 107.172, "eval_steps_per_second": 6.698, "step": 426000 }, { "epoch": 0.27, "learning_rate": 3.6388088376561004e-05, "loss": 1.7421, "step": 426100 }, { "epoch": 0.27, "learning_rate": 3.638488632724944e-05, "loss": 1.7293, "step": 426200 }, { "epoch": 0.27, "learning_rate": 3.6381684277937884e-05, "loss": 1.722, "step": 426300 }, { "epoch": 0.27, "learning_rate": 3.6378482228626324e-05, "loss": 1.7135, "step": 426400 }, { "epoch": 0.27, "learning_rate": 3.637528017931476e-05, "loss": 1.7103, "step": 426500 }, { "epoch": 0.27, "learning_rate": 3.637207813000321e-05, "loss": 1.7198, "step": 426600 }, { "epoch": 0.27, "learning_rate": 3.636887608069164e-05, "loss": 1.7269, "step": 426700 }, { "epoch": 0.27, "learning_rate": 3.636567403138009e-05, "loss": 1.7056, "step": 426800 }, { "epoch": 0.27, "learning_rate": 3.636247198206852e-05, "loss": 1.7294, "step": 426900 }, { "epoch": 0.27, "learning_rate": 3.635926993275697e-05, "loss": 1.7058, "step": 427000 }, { "epoch": 0.27, "eval_loss": 1.7409863471984863, "eval_runtime": 94.5472, "eval_samples_per_second": 105.767, "eval_steps_per_second": 6.61, "step": 427000 }, { "epoch": 0.27, "learning_rate": 3.63560678834454e-05, "loss": 1.7164, "step": 427100 }, { "epoch": 0.27, "learning_rate": 3.635286583413385e-05, "loss": 1.7229, "step": 427200 }, { "epoch": 0.27, "learning_rate": 3.634966378482229e-05, "loss": 1.7238, "step": 427300 }, { "epoch": 0.27, "learning_rate": 3.634646173551073e-05, "loss": 1.741, "step": 427400 }, { "epoch": 0.27, "learning_rate": 3.6343259686199174e-05, "loss": 1.7268, "step": 427500 }, { "epoch": 0.27, "learning_rate": 3.634005763688761e-05, "loss": 1.7367, "step": 427600 }, { "epoch": 0.27, "learning_rate": 3.6336855587576054e-05, "loss": 1.7294, "step": 427700 }, { "epoch": 0.27, "learning_rate": 3.633365353826449e-05, "loss": 1.7254, "step": 427800 }, { "epoch": 0.27, "learning_rate": 3.633045148895293e-05, "loss": 1.719, "step": 427900 }, { "epoch": 0.27, "learning_rate": 3.632724943964137e-05, "loss": 1.7331, "step": 428000 }, { "epoch": 0.27, "eval_loss": 1.7419236898422241, "eval_runtime": 90.0348, "eval_samples_per_second": 111.068, "eval_steps_per_second": 6.942, "step": 428000 }, { "epoch": 0.27, "learning_rate": 3.632404739032981e-05, "loss": 1.7278, "step": 428100 }, { "epoch": 0.27, "learning_rate": 3.632084534101826e-05, "loss": 1.7168, "step": 428200 }, { "epoch": 0.27, "learning_rate": 3.631764329170669e-05, "loss": 1.7138, "step": 428300 }, { "epoch": 0.27, "learning_rate": 3.631444124239514e-05, "loss": 1.7293, "step": 428400 }, { "epoch": 0.27, "learning_rate": 3.631123919308357e-05, "loss": 1.7226, "step": 428500 }, { "epoch": 0.27, "learning_rate": 3.630803714377202e-05, "loss": 1.7087, "step": 428600 }, { "epoch": 0.27, "learning_rate": 3.630483509446045e-05, "loss": 1.7058, "step": 428700 }, { "epoch": 0.27, "learning_rate": 3.63016330451489e-05, "loss": 1.7202, "step": 428800 }, { "epoch": 0.27, "learning_rate": 3.629843099583734e-05, "loss": 1.7216, "step": 428900 }, { "epoch": 0.27, "learning_rate": 3.629522894652578e-05, "loss": 1.7253, "step": 429000 }, { "epoch": 0.27, "eval_loss": 1.7448601722717285, "eval_runtime": 89.9407, "eval_samples_per_second": 111.184, "eval_steps_per_second": 6.949, "step": 429000 }, { "epoch": 0.27, "learning_rate": 3.6292026897214224e-05, "loss": 1.7434, "step": 429100 }, { "epoch": 0.27, "learning_rate": 3.6288824847902657e-05, "loss": 1.7088, "step": 429200 }, { "epoch": 0.27, "learning_rate": 3.62856227985911e-05, "loss": 1.6995, "step": 429300 }, { "epoch": 0.27, "learning_rate": 3.6282420749279536e-05, "loss": 1.7442, "step": 429400 }, { "epoch": 0.27, "learning_rate": 3.627921869996798e-05, "loss": 1.6955, "step": 429500 }, { "epoch": 0.27, "learning_rate": 3.627601665065642e-05, "loss": 1.7006, "step": 429600 }, { "epoch": 0.28, "learning_rate": 3.627281460134486e-05, "loss": 1.7139, "step": 429700 }, { "epoch": 0.28, "learning_rate": 3.626961255203331e-05, "loss": 1.7115, "step": 429800 }, { "epoch": 0.28, "learning_rate": 3.626641050272174e-05, "loss": 1.721, "step": 429900 }, { "epoch": 0.28, "learning_rate": 3.626320845341019e-05, "loss": 1.7248, "step": 430000 }, { "epoch": 0.28, "eval_loss": 1.744076132774353, "eval_runtime": 92.2138, "eval_samples_per_second": 108.444, "eval_steps_per_second": 6.778, "step": 430000 }, { "epoch": 0.28, "learning_rate": 3.626000640409862e-05, "loss": 1.7301, "step": 430100 }, { "epoch": 0.28, "learning_rate": 3.625680435478707e-05, "loss": 1.7386, "step": 430200 }, { "epoch": 0.28, "learning_rate": 3.62536023054755e-05, "loss": 1.7475, "step": 430300 }, { "epoch": 0.28, "learning_rate": 3.625040025616395e-05, "loss": 1.7116, "step": 430400 }, { "epoch": 0.28, "learning_rate": 3.624719820685239e-05, "loss": 1.7269, "step": 430500 }, { "epoch": 0.28, "learning_rate": 3.6243996157540826e-05, "loss": 1.715, "step": 430600 }, { "epoch": 0.28, "learning_rate": 3.624079410822927e-05, "loss": 1.7065, "step": 430700 }, { "epoch": 0.28, "learning_rate": 3.6237592058917706e-05, "loss": 1.7296, "step": 430800 }, { "epoch": 0.28, "learning_rate": 3.623439000960615e-05, "loss": 1.7257, "step": 430900 }, { "epoch": 0.28, "learning_rate": 3.6231187960294585e-05, "loss": 1.7161, "step": 431000 }, { "epoch": 0.28, "eval_loss": 1.7414966821670532, "eval_runtime": 92.3163, "eval_samples_per_second": 108.323, "eval_steps_per_second": 6.77, "step": 431000 }, { "epoch": 0.28, "learning_rate": 3.622798591098303e-05, "loss": 1.7208, "step": 431100 }, { "epoch": 0.28, "learning_rate": 3.622478386167147e-05, "loss": 1.7184, "step": 431200 }, { "epoch": 0.28, "learning_rate": 3.622158181235991e-05, "loss": 1.7086, "step": 431300 }, { "epoch": 0.28, "learning_rate": 3.621837976304836e-05, "loss": 1.7268, "step": 431400 }, { "epoch": 0.28, "learning_rate": 3.621517771373679e-05, "loss": 1.7131, "step": 431500 }, { "epoch": 0.28, "learning_rate": 3.621197566442524e-05, "loss": 1.7118, "step": 431600 }, { "epoch": 0.28, "learning_rate": 3.620877361511367e-05, "loss": 1.7254, "step": 431700 }, { "epoch": 0.28, "learning_rate": 3.620557156580212e-05, "loss": 1.7203, "step": 431800 }, { "epoch": 0.28, "learning_rate": 3.620236951649055e-05, "loss": 1.7077, "step": 431900 }, { "epoch": 0.28, "learning_rate": 3.6199167467178996e-05, "loss": 1.717, "step": 432000 }, { "epoch": 0.28, "eval_loss": 1.7439628839492798, "eval_runtime": 89.3902, "eval_samples_per_second": 111.869, "eval_steps_per_second": 6.992, "step": 432000 }, { "epoch": 0.28, "learning_rate": 3.6195965417867436e-05, "loss": 1.6969, "step": 432100 }, { "epoch": 0.28, "learning_rate": 3.6192763368555876e-05, "loss": 1.7398, "step": 432200 }, { "epoch": 0.28, "learning_rate": 3.618956131924432e-05, "loss": 1.7018, "step": 432300 }, { "epoch": 0.28, "learning_rate": 3.6186359269932755e-05, "loss": 1.7009, "step": 432400 }, { "epoch": 0.28, "learning_rate": 3.61831572206212e-05, "loss": 1.7237, "step": 432500 }, { "epoch": 0.28, "learning_rate": 3.6179955171309635e-05, "loss": 1.73, "step": 432600 }, { "epoch": 0.28, "learning_rate": 3.617675312199808e-05, "loss": 1.7202, "step": 432700 }, { "epoch": 0.28, "learning_rate": 3.617355107268652e-05, "loss": 1.7334, "step": 432800 }, { "epoch": 0.28, "learning_rate": 3.617034902337496e-05, "loss": 1.7199, "step": 432900 }, { "epoch": 0.28, "learning_rate": 3.616714697406341e-05, "loss": 1.7294, "step": 433000 }, { "epoch": 0.28, "eval_loss": 1.740989089012146, "eval_runtime": 89.0291, "eval_samples_per_second": 112.323, "eval_steps_per_second": 7.02, "step": 433000 }, { "epoch": 0.28, "learning_rate": 3.616394492475184e-05, "loss": 1.7308, "step": 433100 }, { "epoch": 0.28, "learning_rate": 3.616074287544029e-05, "loss": 1.7226, "step": 433200 }, { "epoch": 0.28, "learning_rate": 3.615754082612872e-05, "loss": 1.708, "step": 433300 }, { "epoch": 0.28, "learning_rate": 3.6154338776817166e-05, "loss": 1.7087, "step": 433400 }, { "epoch": 0.28, "learning_rate": 3.6151136727505606e-05, "loss": 1.7216, "step": 433500 }, { "epoch": 0.28, "learning_rate": 3.6147934678194046e-05, "loss": 1.7193, "step": 433600 }, { "epoch": 0.28, "learning_rate": 3.6144732628882485e-05, "loss": 1.6955, "step": 433700 }, { "epoch": 0.28, "learning_rate": 3.6141530579570925e-05, "loss": 1.7126, "step": 433800 }, { "epoch": 0.28, "learning_rate": 3.613832853025937e-05, "loss": 1.7217, "step": 433900 }, { "epoch": 0.28, "learning_rate": 3.6135126480947805e-05, "loss": 1.7051, "step": 434000 }, { "epoch": 0.28, "eval_loss": 1.7421048879623413, "eval_runtime": 95.8236, "eval_samples_per_second": 104.358, "eval_steps_per_second": 6.522, "step": 434000 }, { "epoch": 0.28, "learning_rate": 3.613192443163625e-05, "loss": 1.738, "step": 434100 }, { "epoch": 0.28, "learning_rate": 3.6128722382324684e-05, "loss": 1.7071, "step": 434200 }, { "epoch": 0.28, "learning_rate": 3.612552033301313e-05, "loss": 1.71, "step": 434300 }, { "epoch": 0.28, "learning_rate": 3.612231828370157e-05, "loss": 1.7182, "step": 434400 }, { "epoch": 0.28, "learning_rate": 3.611911623439001e-05, "loss": 1.7508, "step": 434500 }, { "epoch": 0.28, "learning_rate": 3.611591418507846e-05, "loss": 1.72, "step": 434600 }, { "epoch": 0.28, "learning_rate": 3.611271213576689e-05, "loss": 1.7058, "step": 434700 }, { "epoch": 0.28, "learning_rate": 3.6109510086455336e-05, "loss": 1.7171, "step": 434800 }, { "epoch": 0.28, "learning_rate": 3.610630803714377e-05, "loss": 1.7146, "step": 434900 }, { "epoch": 0.28, "learning_rate": 3.6103105987832216e-05, "loss": 1.7137, "step": 435000 }, { "epoch": 0.28, "eval_loss": 1.742309808731079, "eval_runtime": 95.1153, "eval_samples_per_second": 105.136, "eval_steps_per_second": 6.571, "step": 435000 }, { "epoch": 0.28, "learning_rate": 3.6099903938520655e-05, "loss": 1.7069, "step": 435100 }, { "epoch": 0.28, "learning_rate": 3.6096701889209095e-05, "loss": 1.713, "step": 435200 }, { "epoch": 0.28, "learning_rate": 3.6093499839897535e-05, "loss": 1.717, "step": 435300 }, { "epoch": 0.28, "learning_rate": 3.6090297790585975e-05, "loss": 1.7277, "step": 435400 }, { "epoch": 0.28, "learning_rate": 3.608709574127442e-05, "loss": 1.7362, "step": 435500 }, { "epoch": 0.28, "learning_rate": 3.6083893691962854e-05, "loss": 1.7113, "step": 435600 }, { "epoch": 0.28, "learning_rate": 3.60806916426513e-05, "loss": 1.7248, "step": 435700 }, { "epoch": 0.28, "learning_rate": 3.607748959333974e-05, "loss": 1.7073, "step": 435800 }, { "epoch": 0.28, "learning_rate": 3.607428754402818e-05, "loss": 1.7278, "step": 435900 }, { "epoch": 0.28, "learning_rate": 3.607108549471662e-05, "loss": 1.7051, "step": 436000 }, { "epoch": 0.28, "eval_loss": 1.738287091255188, "eval_runtime": 94.5519, "eval_samples_per_second": 105.762, "eval_steps_per_second": 6.61, "step": 436000 }, { "epoch": 0.28, "learning_rate": 3.606788344540506e-05, "loss": 1.7378, "step": 436100 }, { "epoch": 0.28, "learning_rate": 3.60646813960935e-05, "loss": 1.7283, "step": 436200 }, { "epoch": 0.28, "learning_rate": 3.606147934678194e-05, "loss": 1.719, "step": 436300 }, { "epoch": 0.28, "learning_rate": 3.6058277297470385e-05, "loss": 1.7206, "step": 436400 }, { "epoch": 0.28, "learning_rate": 3.6055075248158825e-05, "loss": 1.7169, "step": 436500 }, { "epoch": 0.28, "learning_rate": 3.6051873198847265e-05, "loss": 1.7201, "step": 436600 }, { "epoch": 0.28, "learning_rate": 3.6048671149535705e-05, "loss": 1.7059, "step": 436700 }, { "epoch": 0.28, "learning_rate": 3.6045469100224144e-05, "loss": 1.7166, "step": 436800 }, { "epoch": 0.28, "learning_rate": 3.6042267050912584e-05, "loss": 1.7302, "step": 436900 }, { "epoch": 0.28, "learning_rate": 3.6039065001601024e-05, "loss": 1.7203, "step": 437000 }, { "epoch": 0.28, "eval_loss": 1.740354061126709, "eval_runtime": 95.0305, "eval_samples_per_second": 105.229, "eval_steps_per_second": 6.577, "step": 437000 }, { "epoch": 0.28, "learning_rate": 3.603586295228947e-05, "loss": 1.7153, "step": 437100 }, { "epoch": 0.28, "learning_rate": 3.60326609029779e-05, "loss": 1.6921, "step": 437200 }, { "epoch": 0.28, "learning_rate": 3.602945885366635e-05, "loss": 1.6997, "step": 437300 }, { "epoch": 0.28, "learning_rate": 3.602625680435479e-05, "loss": 1.7259, "step": 437400 }, { "epoch": 0.28, "learning_rate": 3.602305475504323e-05, "loss": 1.7279, "step": 437500 }, { "epoch": 0.28, "learning_rate": 3.601985270573167e-05, "loss": 1.6975, "step": 437600 }, { "epoch": 0.28, "learning_rate": 3.601665065642011e-05, "loss": 1.7114, "step": 437700 }, { "epoch": 0.28, "learning_rate": 3.601344860710855e-05, "loss": 1.7258, "step": 437800 }, { "epoch": 0.28, "learning_rate": 3.601024655779699e-05, "loss": 1.727, "step": 437900 }, { "epoch": 0.28, "learning_rate": 3.6007044508485435e-05, "loss": 1.7228, "step": 438000 }, { "epoch": 0.28, "eval_loss": 1.7410777807235718, "eval_runtime": 90.7801, "eval_samples_per_second": 110.156, "eval_steps_per_second": 6.885, "step": 438000 }, { "epoch": 0.28, "learning_rate": 3.6003842459173875e-05, "loss": 1.713, "step": 438100 }, { "epoch": 0.28, "learning_rate": 3.6000640409862314e-05, "loss": 1.7017, "step": 438200 }, { "epoch": 0.28, "learning_rate": 3.5997438360550754e-05, "loss": 1.7346, "step": 438300 }, { "epoch": 0.28, "learning_rate": 3.5994236311239194e-05, "loss": 1.7321, "step": 438400 }, { "epoch": 0.28, "learning_rate": 3.5991034261927634e-05, "loss": 1.7179, "step": 438500 }, { "epoch": 0.28, "learning_rate": 3.598783221261607e-05, "loss": 1.7282, "step": 438600 }, { "epoch": 0.28, "learning_rate": 3.598463016330452e-05, "loss": 1.7049, "step": 438700 }, { "epoch": 0.28, "learning_rate": 3.598142811399296e-05, "loss": 1.7019, "step": 438800 }, { "epoch": 0.28, "learning_rate": 3.59782260646814e-05, "loss": 1.7109, "step": 438900 }, { "epoch": 0.28, "learning_rate": 3.597502401536984e-05, "loss": 1.7062, "step": 439000 }, { "epoch": 0.28, "eval_loss": 1.7423441410064697, "eval_runtime": 90.7422, "eval_samples_per_second": 110.202, "eval_steps_per_second": 6.888, "step": 439000 }, { "epoch": 0.28, "learning_rate": 3.597182196605828e-05, "loss": 1.7191, "step": 439100 }, { "epoch": 0.28, "learning_rate": 3.596861991674672e-05, "loss": 1.7199, "step": 439200 }, { "epoch": 0.28, "learning_rate": 3.596541786743516e-05, "loss": 1.7036, "step": 439300 }, { "epoch": 0.28, "learning_rate": 3.59622158181236e-05, "loss": 1.7106, "step": 439400 }, { "epoch": 0.28, "learning_rate": 3.595901376881204e-05, "loss": 1.7108, "step": 439500 }, { "epoch": 0.28, "learning_rate": 3.5955811719500484e-05, "loss": 1.7156, "step": 439600 }, { "epoch": 0.28, "learning_rate": 3.5952609670188924e-05, "loss": 1.7186, "step": 439700 }, { "epoch": 0.28, "learning_rate": 3.5949407620877364e-05, "loss": 1.7302, "step": 439800 }, { "epoch": 0.28, "learning_rate": 3.5946205571565803e-05, "loss": 1.6997, "step": 439900 }, { "epoch": 0.28, "learning_rate": 3.594300352225424e-05, "loss": 1.7437, "step": 440000 }, { "epoch": 0.28, "eval_loss": 1.7405693531036377, "eval_runtime": 95.2242, "eval_samples_per_second": 105.015, "eval_steps_per_second": 6.563, "step": 440000 }, { "epoch": 0.28, "learning_rate": 3.593980147294268e-05, "loss": 1.7028, "step": 440100 }, { "epoch": 0.28, "learning_rate": 3.593659942363112e-05, "loss": 1.7176, "step": 440200 }, { "epoch": 0.28, "learning_rate": 3.593339737431957e-05, "loss": 1.7012, "step": 440300 }, { "epoch": 0.28, "learning_rate": 3.593019532500801e-05, "loss": 1.7313, "step": 440400 }, { "epoch": 0.28, "learning_rate": 3.592699327569645e-05, "loss": 1.6921, "step": 440500 }, { "epoch": 0.28, "learning_rate": 3.592379122638489e-05, "loss": 1.7067, "step": 440600 }, { "epoch": 0.28, "learning_rate": 3.592058917707333e-05, "loss": 1.7253, "step": 440700 }, { "epoch": 0.28, "learning_rate": 3.591738712776177e-05, "loss": 1.7416, "step": 440800 }, { "epoch": 0.28, "learning_rate": 3.591418507845021e-05, "loss": 1.7286, "step": 440900 }, { "epoch": 0.28, "learning_rate": 3.591098302913865e-05, "loss": 1.7112, "step": 441000 }, { "epoch": 0.28, "eval_loss": 1.742679476737976, "eval_runtime": 97.6113, "eval_samples_per_second": 102.447, "eval_steps_per_second": 6.403, "step": 441000 }, { "epoch": 0.28, "learning_rate": 3.5907780979827094e-05, "loss": 1.7216, "step": 441100 }, { "epoch": 0.28, "learning_rate": 3.5904578930515534e-05, "loss": 1.7165, "step": 441200 }, { "epoch": 0.28, "learning_rate": 3.590137688120397e-05, "loss": 1.7253, "step": 441300 }, { "epoch": 0.28, "learning_rate": 3.589817483189241e-05, "loss": 1.7169, "step": 441400 }, { "epoch": 0.28, "learning_rate": 3.589497278258085e-05, "loss": 1.7099, "step": 441500 }, { "epoch": 0.28, "learning_rate": 3.589177073326929e-05, "loss": 1.7017, "step": 441600 }, { "epoch": 0.28, "learning_rate": 3.588856868395773e-05, "loss": 1.7331, "step": 441700 }, { "epoch": 0.28, "learning_rate": 3.588536663464617e-05, "loss": 1.7117, "step": 441800 }, { "epoch": 0.28, "learning_rate": 3.588216458533462e-05, "loss": 1.7251, "step": 441900 }, { "epoch": 0.28, "learning_rate": 3.587896253602306e-05, "loss": 1.7189, "step": 442000 }, { "epoch": 0.28, "eval_loss": 1.7429040670394897, "eval_runtime": 93.0624, "eval_samples_per_second": 107.455, "eval_steps_per_second": 6.716, "step": 442000 }, { "epoch": 0.28, "learning_rate": 3.58757604867115e-05, "loss": 1.7168, "step": 442100 }, { "epoch": 0.28, "learning_rate": 3.587255843739994e-05, "loss": 1.7, "step": 442200 }, { "epoch": 0.28, "learning_rate": 3.586935638808838e-05, "loss": 1.7379, "step": 442300 }, { "epoch": 0.28, "learning_rate": 3.586615433877682e-05, "loss": 1.7063, "step": 442400 }, { "epoch": 0.28, "learning_rate": 3.586295228946526e-05, "loss": 1.7093, "step": 442500 }, { "epoch": 0.28, "learning_rate": 3.58597502401537e-05, "loss": 1.7314, "step": 442600 }, { "epoch": 0.28, "learning_rate": 3.585654819084214e-05, "loss": 1.7088, "step": 442700 }, { "epoch": 0.28, "learning_rate": 3.585334614153058e-05, "loss": 1.7146, "step": 442800 }, { "epoch": 0.28, "learning_rate": 3.585014409221902e-05, "loss": 1.7183, "step": 442900 }, { "epoch": 0.28, "learning_rate": 3.584694204290746e-05, "loss": 1.7167, "step": 443000 }, { "epoch": 0.28, "eval_loss": 1.7403675317764282, "eval_runtime": 96.8838, "eval_samples_per_second": 103.216, "eval_steps_per_second": 6.451, "step": 443000 }, { "epoch": 0.28, "learning_rate": 3.58437399935959e-05, "loss": 1.7199, "step": 443100 }, { "epoch": 0.28, "learning_rate": 3.584053794428434e-05, "loss": 1.7318, "step": 443200 }, { "epoch": 0.28, "learning_rate": 3.583733589497278e-05, "loss": 1.722, "step": 443300 }, { "epoch": 0.28, "learning_rate": 3.583413384566123e-05, "loss": 1.7216, "step": 443400 }, { "epoch": 0.28, "learning_rate": 3.583093179634967e-05, "loss": 1.7226, "step": 443500 }, { "epoch": 0.28, "learning_rate": 3.582772974703811e-05, "loss": 1.7037, "step": 443600 }, { "epoch": 0.28, "learning_rate": 3.582452769772655e-05, "loss": 1.7152, "step": 443700 }, { "epoch": 0.28, "learning_rate": 3.582132564841499e-05, "loss": 1.6978, "step": 443800 }, { "epoch": 0.28, "learning_rate": 3.581812359910343e-05, "loss": 1.7262, "step": 443900 }, { "epoch": 0.28, "learning_rate": 3.5814921549791867e-05, "loss": 1.7345, "step": 444000 }, { "epoch": 0.28, "eval_loss": 1.7410849332809448, "eval_runtime": 94.7951, "eval_samples_per_second": 105.491, "eval_steps_per_second": 6.593, "step": 444000 }, { "epoch": 0.28, "learning_rate": 3.581171950048031e-05, "loss": 1.7167, "step": 444100 }, { "epoch": 0.28, "learning_rate": 3.5808517451168746e-05, "loss": 1.7023, "step": 444200 }, { "epoch": 0.28, "learning_rate": 3.580531540185719e-05, "loss": 1.7032, "step": 444300 }, { "epoch": 0.28, "learning_rate": 3.580211335254563e-05, "loss": 1.7102, "step": 444400 }, { "epoch": 0.28, "learning_rate": 3.579891130323407e-05, "loss": 1.7058, "step": 444500 }, { "epoch": 0.28, "learning_rate": 3.579570925392251e-05, "loss": 1.7272, "step": 444600 }, { "epoch": 0.28, "learning_rate": 3.579250720461095e-05, "loss": 1.725, "step": 444700 }, { "epoch": 0.28, "learning_rate": 3.578930515529939e-05, "loss": 1.7132, "step": 444800 }, { "epoch": 0.28, "learning_rate": 3.578610310598783e-05, "loss": 1.6953, "step": 444900 }, { "epoch": 0.28, "learning_rate": 3.578290105667628e-05, "loss": 1.7028, "step": 445000 }, { "epoch": 0.28, "eval_loss": 1.7449302673339844, "eval_runtime": 92.0536, "eval_samples_per_second": 108.632, "eval_steps_per_second": 6.79, "step": 445000 }, { "epoch": 0.28, "learning_rate": 3.577969900736472e-05, "loss": 1.7196, "step": 445100 }, { "epoch": 0.28, "learning_rate": 3.577649695805316e-05, "loss": 1.7097, "step": 445200 }, { "epoch": 0.28, "learning_rate": 3.57732949087416e-05, "loss": 1.7197, "step": 445300 }, { "epoch": 0.29, "learning_rate": 3.5770092859430036e-05, "loss": 1.7214, "step": 445400 }, { "epoch": 0.29, "learning_rate": 3.5766890810118476e-05, "loss": 1.7031, "step": 445500 }, { "epoch": 0.29, "learning_rate": 3.5763688760806916e-05, "loss": 1.7158, "step": 445600 }, { "epoch": 0.29, "learning_rate": 3.576048671149536e-05, "loss": 1.7023, "step": 445700 }, { "epoch": 0.29, "learning_rate": 3.5757284662183795e-05, "loss": 1.7046, "step": 445800 }, { "epoch": 0.29, "learning_rate": 3.575408261287224e-05, "loss": 1.706, "step": 445900 }, { "epoch": 0.29, "learning_rate": 3.575088056356068e-05, "loss": 1.7199, "step": 446000 }, { "epoch": 0.29, "eval_loss": 1.7419145107269287, "eval_runtime": 92.2074, "eval_samples_per_second": 108.451, "eval_steps_per_second": 6.778, "step": 446000 }, { "epoch": 0.29, "learning_rate": 3.574767851424912e-05, "loss": 1.7057, "step": 446100 }, { "epoch": 0.29, "learning_rate": 3.574447646493756e-05, "loss": 1.7071, "step": 446200 }, { "epoch": 0.29, "learning_rate": 3.5741274415626e-05, "loss": 1.7245, "step": 446300 }, { "epoch": 0.29, "learning_rate": 3.573807236631445e-05, "loss": 1.6958, "step": 446400 }, { "epoch": 0.29, "learning_rate": 3.573487031700288e-05, "loss": 1.7182, "step": 446500 }, { "epoch": 0.29, "learning_rate": 3.573166826769133e-05, "loss": 1.7174, "step": 446600 }, { "epoch": 0.29, "learning_rate": 3.572846621837977e-05, "loss": 1.7118, "step": 446700 }, { "epoch": 0.29, "learning_rate": 3.5725264169068206e-05, "loss": 1.7028, "step": 446800 }, { "epoch": 0.29, "learning_rate": 3.5722062119756646e-05, "loss": 1.6977, "step": 446900 }, { "epoch": 0.29, "learning_rate": 3.5718860070445086e-05, "loss": 1.7209, "step": 447000 }, { "epoch": 0.29, "eval_loss": 1.7412607669830322, "eval_runtime": 94.2919, "eval_samples_per_second": 106.054, "eval_steps_per_second": 6.628, "step": 447000 }, { "epoch": 0.29, "learning_rate": 3.5715658021133526e-05, "loss": 1.7186, "step": 447100 }, { "epoch": 0.29, "learning_rate": 3.5712455971821965e-05, "loss": 1.7023, "step": 447200 }, { "epoch": 0.29, "learning_rate": 3.570925392251041e-05, "loss": 1.7118, "step": 447300 }, { "epoch": 0.29, "learning_rate": 3.5706051873198845e-05, "loss": 1.706, "step": 447400 }, { "epoch": 0.29, "learning_rate": 3.570284982388729e-05, "loss": 1.7171, "step": 447500 }, { "epoch": 0.29, "learning_rate": 3.569964777457573e-05, "loss": 1.7006, "step": 447600 }, { "epoch": 0.29, "learning_rate": 3.569644572526417e-05, "loss": 1.7058, "step": 447700 }, { "epoch": 0.29, "learning_rate": 3.569324367595261e-05, "loss": 1.7066, "step": 447800 }, { "epoch": 0.29, "learning_rate": 3.569004162664105e-05, "loss": 1.6951, "step": 447900 }, { "epoch": 0.29, "learning_rate": 3.56868395773295e-05, "loss": 1.7109, "step": 448000 }, { "epoch": 0.29, "eval_loss": 1.7406386137008667, "eval_runtime": 92.006, "eval_samples_per_second": 108.689, "eval_steps_per_second": 6.793, "step": 448000 }, { "epoch": 0.29, "learning_rate": 3.568363752801793e-05, "loss": 1.696, "step": 448100 }, { "epoch": 0.29, "learning_rate": 3.5680435478706376e-05, "loss": 1.6965, "step": 448200 }, { "epoch": 0.29, "learning_rate": 3.5677233429394816e-05, "loss": 1.7248, "step": 448300 }, { "epoch": 0.29, "learning_rate": 3.5674031380083256e-05, "loss": 1.7249, "step": 448400 }, { "epoch": 0.29, "learning_rate": 3.5670829330771695e-05, "loss": 1.7061, "step": 448500 }, { "epoch": 0.29, "learning_rate": 3.5667627281460135e-05, "loss": 1.684, "step": 448600 }, { "epoch": 0.29, "learning_rate": 3.566442523214858e-05, "loss": 1.732, "step": 448700 }, { "epoch": 0.29, "learning_rate": 3.5661223182837015e-05, "loss": 1.7044, "step": 448800 }, { "epoch": 0.29, "learning_rate": 3.565802113352546e-05, "loss": 1.7204, "step": 448900 }, { "epoch": 0.29, "learning_rate": 3.5654819084213894e-05, "loss": 1.7114, "step": 449000 }, { "epoch": 0.29, "eval_loss": 1.7423685789108276, "eval_runtime": 91.8859, "eval_samples_per_second": 108.831, "eval_steps_per_second": 6.802, "step": 449000 }, { "epoch": 0.29, "learning_rate": 3.565161703490234e-05, "loss": 1.7036, "step": 449100 }, { "epoch": 0.29, "learning_rate": 3.564841498559078e-05, "loss": 1.7082, "step": 449200 }, { "epoch": 0.29, "learning_rate": 3.564521293627922e-05, "loss": 1.7245, "step": 449300 }, { "epoch": 0.29, "learning_rate": 3.564201088696766e-05, "loss": 1.7214, "step": 449400 }, { "epoch": 0.29, "learning_rate": 3.56388088376561e-05, "loss": 1.7028, "step": 449500 }, { "epoch": 0.29, "learning_rate": 3.5635606788344546e-05, "loss": 1.7106, "step": 449600 }, { "epoch": 0.29, "learning_rate": 3.563240473903298e-05, "loss": 1.7132, "step": 449700 }, { "epoch": 0.29, "learning_rate": 3.5629202689721426e-05, "loss": 1.7211, "step": 449800 }, { "epoch": 0.29, "learning_rate": 3.5626000640409865e-05, "loss": 1.7202, "step": 449900 }, { "epoch": 0.29, "learning_rate": 3.5622798591098305e-05, "loss": 1.6929, "step": 450000 }, { "epoch": 0.29, "eval_loss": 1.744065284729004, "eval_runtime": 96.2951, "eval_samples_per_second": 103.847, "eval_steps_per_second": 6.49, "step": 450000 }, { "epoch": 0.29, "learning_rate": 3.5619596541786745e-05, "loss": 1.7251, "step": 450100 }, { "epoch": 0.29, "learning_rate": 3.5616394492475185e-05, "loss": 1.7182, "step": 450200 }, { "epoch": 0.29, "learning_rate": 3.561319244316363e-05, "loss": 1.7216, "step": 450300 }, { "epoch": 0.29, "learning_rate": 3.5609990393852064e-05, "loss": 1.7358, "step": 450400 }, { "epoch": 0.29, "learning_rate": 3.560678834454051e-05, "loss": 1.7288, "step": 450500 }, { "epoch": 0.29, "learning_rate": 3.5603586295228944e-05, "loss": 1.7188, "step": 450600 }, { "epoch": 0.29, "learning_rate": 3.560038424591739e-05, "loss": 1.7019, "step": 450700 }, { "epoch": 0.29, "learning_rate": 3.559718219660583e-05, "loss": 1.7087, "step": 450800 }, { "epoch": 0.29, "learning_rate": 3.559398014729427e-05, "loss": 1.7073, "step": 450900 }, { "epoch": 0.29, "learning_rate": 3.5590778097982716e-05, "loss": 1.7275, "step": 451000 }, { "epoch": 0.29, "eval_loss": 1.739660382270813, "eval_runtime": 94.1687, "eval_samples_per_second": 106.192, "eval_steps_per_second": 6.637, "step": 451000 }, { "epoch": 0.29, "learning_rate": 3.558757604867115e-05, "loss": 1.7137, "step": 451100 }, { "epoch": 0.29, "learning_rate": 3.5584373999359596e-05, "loss": 1.7132, "step": 451200 }, { "epoch": 0.29, "learning_rate": 3.558117195004803e-05, "loss": 1.7127, "step": 451300 }, { "epoch": 0.29, "learning_rate": 3.5577969900736475e-05, "loss": 1.7108, "step": 451400 }, { "epoch": 0.29, "learning_rate": 3.5574767851424915e-05, "loss": 1.7177, "step": 451500 }, { "epoch": 0.29, "learning_rate": 3.5571565802113355e-05, "loss": 1.7009, "step": 451600 }, { "epoch": 0.29, "learning_rate": 3.5568363752801794e-05, "loss": 1.7057, "step": 451700 }, { "epoch": 0.29, "learning_rate": 3.5565161703490234e-05, "loss": 1.703, "step": 451800 }, { "epoch": 0.29, "learning_rate": 3.556195965417868e-05, "loss": 1.696, "step": 451900 }, { "epoch": 0.29, "learning_rate": 3.5558757604867113e-05, "loss": 1.731, "step": 452000 }, { "epoch": 0.29, "eval_loss": 1.7398847341537476, "eval_runtime": 95.7108, "eval_samples_per_second": 104.481, "eval_steps_per_second": 6.53, "step": 452000 }, { "epoch": 0.29, "learning_rate": 3.555555555555556e-05, "loss": 1.7284, "step": 452100 }, { "epoch": 0.29, "learning_rate": 3.555235350624399e-05, "loss": 1.7099, "step": 452200 }, { "epoch": 0.29, "learning_rate": 3.554915145693244e-05, "loss": 1.7027, "step": 452300 }, { "epoch": 0.29, "learning_rate": 3.554594940762088e-05, "loss": 1.7109, "step": 452400 }, { "epoch": 0.29, "learning_rate": 3.554274735830932e-05, "loss": 1.7116, "step": 452500 }, { "epoch": 0.29, "learning_rate": 3.5539545308997765e-05, "loss": 1.7278, "step": 452600 }, { "epoch": 0.29, "learning_rate": 3.55363432596862e-05, "loss": 1.7161, "step": 452700 }, { "epoch": 0.29, "learning_rate": 3.5533141210374645e-05, "loss": 1.706, "step": 452800 }, { "epoch": 0.29, "learning_rate": 3.552993916106308e-05, "loss": 1.7333, "step": 452900 }, { "epoch": 0.29, "learning_rate": 3.5526737111751524e-05, "loss": 1.7091, "step": 453000 }, { "epoch": 0.29, "eval_loss": 1.739164113998413, "eval_runtime": 92.3475, "eval_samples_per_second": 108.287, "eval_steps_per_second": 6.768, "step": 453000 }, { "epoch": 0.29, "learning_rate": 3.5523535062439964e-05, "loss": 1.7236, "step": 453100 }, { "epoch": 0.29, "learning_rate": 3.5520333013128404e-05, "loss": 1.71, "step": 453200 }, { "epoch": 0.29, "learning_rate": 3.551713096381685e-05, "loss": 1.7037, "step": 453300 }, { "epoch": 0.29, "learning_rate": 3.551392891450528e-05, "loss": 1.7205, "step": 453400 }, { "epoch": 0.29, "learning_rate": 3.551072686519373e-05, "loss": 1.7006, "step": 453500 }, { "epoch": 0.29, "learning_rate": 3.550752481588216e-05, "loss": 1.7196, "step": 453600 }, { "epoch": 0.29, "learning_rate": 3.550432276657061e-05, "loss": 1.7266, "step": 453700 }, { "epoch": 0.29, "learning_rate": 3.550112071725904e-05, "loss": 1.7018, "step": 453800 }, { "epoch": 0.29, "learning_rate": 3.549791866794749e-05, "loss": 1.7104, "step": 453900 }, { "epoch": 0.29, "learning_rate": 3.549471661863593e-05, "loss": 1.7174, "step": 454000 }, { "epoch": 0.29, "eval_loss": 1.7386163473129272, "eval_runtime": 88.9634, "eval_samples_per_second": 112.406, "eval_steps_per_second": 7.025, "step": 454000 }, { "epoch": 0.29, "learning_rate": 3.549151456932437e-05, "loss": 1.7046, "step": 454100 }, { "epoch": 0.29, "learning_rate": 3.5488312520012815e-05, "loss": 1.6987, "step": 454200 }, { "epoch": 0.29, "learning_rate": 3.548511047070125e-05, "loss": 1.714, "step": 454300 }, { "epoch": 0.29, "learning_rate": 3.5481908421389694e-05, "loss": 1.7093, "step": 454400 }, { "epoch": 0.29, "learning_rate": 3.547870637207813e-05, "loss": 1.7078, "step": 454500 }, { "epoch": 0.29, "learning_rate": 3.5475504322766574e-05, "loss": 1.7053, "step": 454600 }, { "epoch": 0.29, "learning_rate": 3.5472302273455014e-05, "loss": 1.7192, "step": 454700 }, { "epoch": 0.29, "learning_rate": 3.546910022414345e-05, "loss": 1.6902, "step": 454800 }, { "epoch": 0.29, "learning_rate": 3.546589817483189e-05, "loss": 1.7141, "step": 454900 }, { "epoch": 0.29, "learning_rate": 3.546269612552033e-05, "loss": 1.6964, "step": 455000 }, { "epoch": 0.29, "eval_loss": 1.7388941049575806, "eval_runtime": 93.3291, "eval_samples_per_second": 107.148, "eval_steps_per_second": 6.697, "step": 455000 }, { "epoch": 0.29, "learning_rate": 3.545949407620878e-05, "loss": 1.7023, "step": 455100 }, { "epoch": 0.29, "learning_rate": 3.545629202689721e-05, "loss": 1.7266, "step": 455200 }, { "epoch": 0.29, "learning_rate": 3.545308997758566e-05, "loss": 1.6958, "step": 455300 }, { "epoch": 0.29, "learning_rate": 3.544988792827409e-05, "loss": 1.7125, "step": 455400 }, { "epoch": 0.29, "learning_rate": 3.544668587896254e-05, "loss": 1.6868, "step": 455500 }, { "epoch": 0.29, "learning_rate": 3.544348382965098e-05, "loss": 1.7122, "step": 455600 }, { "epoch": 0.29, "learning_rate": 3.544028178033942e-05, "loss": 1.7126, "step": 455700 }, { "epoch": 0.29, "learning_rate": 3.5437079731027864e-05, "loss": 1.7182, "step": 455800 }, { "epoch": 0.29, "learning_rate": 3.54338776817163e-05, "loss": 1.6819, "step": 455900 }, { "epoch": 0.29, "learning_rate": 3.5430675632404744e-05, "loss": 1.71, "step": 456000 }, { "epoch": 0.29, "eval_loss": 1.738181233406067, "eval_runtime": 90.0301, "eval_samples_per_second": 111.074, "eval_steps_per_second": 6.942, "step": 456000 }, { "epoch": 0.29, "learning_rate": 3.542747358309318e-05, "loss": 1.7202, "step": 456100 }, { "epoch": 0.29, "learning_rate": 3.542427153378162e-05, "loss": 1.6992, "step": 456200 }, { "epoch": 0.29, "learning_rate": 3.542106948447006e-05, "loss": 1.7027, "step": 456300 }, { "epoch": 0.29, "learning_rate": 3.54178674351585e-05, "loss": 1.711, "step": 456400 }, { "epoch": 0.29, "learning_rate": 3.541466538584694e-05, "loss": 1.6897, "step": 456500 }, { "epoch": 0.29, "learning_rate": 3.541146333653538e-05, "loss": 1.7086, "step": 456600 }, { "epoch": 0.29, "learning_rate": 3.540826128722383e-05, "loss": 1.7111, "step": 456700 }, { "epoch": 0.29, "learning_rate": 3.540505923791226e-05, "loss": 1.7159, "step": 456800 }, { "epoch": 0.29, "learning_rate": 3.540185718860071e-05, "loss": 1.7263, "step": 456900 }, { "epoch": 0.29, "learning_rate": 3.539865513928914e-05, "loss": 1.7089, "step": 457000 }, { "epoch": 0.29, "eval_loss": 1.7398711442947388, "eval_runtime": 92.1214, "eval_samples_per_second": 108.552, "eval_steps_per_second": 6.785, "step": 457000 }, { "epoch": 0.29, "learning_rate": 3.539545308997759e-05, "loss": 1.7042, "step": 457100 }, { "epoch": 0.29, "learning_rate": 3.539225104066603e-05, "loss": 1.6863, "step": 457200 }, { "epoch": 0.29, "learning_rate": 3.538904899135447e-05, "loss": 1.7087, "step": 457300 }, { "epoch": 0.29, "learning_rate": 3.5385846942042914e-05, "loss": 1.7117, "step": 457400 }, { "epoch": 0.29, "learning_rate": 3.5382644892731347e-05, "loss": 1.7057, "step": 457500 }, { "epoch": 0.29, "learning_rate": 3.537944284341979e-05, "loss": 1.7102, "step": 457600 }, { "epoch": 0.29, "learning_rate": 3.5376240794108226e-05, "loss": 1.7121, "step": 457700 }, { "epoch": 0.29, "learning_rate": 3.537303874479667e-05, "loss": 1.6951, "step": 457800 }, { "epoch": 0.29, "learning_rate": 3.536983669548511e-05, "loss": 1.7023, "step": 457900 }, { "epoch": 0.29, "learning_rate": 3.536663464617355e-05, "loss": 1.7178, "step": 458000 }, { "epoch": 0.29, "eval_loss": 1.7376421689987183, "eval_runtime": 95.2936, "eval_samples_per_second": 104.939, "eval_steps_per_second": 6.559, "step": 458000 }, { "epoch": 0.29, "learning_rate": 3.536343259686199e-05, "loss": 1.6861, "step": 458100 }, { "epoch": 0.29, "learning_rate": 3.536023054755043e-05, "loss": 1.7146, "step": 458200 }, { "epoch": 0.29, "learning_rate": 3.535702849823888e-05, "loss": 1.7331, "step": 458300 }, { "epoch": 0.29, "learning_rate": 3.535382644892731e-05, "loss": 1.7159, "step": 458400 }, { "epoch": 0.29, "learning_rate": 3.535062439961576e-05, "loss": 1.6811, "step": 458500 }, { "epoch": 0.29, "learning_rate": 3.53474223503042e-05, "loss": 1.7119, "step": 458600 }, { "epoch": 0.29, "learning_rate": 3.534422030099264e-05, "loss": 1.7114, "step": 458700 }, { "epoch": 0.29, "learning_rate": 3.534101825168108e-05, "loss": 1.7201, "step": 458800 }, { "epoch": 0.29, "learning_rate": 3.5337816202369516e-05, "loss": 1.7176, "step": 458900 }, { "epoch": 0.29, "learning_rate": 3.533461415305796e-05, "loss": 1.7277, "step": 459000 }, { "epoch": 0.29, "eval_loss": 1.739035725593567, "eval_runtime": 89.2557, "eval_samples_per_second": 112.038, "eval_steps_per_second": 7.002, "step": 459000 }, { "epoch": 0.29, "learning_rate": 3.5331412103746396e-05, "loss": 1.7136, "step": 459100 }, { "epoch": 0.29, "learning_rate": 3.532821005443484e-05, "loss": 1.7121, "step": 459200 }, { "epoch": 0.29, "learning_rate": 3.5325008005123275e-05, "loss": 1.7381, "step": 459300 }, { "epoch": 0.29, "learning_rate": 3.532180595581172e-05, "loss": 1.6985, "step": 459400 }, { "epoch": 0.29, "learning_rate": 3.531860390650016e-05, "loss": 1.6998, "step": 459500 }, { "epoch": 0.29, "learning_rate": 3.53154018571886e-05, "loss": 1.7139, "step": 459600 }, { "epoch": 0.29, "learning_rate": 3.531219980787704e-05, "loss": 1.7068, "step": 459700 }, { "epoch": 0.29, "learning_rate": 3.530899775856548e-05, "loss": 1.723, "step": 459800 }, { "epoch": 0.29, "learning_rate": 3.530579570925393e-05, "loss": 1.7151, "step": 459900 }, { "epoch": 0.29, "learning_rate": 3.530259365994236e-05, "loss": 1.6991, "step": 460000 }, { "epoch": 0.29, "eval_loss": 1.7387909889221191, "eval_runtime": 89.4003, "eval_samples_per_second": 111.856, "eval_steps_per_second": 6.991, "step": 460000 }, { "epoch": 0.29, "learning_rate": 3.529939161063081e-05, "loss": 1.7049, "step": 460100 }, { "epoch": 0.29, "learning_rate": 3.5296189561319247e-05, "loss": 1.6952, "step": 460200 }, { "epoch": 0.29, "learning_rate": 3.5292987512007686e-05, "loss": 1.6998, "step": 460300 }, { "epoch": 0.29, "learning_rate": 3.5289785462696126e-05, "loss": 1.7058, "step": 460400 }, { "epoch": 0.29, "learning_rate": 3.5286583413384566e-05, "loss": 1.6948, "step": 460500 }, { "epoch": 0.29, "learning_rate": 3.528338136407301e-05, "loss": 1.7208, "step": 460600 }, { "epoch": 0.29, "learning_rate": 3.5280179314761445e-05, "loss": 1.7095, "step": 460700 }, { "epoch": 0.29, "learning_rate": 3.527697726544989e-05, "loss": 1.7103, "step": 460800 }, { "epoch": 0.29, "learning_rate": 3.527377521613833e-05, "loss": 1.7089, "step": 460900 }, { "epoch": 0.3, "learning_rate": 3.527057316682677e-05, "loss": 1.7213, "step": 461000 }, { "epoch": 0.3, "eval_loss": 1.739305853843689, "eval_runtime": 91.816, "eval_samples_per_second": 108.914, "eval_steps_per_second": 6.807, "step": 461000 }, { "epoch": 0.3, "learning_rate": 3.526737111751521e-05, "loss": 1.7103, "step": 461100 }, { "epoch": 0.3, "learning_rate": 3.526416906820365e-05, "loss": 1.7111, "step": 461200 }, { "epoch": 0.3, "learning_rate": 3.526096701889209e-05, "loss": 1.7122, "step": 461300 }, { "epoch": 0.3, "learning_rate": 3.525776496958053e-05, "loss": 1.716, "step": 461400 }, { "epoch": 0.3, "learning_rate": 3.525456292026898e-05, "loss": 1.7114, "step": 461500 }, { "epoch": 0.3, "learning_rate": 3.525136087095741e-05, "loss": 1.703, "step": 461600 }, { "epoch": 0.3, "learning_rate": 3.5248158821645856e-05, "loss": 1.698, "step": 461700 }, { "epoch": 0.3, "learning_rate": 3.5244956772334296e-05, "loss": 1.7185, "step": 461800 }, { "epoch": 0.3, "learning_rate": 3.5241754723022736e-05, "loss": 1.7075, "step": 461900 }, { "epoch": 0.3, "learning_rate": 3.5238552673711175e-05, "loss": 1.7314, "step": 462000 }, { "epoch": 0.3, "eval_loss": 1.7388224601745605, "eval_runtime": 90.6237, "eval_samples_per_second": 110.346, "eval_steps_per_second": 6.897, "step": 462000 }, { "epoch": 0.3, "learning_rate": 3.5235350624399615e-05, "loss": 1.6933, "step": 462100 }, { "epoch": 0.3, "learning_rate": 3.523214857508806e-05, "loss": 1.7158, "step": 462200 }, { "epoch": 0.3, "learning_rate": 3.5228946525776495e-05, "loss": 1.698, "step": 462300 }, { "epoch": 0.3, "learning_rate": 3.522574447646494e-05, "loss": 1.7046, "step": 462400 }, { "epoch": 0.3, "learning_rate": 3.522254242715338e-05, "loss": 1.7134, "step": 462500 }, { "epoch": 0.3, "learning_rate": 3.521934037784182e-05, "loss": 1.6981, "step": 462600 }, { "epoch": 0.3, "learning_rate": 3.521613832853026e-05, "loss": 1.706, "step": 462700 }, { "epoch": 0.3, "learning_rate": 3.52129362792187e-05, "loss": 1.7045, "step": 462800 }, { "epoch": 0.3, "learning_rate": 3.520973422990714e-05, "loss": 1.7072, "step": 462900 }, { "epoch": 0.3, "learning_rate": 3.520653218059558e-05, "loss": 1.7185, "step": 463000 }, { "epoch": 0.3, "eval_loss": 1.740618348121643, "eval_runtime": 90.8747, "eval_samples_per_second": 110.042, "eval_steps_per_second": 6.878, "step": 463000 }, { "epoch": 0.3, "learning_rate": 3.5203330131284026e-05, "loss": 1.7252, "step": 463100 }, { "epoch": 0.3, "learning_rate": 3.5200128081972466e-05, "loss": 1.7049, "step": 463200 }, { "epoch": 0.3, "learning_rate": 3.5196926032660906e-05, "loss": 1.7054, "step": 463300 }, { "epoch": 0.3, "learning_rate": 3.5193723983349345e-05, "loss": 1.712, "step": 463400 }, { "epoch": 0.3, "learning_rate": 3.5190521934037785e-05, "loss": 1.6706, "step": 463500 }, { "epoch": 0.3, "learning_rate": 3.5187319884726225e-05, "loss": 1.7044, "step": 463600 }, { "epoch": 0.3, "learning_rate": 3.5184117835414665e-05, "loss": 1.7074, "step": 463700 }, { "epoch": 0.3, "learning_rate": 3.518091578610311e-05, "loss": 1.7059, "step": 463800 }, { "epoch": 0.3, "learning_rate": 3.517771373679155e-05, "loss": 1.6938, "step": 463900 }, { "epoch": 0.3, "learning_rate": 3.517451168747999e-05, "loss": 1.6991, "step": 464000 }, { "epoch": 0.3, "eval_loss": 1.7393056154251099, "eval_runtime": 94.0491, "eval_samples_per_second": 106.327, "eval_steps_per_second": 6.645, "step": 464000 }, { "epoch": 0.3, "learning_rate": 3.517130963816843e-05, "loss": 1.7108, "step": 464100 }, { "epoch": 0.3, "learning_rate": 3.516810758885687e-05, "loss": 1.7038, "step": 464200 }, { "epoch": 0.3, "learning_rate": 3.516490553954531e-05, "loss": 1.7258, "step": 464300 }, { "epoch": 0.3, "learning_rate": 3.516170349023375e-05, "loss": 1.7176, "step": 464400 }, { "epoch": 0.3, "learning_rate": 3.515850144092219e-05, "loss": 1.699, "step": 464500 }, { "epoch": 0.3, "learning_rate": 3.515529939161063e-05, "loss": 1.7179, "step": 464600 }, { "epoch": 0.3, "learning_rate": 3.5152097342299075e-05, "loss": 1.6828, "step": 464700 }, { "epoch": 0.3, "learning_rate": 3.5148895292987515e-05, "loss": 1.712, "step": 464800 }, { "epoch": 0.3, "learning_rate": 3.5145693243675955e-05, "loss": 1.6937, "step": 464900 }, { "epoch": 0.3, "learning_rate": 3.5142491194364395e-05, "loss": 1.6956, "step": 465000 }, { "epoch": 0.3, "eval_loss": 1.7402431964874268, "eval_runtime": 89.9301, "eval_samples_per_second": 111.197, "eval_steps_per_second": 6.95, "step": 465000 }, { "epoch": 0.3, "learning_rate": 3.5139289145052834e-05, "loss": 1.6942, "step": 465100 }, { "epoch": 0.3, "learning_rate": 3.5136087095741274e-05, "loss": 1.6967, "step": 465200 }, { "epoch": 0.3, "learning_rate": 3.5132885046429714e-05, "loss": 1.695, "step": 465300 }, { "epoch": 0.3, "learning_rate": 3.512968299711816e-05, "loss": 1.7275, "step": 465400 }, { "epoch": 0.3, "learning_rate": 3.51264809478066e-05, "loss": 1.7122, "step": 465500 }, { "epoch": 0.3, "learning_rate": 3.512327889849504e-05, "loss": 1.7191, "step": 465600 }, { "epoch": 0.3, "learning_rate": 3.512007684918348e-05, "loss": 1.695, "step": 465700 }, { "epoch": 0.3, "learning_rate": 3.511687479987192e-05, "loss": 1.7192, "step": 465800 }, { "epoch": 0.3, "learning_rate": 3.511367275056036e-05, "loss": 1.6945, "step": 465900 }, { "epoch": 0.3, "learning_rate": 3.51104707012488e-05, "loss": 1.7209, "step": 466000 }, { "epoch": 0.3, "eval_loss": 1.740830898284912, "eval_runtime": 91.9053, "eval_samples_per_second": 108.808, "eval_steps_per_second": 6.8, "step": 466000 }, { "epoch": 0.3, "learning_rate": 3.510726865193724e-05, "loss": 1.7077, "step": 466100 }, { "epoch": 0.3, "learning_rate": 3.5104066602625685e-05, "loss": 1.6936, "step": 466200 }, { "epoch": 0.3, "learning_rate": 3.5100864553314125e-05, "loss": 1.7194, "step": 466300 }, { "epoch": 0.3, "learning_rate": 3.5097662504002565e-05, "loss": 1.7168, "step": 466400 }, { "epoch": 0.3, "learning_rate": 3.5094460454691004e-05, "loss": 1.7059, "step": 466500 }, { "epoch": 0.3, "learning_rate": 3.5091258405379444e-05, "loss": 1.7155, "step": 466600 }, { "epoch": 0.3, "learning_rate": 3.5088056356067884e-05, "loss": 1.7135, "step": 466700 }, { "epoch": 0.3, "learning_rate": 3.5084854306756324e-05, "loss": 1.7103, "step": 466800 }, { "epoch": 0.3, "learning_rate": 3.508165225744476e-05, "loss": 1.6917, "step": 466900 }, { "epoch": 0.3, "learning_rate": 3.507845020813321e-05, "loss": 1.727, "step": 467000 }, { "epoch": 0.3, "eval_loss": 1.7389824390411377, "eval_runtime": 89.0116, "eval_samples_per_second": 112.345, "eval_steps_per_second": 7.022, "step": 467000 }, { "epoch": 0.3, "learning_rate": 3.507524815882165e-05, "loss": 1.7012, "step": 467100 }, { "epoch": 0.3, "learning_rate": 3.507204610951009e-05, "loss": 1.709, "step": 467200 }, { "epoch": 0.3, "learning_rate": 3.506884406019853e-05, "loss": 1.7231, "step": 467300 }, { "epoch": 0.3, "learning_rate": 3.506564201088697e-05, "loss": 1.7206, "step": 467400 }, { "epoch": 0.3, "learning_rate": 3.506243996157541e-05, "loss": 1.6914, "step": 467500 }, { "epoch": 0.3, "learning_rate": 3.505923791226385e-05, "loss": 1.7006, "step": 467600 }, { "epoch": 0.3, "learning_rate": 3.505603586295229e-05, "loss": 1.7015, "step": 467700 }, { "epoch": 0.3, "learning_rate": 3.5052833813640734e-05, "loss": 1.6853, "step": 467800 }, { "epoch": 0.3, "learning_rate": 3.5049631764329174e-05, "loss": 1.6896, "step": 467900 }, { "epoch": 0.3, "learning_rate": 3.5046429715017614e-05, "loss": 1.6983, "step": 468000 }, { "epoch": 0.3, "eval_loss": 1.7429486513137817, "eval_runtime": 89.8264, "eval_samples_per_second": 111.326, "eval_steps_per_second": 6.958, "step": 468000 }, { "epoch": 0.3, "learning_rate": 3.5043227665706054e-05, "loss": 1.6882, "step": 468100 }, { "epoch": 0.3, "learning_rate": 3.5040025616394493e-05, "loss": 1.716, "step": 468200 }, { "epoch": 0.3, "learning_rate": 3.503682356708293e-05, "loss": 1.6969, "step": 468300 }, { "epoch": 0.3, "learning_rate": 3.503362151777137e-05, "loss": 1.7215, "step": 468400 }, { "epoch": 0.3, "learning_rate": 3.503041946845982e-05, "loss": 1.7206, "step": 468500 }, { "epoch": 0.3, "learning_rate": 3.502721741914826e-05, "loss": 1.7016, "step": 468600 }, { "epoch": 0.3, "learning_rate": 3.50240153698367e-05, "loss": 1.7238, "step": 468700 }, { "epoch": 0.3, "learning_rate": 3.502081332052514e-05, "loss": 1.7105, "step": 468800 }, { "epoch": 0.3, "learning_rate": 3.501761127121358e-05, "loss": 1.7063, "step": 468900 }, { "epoch": 0.3, "learning_rate": 3.501440922190202e-05, "loss": 1.7128, "step": 469000 }, { "epoch": 0.3, "eval_loss": 1.7388687133789062, "eval_runtime": 94.8871, "eval_samples_per_second": 105.388, "eval_steps_per_second": 6.587, "step": 469000 }, { "epoch": 0.3, "learning_rate": 3.501120717259046e-05, "loss": 1.7099, "step": 469100 }, { "epoch": 0.3, "learning_rate": 3.50080051232789e-05, "loss": 1.6949, "step": 469200 }, { "epoch": 0.3, "learning_rate": 3.500480307396734e-05, "loss": 1.7056, "step": 469300 }, { "epoch": 0.3, "learning_rate": 3.5001601024655784e-05, "loss": 1.7132, "step": 469400 }, { "epoch": 0.3, "learning_rate": 3.4998398975344224e-05, "loss": 1.7082, "step": 469500 }, { "epoch": 0.3, "learning_rate": 3.499519692603266e-05, "loss": 1.7147, "step": 469600 }, { "epoch": 0.3, "learning_rate": 3.49919948767211e-05, "loss": 1.7175, "step": 469700 }, { "epoch": 0.3, "learning_rate": 3.498879282740954e-05, "loss": 1.6998, "step": 469800 }, { "epoch": 0.3, "learning_rate": 3.498559077809798e-05, "loss": 1.7066, "step": 469900 }, { "epoch": 0.3, "learning_rate": 3.498238872878642e-05, "loss": 1.6908, "step": 470000 }, { "epoch": 0.3, "eval_loss": 1.7425488233566284, "eval_runtime": 130.1459, "eval_samples_per_second": 76.837, "eval_steps_per_second": 4.802, "step": 470000 }, { "epoch": 0.3, "learning_rate": 3.497918667947487e-05, "loss": 1.7141, "step": 470100 }, { "epoch": 0.3, "learning_rate": 3.497598463016331e-05, "loss": 1.7202, "step": 470200 }, { "epoch": 0.3, "learning_rate": 3.497278258085175e-05, "loss": 1.678, "step": 470300 }, { "epoch": 0.3, "learning_rate": 3.496958053154019e-05, "loss": 1.6964, "step": 470400 }, { "epoch": 0.3, "learning_rate": 3.496637848222863e-05, "loss": 1.7045, "step": 470500 }, { "epoch": 0.3, "learning_rate": 3.496317643291707e-05, "loss": 1.7103, "step": 470600 }, { "epoch": 0.3, "learning_rate": 3.495997438360551e-05, "loss": 1.7298, "step": 470700 }, { "epoch": 0.3, "learning_rate": 3.4956772334293954e-05, "loss": 1.7046, "step": 470800 }, { "epoch": 0.3, "learning_rate": 3.495357028498239e-05, "loss": 1.697, "step": 470900 }, { "epoch": 0.3, "learning_rate": 3.495036823567083e-05, "loss": 1.7039, "step": 471000 }, { "epoch": 0.3, "eval_loss": 1.7433818578720093, "eval_runtime": 139.8921, "eval_samples_per_second": 71.484, "eval_steps_per_second": 4.468, "step": 471000 }, { "epoch": 0.3, "learning_rate": 3.494716618635927e-05, "loss": 1.71, "step": 471100 }, { "epoch": 0.3, "learning_rate": 3.494396413704771e-05, "loss": 1.7057, "step": 471200 }, { "epoch": 0.3, "learning_rate": 3.494076208773615e-05, "loss": 1.71, "step": 471300 }, { "epoch": 0.3, "learning_rate": 3.493756003842459e-05, "loss": 1.6852, "step": 471400 }, { "epoch": 0.3, "learning_rate": 3.493435798911303e-05, "loss": 1.7001, "step": 471500 }, { "epoch": 0.3, "learning_rate": 3.493115593980147e-05, "loss": 1.7088, "step": 471600 }, { "epoch": 0.3, "learning_rate": 3.492795389048992e-05, "loss": 1.7214, "step": 471700 }, { "epoch": 0.3, "learning_rate": 3.492475184117836e-05, "loss": 1.704, "step": 471800 }, { "epoch": 0.3, "learning_rate": 3.49215497918668e-05, "loss": 1.7061, "step": 471900 }, { "epoch": 0.3, "learning_rate": 3.491834774255524e-05, "loss": 1.7144, "step": 472000 }, { "epoch": 0.3, "eval_loss": 1.741185188293457, "eval_runtime": 134.9167, "eval_samples_per_second": 74.12, "eval_steps_per_second": 4.632, "step": 472000 }, { "epoch": 0.3, "learning_rate": 3.491514569324368e-05, "loss": 1.6995, "step": 472100 }, { "epoch": 0.3, "learning_rate": 3.491194364393212e-05, "loss": 1.6928, "step": 472200 }, { "epoch": 0.3, "learning_rate": 3.4908741594620557e-05, "loss": 1.7198, "step": 472300 }, { "epoch": 0.3, "learning_rate": 3.4905539545309e-05, "loss": 1.7169, "step": 472400 }, { "epoch": 0.3, "learning_rate": 3.4902337495997436e-05, "loss": 1.7, "step": 472500 }, { "epoch": 0.3, "learning_rate": 3.489913544668588e-05, "loss": 1.6977, "step": 472600 }, { "epoch": 0.3, "learning_rate": 3.489593339737432e-05, "loss": 1.7164, "step": 472700 }, { "epoch": 0.3, "learning_rate": 3.489273134806276e-05, "loss": 1.7043, "step": 472800 }, { "epoch": 0.3, "learning_rate": 3.48895292987512e-05, "loss": 1.7175, "step": 472900 }, { "epoch": 0.3, "learning_rate": 3.488632724943964e-05, "loss": 1.6849, "step": 473000 }, { "epoch": 0.3, "eval_loss": 1.7417367696762085, "eval_runtime": 96.2088, "eval_samples_per_second": 103.941, "eval_steps_per_second": 6.496, "step": 473000 }, { "epoch": 0.3, "learning_rate": 3.488312520012809e-05, "loss": 1.6967, "step": 473100 }, { "epoch": 0.3, "learning_rate": 3.487992315081652e-05, "loss": 1.6939, "step": 473200 }, { "epoch": 0.3, "learning_rate": 3.487672110150497e-05, "loss": 1.6989, "step": 473300 }, { "epoch": 0.3, "learning_rate": 3.487351905219341e-05, "loss": 1.6982, "step": 473400 }, { "epoch": 0.3, "learning_rate": 3.487031700288185e-05, "loss": 1.7023, "step": 473500 }, { "epoch": 0.3, "learning_rate": 3.486711495357029e-05, "loss": 1.7018, "step": 473600 }, { "epoch": 0.3, "learning_rate": 3.4863912904258726e-05, "loss": 1.6889, "step": 473700 }, { "epoch": 0.3, "learning_rate": 3.486071085494717e-05, "loss": 1.6887, "step": 473800 }, { "epoch": 0.3, "learning_rate": 3.4857508805635606e-05, "loss": 1.7079, "step": 473900 }, { "epoch": 0.3, "learning_rate": 3.485430675632405e-05, "loss": 1.7006, "step": 474000 }, { "epoch": 0.3, "eval_loss": 1.7414058446884155, "eval_runtime": 137.9492, "eval_samples_per_second": 72.49, "eval_steps_per_second": 4.531, "step": 474000 }, { "epoch": 0.3, "learning_rate": 3.4851104707012485e-05, "loss": 1.7016, "step": 474100 }, { "epoch": 0.3, "learning_rate": 3.484790265770093e-05, "loss": 1.7096, "step": 474200 }, { "epoch": 0.3, "learning_rate": 3.484470060838937e-05, "loss": 1.692, "step": 474300 }, { "epoch": 0.3, "learning_rate": 3.484149855907781e-05, "loss": 1.7122, "step": 474400 }, { "epoch": 0.3, "learning_rate": 3.483829650976625e-05, "loss": 1.7062, "step": 474500 }, { "epoch": 0.3, "learning_rate": 3.483509446045469e-05, "loss": 1.7087, "step": 474600 }, { "epoch": 0.3, "learning_rate": 3.483189241114314e-05, "loss": 1.7069, "step": 474700 }, { "epoch": 0.3, "learning_rate": 3.482869036183157e-05, "loss": 1.7175, "step": 474800 }, { "epoch": 0.3, "learning_rate": 3.482548831252002e-05, "loss": 1.7137, "step": 474900 }, { "epoch": 0.3, "learning_rate": 3.482228626320846e-05, "loss": 1.7173, "step": 475000 }, { "epoch": 0.3, "eval_loss": 1.739257574081421, "eval_runtime": 126.1016, "eval_samples_per_second": 79.301, "eval_steps_per_second": 4.956, "step": 475000 }, { "epoch": 0.3, "learning_rate": 3.4819084213896896e-05, "loss": 1.6977, "step": 475100 }, { "epoch": 0.3, "learning_rate": 3.4815882164585336e-05, "loss": 1.7243, "step": 475200 }, { "epoch": 0.3, "learning_rate": 3.4812680115273776e-05, "loss": 1.7023, "step": 475300 }, { "epoch": 0.3, "learning_rate": 3.480947806596222e-05, "loss": 1.7143, "step": 475400 }, { "epoch": 0.3, "learning_rate": 3.4806276016650655e-05, "loss": 1.7056, "step": 475500 }, { "epoch": 0.3, "learning_rate": 3.48030739673391e-05, "loss": 1.6825, "step": 475600 }, { "epoch": 0.3, "learning_rate": 3.4799871918027535e-05, "loss": 1.7105, "step": 475700 }, { "epoch": 0.3, "learning_rate": 3.479666986871598e-05, "loss": 1.7084, "step": 475800 }, { "epoch": 0.3, "learning_rate": 3.479346781940442e-05, "loss": 1.6781, "step": 475900 }, { "epoch": 0.3, "learning_rate": 3.479026577009286e-05, "loss": 1.698, "step": 476000 }, { "epoch": 0.3, "eval_loss": 1.741412878036499, "eval_runtime": 121.3292, "eval_samples_per_second": 82.42, "eval_steps_per_second": 5.151, "step": 476000 }, { "epoch": 0.3, "learning_rate": 3.478706372078131e-05, "loss": 1.6904, "step": 476100 }, { "epoch": 0.3, "learning_rate": 3.478386167146974e-05, "loss": 1.6763, "step": 476200 }, { "epoch": 0.3, "learning_rate": 3.478065962215819e-05, "loss": 1.6903, "step": 476300 }, { "epoch": 0.3, "learning_rate": 3.477745757284662e-05, "loss": 1.6965, "step": 476400 }, { "epoch": 0.3, "learning_rate": 3.4774255523535066e-05, "loss": 1.6936, "step": 476500 }, { "epoch": 0.31, "learning_rate": 3.4771053474223506e-05, "loss": 1.6833, "step": 476600 }, { "epoch": 0.31, "learning_rate": 3.4767851424911946e-05, "loss": 1.7119, "step": 476700 }, { "epoch": 0.31, "learning_rate": 3.4764649375600385e-05, "loss": 1.6976, "step": 476800 }, { "epoch": 0.31, "learning_rate": 3.4761447326288825e-05, "loss": 1.7134, "step": 476900 }, { "epoch": 0.31, "learning_rate": 3.475824527697727e-05, "loss": 1.6959, "step": 477000 }, { "epoch": 0.31, "eval_loss": 1.742285132408142, "eval_runtime": 90.4663, "eval_samples_per_second": 110.538, "eval_steps_per_second": 6.909, "step": 477000 }, { "epoch": 0.31, "learning_rate": 3.4755043227665705e-05, "loss": 1.687, "step": 477100 }, { "epoch": 0.31, "learning_rate": 3.475184117835415e-05, "loss": 1.6934, "step": 477200 }, { "epoch": 0.31, "learning_rate": 3.4748639129042584e-05, "loss": 1.7013, "step": 477300 }, { "epoch": 0.31, "learning_rate": 3.474543707973103e-05, "loss": 1.7066, "step": 477400 }, { "epoch": 0.31, "learning_rate": 3.474223503041947e-05, "loss": 1.7062, "step": 477500 }, { "epoch": 0.31, "learning_rate": 3.473903298110791e-05, "loss": 1.697, "step": 477600 }, { "epoch": 0.31, "learning_rate": 3.473583093179636e-05, "loss": 1.6934, "step": 477700 }, { "epoch": 0.31, "learning_rate": 3.473262888248479e-05, "loss": 1.7038, "step": 477800 }, { "epoch": 0.31, "learning_rate": 3.4729426833173236e-05, "loss": 1.7019, "step": 477900 }, { "epoch": 0.31, "learning_rate": 3.472622478386167e-05, "loss": 1.6845, "step": 478000 }, { "epoch": 0.31, "eval_loss": 1.7427252531051636, "eval_runtime": 91.3642, "eval_samples_per_second": 109.452, "eval_steps_per_second": 6.841, "step": 478000 }, { "epoch": 0.31, "learning_rate": 3.4723022734550116e-05, "loss": 1.6853, "step": 478100 }, { "epoch": 0.31, "learning_rate": 3.4719820685238555e-05, "loss": 1.686, "step": 478200 }, { "epoch": 0.31, "learning_rate": 3.4716618635926995e-05, "loss": 1.7074, "step": 478300 }, { "epoch": 0.31, "learning_rate": 3.4713416586615435e-05, "loss": 1.7001, "step": 478400 }, { "epoch": 0.31, "learning_rate": 3.4710214537303875e-05, "loss": 1.6999, "step": 478500 }, { "epoch": 0.31, "learning_rate": 3.470701248799232e-05, "loss": 1.7059, "step": 478600 }, { "epoch": 0.31, "learning_rate": 3.4703810438680754e-05, "loss": 1.7047, "step": 478700 }, { "epoch": 0.31, "learning_rate": 3.47006083893692e-05, "loss": 1.7016, "step": 478800 }, { "epoch": 0.31, "learning_rate": 3.4697406340057634e-05, "loss": 1.7077, "step": 478900 }, { "epoch": 0.31, "learning_rate": 3.469420429074608e-05, "loss": 1.6971, "step": 479000 }, { "epoch": 0.31, "eval_loss": 1.741091251373291, "eval_runtime": 94.7186, "eval_samples_per_second": 105.576, "eval_steps_per_second": 6.598, "step": 479000 }, { "epoch": 0.31, "learning_rate": 3.469100224143452e-05, "loss": 1.7045, "step": 479100 }, { "epoch": 0.31, "learning_rate": 3.468780019212296e-05, "loss": 1.7098, "step": 479200 }, { "epoch": 0.31, "learning_rate": 3.4684598142811406e-05, "loss": 1.7156, "step": 479300 }, { "epoch": 0.31, "learning_rate": 3.468139609349984e-05, "loss": 1.7195, "step": 479400 }, { "epoch": 0.31, "learning_rate": 3.4678194044188286e-05, "loss": 1.716, "step": 479500 }, { "epoch": 0.31, "learning_rate": 3.467499199487672e-05, "loss": 1.7003, "step": 479600 }, { "epoch": 0.31, "learning_rate": 3.4671789945565165e-05, "loss": 1.6894, "step": 479700 }, { "epoch": 0.31, "learning_rate": 3.4668587896253605e-05, "loss": 1.6881, "step": 479800 }, { "epoch": 0.31, "learning_rate": 3.4665385846942044e-05, "loss": 1.6928, "step": 479900 }, { "epoch": 0.31, "learning_rate": 3.4662183797630484e-05, "loss": 1.689, "step": 480000 }, { "epoch": 0.31, "eval_loss": 1.7393476963043213, "eval_runtime": 90.1253, "eval_samples_per_second": 110.957, "eval_steps_per_second": 6.935, "step": 480000 }, { "epoch": 0.31, "learning_rate": 3.4658981748318924e-05, "loss": 1.7124, "step": 480100 }, { "epoch": 0.31, "learning_rate": 3.465577969900737e-05, "loss": 1.6848, "step": 480200 }, { "epoch": 0.31, "learning_rate": 3.4652577649695803e-05, "loss": 1.7034, "step": 480300 }, { "epoch": 0.31, "learning_rate": 3.464937560038425e-05, "loss": 1.6872, "step": 480400 }, { "epoch": 0.31, "learning_rate": 3.464617355107268e-05, "loss": 1.6782, "step": 480500 }, { "epoch": 0.31, "learning_rate": 3.464297150176113e-05, "loss": 1.6955, "step": 480600 }, { "epoch": 0.31, "learning_rate": 3.463976945244957e-05, "loss": 1.6919, "step": 480700 }, { "epoch": 0.31, "learning_rate": 3.463656740313801e-05, "loss": 1.7177, "step": 480800 }, { "epoch": 0.31, "learning_rate": 3.4633365353826455e-05, "loss": 1.6963, "step": 480900 }, { "epoch": 0.31, "learning_rate": 3.463016330451489e-05, "loss": 1.6877, "step": 481000 }, { "epoch": 0.31, "eval_loss": 1.741791844367981, "eval_runtime": 91.447, "eval_samples_per_second": 109.353, "eval_steps_per_second": 6.835, "step": 481000 }, { "epoch": 0.31, "learning_rate": 3.4626961255203335e-05, "loss": 1.6995, "step": 481100 }, { "epoch": 0.31, "learning_rate": 3.462375920589177e-05, "loss": 1.6919, "step": 481200 }, { "epoch": 0.31, "learning_rate": 3.4620557156580214e-05, "loss": 1.7121, "step": 481300 }, { "epoch": 0.31, "learning_rate": 3.4617355107268654e-05, "loss": 1.7047, "step": 481400 }, { "epoch": 0.31, "learning_rate": 3.4614153057957094e-05, "loss": 1.6843, "step": 481500 }, { "epoch": 0.31, "learning_rate": 3.4610951008645534e-05, "loss": 1.7089, "step": 481600 }, { "epoch": 0.31, "learning_rate": 3.460774895933397e-05, "loss": 1.6808, "step": 481700 }, { "epoch": 0.31, "learning_rate": 3.460454691002242e-05, "loss": 1.7124, "step": 481800 }, { "epoch": 0.31, "learning_rate": 3.460134486071085e-05, "loss": 1.6999, "step": 481900 }, { "epoch": 0.31, "learning_rate": 3.45981428113993e-05, "loss": 1.6924, "step": 482000 }, { "epoch": 0.31, "eval_loss": 1.7396374940872192, "eval_runtime": 92.1467, "eval_samples_per_second": 108.523, "eval_steps_per_second": 6.783, "step": 482000 }, { "epoch": 0.31, "learning_rate": 3.459494076208773e-05, "loss": 1.6847, "step": 482100 }, { "epoch": 0.31, "learning_rate": 3.459173871277618e-05, "loss": 1.7026, "step": 482200 }, { "epoch": 0.31, "learning_rate": 3.458853666346462e-05, "loss": 1.7002, "step": 482300 }, { "epoch": 0.31, "learning_rate": 3.458533461415306e-05, "loss": 1.7061, "step": 482400 }, { "epoch": 0.31, "learning_rate": 3.4582132564841505e-05, "loss": 1.699, "step": 482500 }, { "epoch": 0.31, "learning_rate": 3.457893051552994e-05, "loss": 1.6889, "step": 482600 }, { "epoch": 0.31, "learning_rate": 3.4575728466218384e-05, "loss": 1.6974, "step": 482700 }, { "epoch": 0.31, "learning_rate": 3.457252641690682e-05, "loss": 1.693, "step": 482800 }, { "epoch": 0.31, "learning_rate": 3.4569324367595264e-05, "loss": 1.7091, "step": 482900 }, { "epoch": 0.31, "learning_rate": 3.4566122318283703e-05, "loss": 1.7065, "step": 483000 }, { "epoch": 0.31, "eval_loss": 1.7434509992599487, "eval_runtime": 89.0362, "eval_samples_per_second": 112.314, "eval_steps_per_second": 7.02, "step": 483000 }, { "epoch": 0.31, "learning_rate": 3.456292026897214e-05, "loss": 1.7108, "step": 483100 }, { "epoch": 0.31, "learning_rate": 3.455971821966058e-05, "loss": 1.6761, "step": 483200 }, { "epoch": 0.31, "learning_rate": 3.455651617034902e-05, "loss": 1.704, "step": 483300 }, { "epoch": 0.31, "learning_rate": 3.455331412103747e-05, "loss": 1.7013, "step": 483400 }, { "epoch": 0.31, "learning_rate": 3.45501120717259e-05, "loss": 1.7087, "step": 483500 }, { "epoch": 0.31, "learning_rate": 3.454691002241435e-05, "loss": 1.6951, "step": 483600 }, { "epoch": 0.31, "learning_rate": 3.454370797310279e-05, "loss": 1.6852, "step": 483700 }, { "epoch": 0.31, "learning_rate": 3.454050592379123e-05, "loss": 1.7144, "step": 483800 }, { "epoch": 0.31, "learning_rate": 3.453730387447967e-05, "loss": 1.6808, "step": 483900 }, { "epoch": 0.31, "learning_rate": 3.453410182516811e-05, "loss": 1.6963, "step": 484000 }, { "epoch": 0.31, "eval_loss": 1.738284707069397, "eval_runtime": 92.8724, "eval_samples_per_second": 107.675, "eval_steps_per_second": 6.73, "step": 484000 }, { "epoch": 0.31, "learning_rate": 3.4530899775856554e-05, "loss": 1.7032, "step": 484100 }, { "epoch": 0.31, "learning_rate": 3.452769772654499e-05, "loss": 1.6889, "step": 484200 }, { "epoch": 0.31, "learning_rate": 3.4524495677233434e-05, "loss": 1.6946, "step": 484300 }, { "epoch": 0.31, "learning_rate": 3.452129362792187e-05, "loss": 1.7118, "step": 484400 }, { "epoch": 0.31, "learning_rate": 3.451809157861031e-05, "loss": 1.6952, "step": 484500 }, { "epoch": 0.31, "learning_rate": 3.451488952929875e-05, "loss": 1.6791, "step": 484600 }, { "epoch": 0.31, "learning_rate": 3.451168747998719e-05, "loss": 1.7045, "step": 484700 }, { "epoch": 0.31, "learning_rate": 3.450848543067563e-05, "loss": 1.6853, "step": 484800 }, { "epoch": 0.31, "learning_rate": 3.450528338136407e-05, "loss": 1.7055, "step": 484900 }, { "epoch": 0.31, "learning_rate": 3.450208133205252e-05, "loss": 1.6961, "step": 485000 }, { "epoch": 0.31, "eval_loss": 1.73981773853302, "eval_runtime": 90.4405, "eval_samples_per_second": 110.57, "eval_steps_per_second": 6.911, "step": 485000 }, { "epoch": 0.31, "learning_rate": 3.449887928274095e-05, "loss": 1.6938, "step": 485100 }, { "epoch": 0.31, "learning_rate": 3.44956772334294e-05, "loss": 1.7046, "step": 485200 }, { "epoch": 0.31, "learning_rate": 3.449247518411784e-05, "loss": 1.7167, "step": 485300 }, { "epoch": 0.31, "learning_rate": 3.448927313480628e-05, "loss": 1.7011, "step": 485400 }, { "epoch": 0.31, "learning_rate": 3.448607108549472e-05, "loss": 1.6913, "step": 485500 }, { "epoch": 0.31, "learning_rate": 3.448286903618316e-05, "loss": 1.6868, "step": 485600 }, { "epoch": 0.31, "learning_rate": 3.4479666986871604e-05, "loss": 1.6967, "step": 485700 }, { "epoch": 0.31, "learning_rate": 3.4476464937560037e-05, "loss": 1.7084, "step": 485800 }, { "epoch": 0.31, "learning_rate": 3.447326288824848e-05, "loss": 1.6864, "step": 485900 }, { "epoch": 0.31, "learning_rate": 3.447006083893692e-05, "loss": 1.6977, "step": 486000 }, { "epoch": 0.31, "eval_loss": 1.7388570308685303, "eval_runtime": 88.3149, "eval_samples_per_second": 113.231, "eval_steps_per_second": 7.077, "step": 486000 }, { "epoch": 0.31, "learning_rate": 3.446685878962536e-05, "loss": 1.6927, "step": 486100 }, { "epoch": 0.31, "learning_rate": 3.44636567403138e-05, "loss": 1.6975, "step": 486200 }, { "epoch": 0.31, "learning_rate": 3.446045469100224e-05, "loss": 1.7021, "step": 486300 }, { "epoch": 0.31, "learning_rate": 3.445725264169068e-05, "loss": 1.7123, "step": 486400 }, { "epoch": 0.31, "learning_rate": 3.445405059237912e-05, "loss": 1.7088, "step": 486500 }, { "epoch": 0.31, "learning_rate": 3.445084854306757e-05, "loss": 1.6976, "step": 486600 }, { "epoch": 0.31, "learning_rate": 3.4447646493756e-05, "loss": 1.7147, "step": 486700 }, { "epoch": 0.31, "learning_rate": 3.444444444444445e-05, "loss": 1.7035, "step": 486800 }, { "epoch": 0.31, "learning_rate": 3.444124239513289e-05, "loss": 1.6808, "step": 486900 }, { "epoch": 0.31, "learning_rate": 3.443804034582133e-05, "loss": 1.6835, "step": 487000 }, { "epoch": 0.31, "eval_loss": 1.7411023378372192, "eval_runtime": 90.5616, "eval_samples_per_second": 110.422, "eval_steps_per_second": 6.901, "step": 487000 }, { "epoch": 0.31, "learning_rate": 3.443483829650977e-05, "loss": 1.6921, "step": 487100 }, { "epoch": 0.31, "learning_rate": 3.4431636247198206e-05, "loss": 1.695, "step": 487200 }, { "epoch": 0.31, "learning_rate": 3.442843419788665e-05, "loss": 1.6854, "step": 487300 }, { "epoch": 0.31, "learning_rate": 3.4425232148575086e-05, "loss": 1.678, "step": 487400 }, { "epoch": 0.31, "learning_rate": 3.442203009926353e-05, "loss": 1.7016, "step": 487500 }, { "epoch": 0.31, "learning_rate": 3.441882804995197e-05, "loss": 1.7059, "step": 487600 }, { "epoch": 0.31, "learning_rate": 3.441562600064041e-05, "loss": 1.7013, "step": 487700 }, { "epoch": 0.31, "learning_rate": 3.441242395132885e-05, "loss": 1.7026, "step": 487800 }, { "epoch": 0.31, "learning_rate": 3.440922190201729e-05, "loss": 1.7029, "step": 487900 }, { "epoch": 0.31, "learning_rate": 3.440601985270573e-05, "loss": 1.7023, "step": 488000 }, { "epoch": 0.31, "eval_loss": 1.741432785987854, "eval_runtime": 91.3993, "eval_samples_per_second": 109.41, "eval_steps_per_second": 6.838, "step": 488000 }, { "epoch": 0.31, "learning_rate": 3.440281780339417e-05, "loss": 1.7178, "step": 488100 }, { "epoch": 0.31, "learning_rate": 3.439961575408262e-05, "loss": 1.7066, "step": 488200 }, { "epoch": 0.31, "learning_rate": 3.439641370477106e-05, "loss": 1.704, "step": 488300 }, { "epoch": 0.31, "learning_rate": 3.43932116554595e-05, "loss": 1.7036, "step": 488400 }, { "epoch": 0.31, "learning_rate": 3.4390009606147937e-05, "loss": 1.7004, "step": 488500 }, { "epoch": 0.31, "learning_rate": 3.4386807556836376e-05, "loss": 1.7065, "step": 488600 }, { "epoch": 0.31, "learning_rate": 3.4383605507524816e-05, "loss": 1.7005, "step": 488700 }, { "epoch": 0.31, "learning_rate": 3.4380403458213256e-05, "loss": 1.6924, "step": 488800 }, { "epoch": 0.31, "learning_rate": 3.43772014089017e-05, "loss": 1.6978, "step": 488900 }, { "epoch": 0.31, "learning_rate": 3.4373999359590135e-05, "loss": 1.7177, "step": 489000 }, { "epoch": 0.31, "eval_loss": 1.74038827419281, "eval_runtime": 92.3123, "eval_samples_per_second": 108.328, "eval_steps_per_second": 6.77, "step": 489000 }, { "epoch": 0.31, "learning_rate": 3.437079731027858e-05, "loss": 1.6802, "step": 489100 }, { "epoch": 0.31, "learning_rate": 3.436759526096702e-05, "loss": 1.6877, "step": 489200 }, { "epoch": 0.31, "learning_rate": 3.436439321165546e-05, "loss": 1.6985, "step": 489300 }, { "epoch": 0.31, "learning_rate": 3.43611911623439e-05, "loss": 1.7065, "step": 489400 }, { "epoch": 0.31, "learning_rate": 3.435798911303234e-05, "loss": 1.7109, "step": 489500 }, { "epoch": 0.31, "learning_rate": 3.435478706372078e-05, "loss": 1.6942, "step": 489600 }, { "epoch": 0.31, "learning_rate": 3.435158501440922e-05, "loss": 1.6988, "step": 489700 }, { "epoch": 0.31, "learning_rate": 3.434838296509767e-05, "loss": 1.6794, "step": 489800 }, { "epoch": 0.31, "learning_rate": 3.4345180915786106e-05, "loss": 1.6889, "step": 489900 }, { "epoch": 0.31, "learning_rate": 3.4341978866474546e-05, "loss": 1.6966, "step": 490000 }, { "epoch": 0.31, "eval_loss": 1.7444579601287842, "eval_runtime": 89.6138, "eval_samples_per_second": 111.59, "eval_steps_per_second": 6.974, "step": 490000 }, { "epoch": 0.31, "learning_rate": 3.4338776817162986e-05, "loss": 1.6919, "step": 490100 }, { "epoch": 0.31, "learning_rate": 3.4335574767851426e-05, "loss": 1.6873, "step": 490200 }, { "epoch": 0.31, "learning_rate": 3.4332372718539865e-05, "loss": 1.6862, "step": 490300 }, { "epoch": 0.31, "learning_rate": 3.4329170669228305e-05, "loss": 1.6837, "step": 490400 }, { "epoch": 0.31, "learning_rate": 3.432596861991675e-05, "loss": 1.691, "step": 490500 }, { "epoch": 0.31, "learning_rate": 3.432276657060519e-05, "loss": 1.6945, "step": 490600 }, { "epoch": 0.31, "learning_rate": 3.431956452129363e-05, "loss": 1.7017, "step": 490700 }, { "epoch": 0.31, "learning_rate": 3.431636247198207e-05, "loss": 1.687, "step": 490800 }, { "epoch": 0.31, "learning_rate": 3.431316042267051e-05, "loss": 1.6964, "step": 490900 }, { "epoch": 0.31, "learning_rate": 3.430995837335895e-05, "loss": 1.6883, "step": 491000 }, { "epoch": 0.31, "eval_loss": 1.742876410484314, "eval_runtime": 92.4304, "eval_samples_per_second": 108.189, "eval_steps_per_second": 6.762, "step": 491000 }, { "epoch": 0.31, "learning_rate": 3.430675632404739e-05, "loss": 1.6838, "step": 491100 }, { "epoch": 0.31, "learning_rate": 3.430355427473583e-05, "loss": 1.6906, "step": 491200 }, { "epoch": 0.31, "learning_rate": 3.430035222542427e-05, "loss": 1.6832, "step": 491300 }, { "epoch": 0.31, "learning_rate": 3.4297150176112716e-05, "loss": 1.6673, "step": 491400 }, { "epoch": 0.31, "learning_rate": 3.4293948126801156e-05, "loss": 1.6871, "step": 491500 }, { "epoch": 0.31, "learning_rate": 3.4290746077489596e-05, "loss": 1.6982, "step": 491600 }, { "epoch": 0.31, "learning_rate": 3.4287544028178035e-05, "loss": 1.7363, "step": 491700 }, { "epoch": 0.31, "learning_rate": 3.4284341978866475e-05, "loss": 1.6999, "step": 491800 }, { "epoch": 0.31, "learning_rate": 3.4281139929554915e-05, "loss": 1.7063, "step": 491900 }, { "epoch": 0.31, "learning_rate": 3.4277937880243355e-05, "loss": 1.6964, "step": 492000 }, { "epoch": 0.31, "eval_loss": 1.742271900177002, "eval_runtime": 89.1208, "eval_samples_per_second": 112.207, "eval_steps_per_second": 7.013, "step": 492000 }, { "epoch": 0.31, "learning_rate": 3.42747358309318e-05, "loss": 1.6639, "step": 492100 }, { "epoch": 0.32, "learning_rate": 3.427153378162024e-05, "loss": 1.6935, "step": 492200 }, { "epoch": 0.32, "learning_rate": 3.426833173230868e-05, "loss": 1.7111, "step": 492300 }, { "epoch": 0.32, "learning_rate": 3.426512968299712e-05, "loss": 1.7175, "step": 492400 }, { "epoch": 0.32, "learning_rate": 3.426192763368556e-05, "loss": 1.6764, "step": 492500 }, { "epoch": 0.32, "learning_rate": 3.4258725584374e-05, "loss": 1.682, "step": 492600 }, { "epoch": 0.32, "learning_rate": 3.425552353506244e-05, "loss": 1.7051, "step": 492700 }, { "epoch": 0.32, "learning_rate": 3.425232148575088e-05, "loss": 1.7071, "step": 492800 }, { "epoch": 0.32, "learning_rate": 3.4249119436439326e-05, "loss": 1.7024, "step": 492900 }, { "epoch": 0.32, "learning_rate": 3.4245917387127765e-05, "loss": 1.7043, "step": 493000 }, { "epoch": 0.32, "eval_loss": 1.7407526969909668, "eval_runtime": 92.9799, "eval_samples_per_second": 107.55, "eval_steps_per_second": 6.722, "step": 493000 }, { "epoch": 0.32, "learning_rate": 3.4242715337816205e-05, "loss": 1.694, "step": 493100 }, { "epoch": 0.32, "learning_rate": 3.4239513288504645e-05, "loss": 1.6893, "step": 493200 }, { "epoch": 0.32, "learning_rate": 3.4236311239193085e-05, "loss": 1.698, "step": 493300 }, { "epoch": 0.32, "learning_rate": 3.4233109189881524e-05, "loss": 1.7244, "step": 493400 }, { "epoch": 0.32, "learning_rate": 3.4229907140569964e-05, "loss": 1.7048, "step": 493500 }, { "epoch": 0.32, "learning_rate": 3.422670509125841e-05, "loss": 1.6879, "step": 493600 }, { "epoch": 0.32, "learning_rate": 3.422350304194685e-05, "loss": 1.7084, "step": 493700 }, { "epoch": 0.32, "learning_rate": 3.422030099263529e-05, "loss": 1.6969, "step": 493800 }, { "epoch": 0.32, "learning_rate": 3.421709894332373e-05, "loss": 1.6905, "step": 493900 }, { "epoch": 0.32, "learning_rate": 3.421389689401217e-05, "loss": 1.6904, "step": 494000 }, { "epoch": 0.32, "eval_loss": 1.7397140264511108, "eval_runtime": 89.647, "eval_samples_per_second": 111.549, "eval_steps_per_second": 6.972, "step": 494000 }, { "epoch": 0.32, "learning_rate": 3.421069484470061e-05, "loss": 1.7169, "step": 494100 }, { "epoch": 0.32, "learning_rate": 3.420749279538905e-05, "loss": 1.7087, "step": 494200 }, { "epoch": 0.32, "learning_rate": 3.420429074607749e-05, "loss": 1.7024, "step": 494300 }, { "epoch": 0.32, "learning_rate": 3.420108869676593e-05, "loss": 1.6914, "step": 494400 }, { "epoch": 0.32, "learning_rate": 3.4197886647454375e-05, "loss": 1.6868, "step": 494500 }, { "epoch": 0.32, "learning_rate": 3.4194684598142815e-05, "loss": 1.7057, "step": 494600 }, { "epoch": 0.32, "learning_rate": 3.4191482548831255e-05, "loss": 1.6892, "step": 494700 }, { "epoch": 0.32, "learning_rate": 3.4188280499519694e-05, "loss": 1.6672, "step": 494800 }, { "epoch": 0.32, "learning_rate": 3.4185078450208134e-05, "loss": 1.6915, "step": 494900 }, { "epoch": 0.32, "learning_rate": 3.4181876400896574e-05, "loss": 1.6752, "step": 495000 }, { "epoch": 0.32, "eval_loss": 1.7419145107269287, "eval_runtime": 87.9053, "eval_samples_per_second": 113.759, "eval_steps_per_second": 7.11, "step": 495000 }, { "epoch": 0.32, "learning_rate": 3.4178674351585014e-05, "loss": 1.6948, "step": 495100 }, { "epoch": 0.32, "learning_rate": 3.417547230227346e-05, "loss": 1.6756, "step": 495200 }, { "epoch": 0.32, "learning_rate": 3.41722702529619e-05, "loss": 1.6855, "step": 495300 }, { "epoch": 0.32, "learning_rate": 3.416906820365034e-05, "loss": 1.6752, "step": 495400 }, { "epoch": 0.32, "learning_rate": 3.416586615433878e-05, "loss": 1.6887, "step": 495500 }, { "epoch": 0.32, "learning_rate": 3.416266410502722e-05, "loss": 1.7008, "step": 495600 }, { "epoch": 0.32, "learning_rate": 3.415946205571566e-05, "loss": 1.6975, "step": 495700 }, { "epoch": 0.32, "learning_rate": 3.41562600064041e-05, "loss": 1.6914, "step": 495800 }, { "epoch": 0.32, "learning_rate": 3.4153057957092545e-05, "loss": 1.6876, "step": 495900 }, { "epoch": 0.32, "learning_rate": 3.414985590778098e-05, "loss": 1.6859, "step": 496000 }, { "epoch": 0.32, "eval_loss": 1.7413986921310425, "eval_runtime": 92.9722, "eval_samples_per_second": 107.559, "eval_steps_per_second": 6.722, "step": 496000 }, { "epoch": 0.32, "learning_rate": 3.4146653858469424e-05, "loss": 1.7041, "step": 496100 }, { "epoch": 0.32, "learning_rate": 3.4143451809157864e-05, "loss": 1.7111, "step": 496200 }, { "epoch": 0.32, "learning_rate": 3.4140249759846304e-05, "loss": 1.6986, "step": 496300 }, { "epoch": 0.32, "learning_rate": 3.4137047710534744e-05, "loss": 1.6993, "step": 496400 }, { "epoch": 0.32, "learning_rate": 3.4133845661223183e-05, "loss": 1.7022, "step": 496500 }, { "epoch": 0.32, "learning_rate": 3.413064361191162e-05, "loss": 1.7001, "step": 496600 }, { "epoch": 0.32, "learning_rate": 3.412744156260006e-05, "loss": 1.6846, "step": 496700 }, { "epoch": 0.32, "learning_rate": 3.412423951328851e-05, "loss": 1.7094, "step": 496800 }, { "epoch": 0.32, "learning_rate": 3.412103746397695e-05, "loss": 1.6835, "step": 496900 }, { "epoch": 0.32, "learning_rate": 3.411783541466539e-05, "loss": 1.683, "step": 497000 }, { "epoch": 0.32, "eval_loss": 1.7397769689559937, "eval_runtime": 89.7721, "eval_samples_per_second": 111.393, "eval_steps_per_second": 6.962, "step": 497000 }, { "epoch": 0.32, "learning_rate": 3.411463336535383e-05, "loss": 1.6977, "step": 497100 }, { "epoch": 0.32, "learning_rate": 3.411143131604227e-05, "loss": 1.6965, "step": 497200 }, { "epoch": 0.32, "learning_rate": 3.410822926673071e-05, "loss": 1.6895, "step": 497300 }, { "epoch": 0.32, "learning_rate": 3.410502721741915e-05, "loss": 1.6961, "step": 497400 }, { "epoch": 0.32, "learning_rate": 3.4101825168107594e-05, "loss": 1.7129, "step": 497500 }, { "epoch": 0.32, "learning_rate": 3.409862311879603e-05, "loss": 1.7036, "step": 497600 }, { "epoch": 0.32, "learning_rate": 3.4095421069484474e-05, "loss": 1.6974, "step": 497700 }, { "epoch": 0.32, "learning_rate": 3.4092219020172914e-05, "loss": 1.707, "step": 497800 }, { "epoch": 0.32, "learning_rate": 3.408901697086135e-05, "loss": 1.6749, "step": 497900 }, { "epoch": 0.32, "learning_rate": 3.408581492154979e-05, "loss": 1.6753, "step": 498000 }, { "epoch": 0.32, "eval_loss": 1.7405537366867065, "eval_runtime": 92.9249, "eval_samples_per_second": 107.614, "eval_steps_per_second": 6.726, "step": 498000 }, { "epoch": 0.32, "learning_rate": 3.408261287223823e-05, "loss": 1.6936, "step": 498100 }, { "epoch": 0.32, "learning_rate": 3.407941082292668e-05, "loss": 1.6906, "step": 498200 }, { "epoch": 0.32, "learning_rate": 3.407620877361511e-05, "loss": 1.71, "step": 498300 }, { "epoch": 0.32, "learning_rate": 3.407300672430356e-05, "loss": 1.6749, "step": 498400 }, { "epoch": 0.32, "learning_rate": 3.4069804674992e-05, "loss": 1.679, "step": 498500 }, { "epoch": 0.32, "learning_rate": 3.406660262568044e-05, "loss": 1.6815, "step": 498600 }, { "epoch": 0.32, "learning_rate": 3.406340057636888e-05, "loss": 1.6786, "step": 498700 }, { "epoch": 0.32, "learning_rate": 3.406019852705732e-05, "loss": 1.6894, "step": 498800 }, { "epoch": 0.32, "learning_rate": 3.405699647774576e-05, "loss": 1.7105, "step": 498900 }, { "epoch": 0.32, "learning_rate": 3.40537944284342e-05, "loss": 1.6973, "step": 499000 }, { "epoch": 0.32, "eval_loss": 1.7402945756912231, "eval_runtime": 88.7058, "eval_samples_per_second": 112.732, "eval_steps_per_second": 7.046, "step": 499000 }, { "epoch": 0.32, "learning_rate": 3.4050592379122644e-05, "loss": 1.6813, "step": 499100 }, { "epoch": 0.32, "learning_rate": 3.404739032981108e-05, "loss": 1.6922, "step": 499200 }, { "epoch": 0.32, "learning_rate": 3.404418828049952e-05, "loss": 1.6949, "step": 499300 }, { "epoch": 0.32, "learning_rate": 3.404098623118796e-05, "loss": 1.69, "step": 499400 }, { "epoch": 0.32, "learning_rate": 3.40377841818764e-05, "loss": 1.696, "step": 499500 }, { "epoch": 0.32, "learning_rate": 3.403458213256484e-05, "loss": 1.6906, "step": 499600 }, { "epoch": 0.32, "learning_rate": 3.403138008325328e-05, "loss": 1.6978, "step": 499700 }, { "epoch": 0.32, "learning_rate": 3.402817803394173e-05, "loss": 1.6904, "step": 499800 }, { "epoch": 0.32, "learning_rate": 3.402497598463016e-05, "loss": 1.6948, "step": 499900 }, { "epoch": 0.32, "learning_rate": 3.402177393531861e-05, "loss": 1.6817, "step": 500000 }, { "epoch": 0.32, "eval_loss": 1.7409558296203613, "eval_runtime": 93.6022, "eval_samples_per_second": 106.835, "eval_steps_per_second": 6.677, "step": 500000 }, { "epoch": 0.32, "learning_rate": 3.401857188600705e-05, "loss": 1.6824, "step": 500100 }, { "epoch": 0.32, "learning_rate": 3.401536983669549e-05, "loss": 1.6735, "step": 500200 }, { "epoch": 0.32, "learning_rate": 3.401216778738393e-05, "loss": 1.7034, "step": 500300 }, { "epoch": 0.32, "learning_rate": 3.400896573807237e-05, "loss": 1.6909, "step": 500400 }, { "epoch": 0.32, "learning_rate": 3.4005763688760814e-05, "loss": 1.6938, "step": 500500 }, { "epoch": 0.32, "learning_rate": 3.4002561639449247e-05, "loss": 1.7047, "step": 500600 }, { "epoch": 0.32, "learning_rate": 3.399935959013769e-05, "loss": 1.678, "step": 500700 }, { "epoch": 0.32, "learning_rate": 3.3996157540826126e-05, "loss": 1.7116, "step": 500800 }, { "epoch": 0.32, "learning_rate": 3.399295549151457e-05, "loss": 1.7084, "step": 500900 }, { "epoch": 0.32, "learning_rate": 3.398975344220301e-05, "loss": 1.6897, "step": 501000 }, { "epoch": 0.32, "eval_loss": 1.7434136867523193, "eval_runtime": 89.2414, "eval_samples_per_second": 112.056, "eval_steps_per_second": 7.003, "step": 501000 }, { "epoch": 0.32, "learning_rate": 3.398655139289145e-05, "loss": 1.6889, "step": 501100 }, { "epoch": 0.32, "learning_rate": 3.39833493435799e-05, "loss": 1.6996, "step": 501200 }, { "epoch": 0.32, "learning_rate": 3.398014729426833e-05, "loss": 1.6988, "step": 501300 }, { "epoch": 0.32, "learning_rate": 3.397694524495678e-05, "loss": 1.6853, "step": 501400 }, { "epoch": 0.32, "learning_rate": 3.397374319564521e-05, "loss": 1.6777, "step": 501500 }, { "epoch": 0.32, "learning_rate": 3.397054114633366e-05, "loss": 1.7058, "step": 501600 }, { "epoch": 0.32, "learning_rate": 3.39673390970221e-05, "loss": 1.6919, "step": 501700 }, { "epoch": 0.32, "learning_rate": 3.396413704771054e-05, "loss": 1.6921, "step": 501800 }, { "epoch": 0.32, "learning_rate": 3.396093499839898e-05, "loss": 1.6907, "step": 501900 }, { "epoch": 0.32, "learning_rate": 3.3957732949087416e-05, "loss": 1.7182, "step": 502000 }, { "epoch": 0.32, "eval_loss": 1.7396955490112305, "eval_runtime": 90.2408, "eval_samples_per_second": 110.815, "eval_steps_per_second": 6.926, "step": 502000 }, { "epoch": 0.32, "learning_rate": 3.395453089977586e-05, "loss": 1.6971, "step": 502100 }, { "epoch": 0.32, "learning_rate": 3.3951328850464296e-05, "loss": 1.6806, "step": 502200 }, { "epoch": 0.32, "learning_rate": 3.394812680115274e-05, "loss": 1.7069, "step": 502300 }, { "epoch": 0.32, "learning_rate": 3.3944924751841175e-05, "loss": 1.6755, "step": 502400 }, { "epoch": 0.32, "learning_rate": 3.394172270252962e-05, "loss": 1.6976, "step": 502500 }, { "epoch": 0.32, "learning_rate": 3.393852065321806e-05, "loss": 1.6833, "step": 502600 }, { "epoch": 0.32, "learning_rate": 3.39353186039065e-05, "loss": 1.6829, "step": 502700 }, { "epoch": 0.32, "learning_rate": 3.393211655459495e-05, "loss": 1.6999, "step": 502800 }, { "epoch": 0.32, "learning_rate": 3.392891450528338e-05, "loss": 1.6915, "step": 502900 }, { "epoch": 0.32, "learning_rate": 3.392571245597183e-05, "loss": 1.6934, "step": 503000 }, { "epoch": 0.32, "eval_loss": 1.741931676864624, "eval_runtime": 88.7864, "eval_samples_per_second": 112.63, "eval_steps_per_second": 7.039, "step": 503000 }, { "epoch": 0.32, "learning_rate": 3.392251040666026e-05, "loss": 1.689, "step": 503100 }, { "epoch": 0.32, "learning_rate": 3.391930835734871e-05, "loss": 1.7059, "step": 503200 }, { "epoch": 0.32, "learning_rate": 3.391610630803715e-05, "loss": 1.701, "step": 503300 }, { "epoch": 0.32, "learning_rate": 3.3912904258725586e-05, "loss": 1.6928, "step": 503400 }, { "epoch": 0.32, "learning_rate": 3.3909702209414026e-05, "loss": 1.6927, "step": 503500 }, { "epoch": 0.32, "learning_rate": 3.3906500160102466e-05, "loss": 1.6786, "step": 503600 }, { "epoch": 0.32, "learning_rate": 3.390329811079091e-05, "loss": 1.6862, "step": 503700 }, { "epoch": 0.32, "learning_rate": 3.3900096061479345e-05, "loss": 1.7098, "step": 503800 }, { "epoch": 0.32, "learning_rate": 3.389689401216779e-05, "loss": 1.6761, "step": 503900 }, { "epoch": 0.32, "learning_rate": 3.3893691962856225e-05, "loss": 1.6982, "step": 504000 }, { "epoch": 0.32, "eval_loss": 1.7395784854888916, "eval_runtime": 89.8967, "eval_samples_per_second": 111.239, "eval_steps_per_second": 6.952, "step": 504000 }, { "epoch": 0.32, "learning_rate": 3.389048991354467e-05, "loss": 1.6813, "step": 504100 }, { "epoch": 0.32, "learning_rate": 3.388728786423311e-05, "loss": 1.699, "step": 504200 }, { "epoch": 0.32, "learning_rate": 3.388408581492155e-05, "loss": 1.6997, "step": 504300 }, { "epoch": 0.32, "learning_rate": 3.388088376561e-05, "loss": 1.6953, "step": 504400 }, { "epoch": 0.32, "learning_rate": 3.387768171629843e-05, "loss": 1.6867, "step": 504500 }, { "epoch": 0.32, "learning_rate": 3.387447966698688e-05, "loss": 1.6958, "step": 504600 }, { "epoch": 0.32, "learning_rate": 3.387127761767531e-05, "loss": 1.7013, "step": 504700 }, { "epoch": 0.32, "learning_rate": 3.3868075568363756e-05, "loss": 1.6954, "step": 504800 }, { "epoch": 0.32, "learning_rate": 3.3864873519052196e-05, "loss": 1.6995, "step": 504900 }, { "epoch": 0.32, "learning_rate": 3.3861671469740636e-05, "loss": 1.691, "step": 505000 }, { "epoch": 0.32, "eval_loss": 1.7404372692108154, "eval_runtime": 92.5979, "eval_samples_per_second": 107.994, "eval_steps_per_second": 6.75, "step": 505000 }, { "epoch": 0.32, "learning_rate": 3.3858469420429075e-05, "loss": 1.6989, "step": 505100 }, { "epoch": 0.32, "learning_rate": 3.3855267371117515e-05, "loss": 1.6889, "step": 505200 }, { "epoch": 0.32, "learning_rate": 3.385206532180596e-05, "loss": 1.7198, "step": 505300 }, { "epoch": 0.32, "learning_rate": 3.3848863272494395e-05, "loss": 1.674, "step": 505400 }, { "epoch": 0.32, "learning_rate": 3.384566122318284e-05, "loss": 1.6834, "step": 505500 }, { "epoch": 0.32, "learning_rate": 3.3842459173871274e-05, "loss": 1.699, "step": 505600 }, { "epoch": 0.32, "learning_rate": 3.383925712455972e-05, "loss": 1.6997, "step": 505700 }, { "epoch": 0.32, "learning_rate": 3.383605507524816e-05, "loss": 1.7098, "step": 505800 }, { "epoch": 0.32, "learning_rate": 3.38328530259366e-05, "loss": 1.672, "step": 505900 }, { "epoch": 0.32, "learning_rate": 3.382965097662505e-05, "loss": 1.7113, "step": 506000 }, { "epoch": 0.32, "eval_loss": 1.7391902208328247, "eval_runtime": 88.6369, "eval_samples_per_second": 112.82, "eval_steps_per_second": 7.051, "step": 506000 }, { "epoch": 0.32, "learning_rate": 3.382644892731348e-05, "loss": 1.6864, "step": 506100 }, { "epoch": 0.32, "learning_rate": 3.3823246878001926e-05, "loss": 1.6816, "step": 506200 }, { "epoch": 0.32, "learning_rate": 3.382004482869036e-05, "loss": 1.6898, "step": 506300 }, { "epoch": 0.32, "learning_rate": 3.3816842779378806e-05, "loss": 1.6791, "step": 506400 }, { "epoch": 0.32, "learning_rate": 3.381364073006724e-05, "loss": 1.6884, "step": 506500 }, { "epoch": 0.32, "learning_rate": 3.3810438680755685e-05, "loss": 1.6968, "step": 506600 }, { "epoch": 0.32, "learning_rate": 3.3807236631444125e-05, "loss": 1.684, "step": 506700 }, { "epoch": 0.32, "learning_rate": 3.3804034582132565e-05, "loss": 1.6874, "step": 506800 }, { "epoch": 0.32, "learning_rate": 3.380083253282101e-05, "loss": 1.6725, "step": 506900 }, { "epoch": 0.32, "learning_rate": 3.3797630483509444e-05, "loss": 1.6897, "step": 507000 }, { "epoch": 0.32, "eval_loss": 1.7375620603561401, "eval_runtime": 93.8957, "eval_samples_per_second": 106.501, "eval_steps_per_second": 6.656, "step": 507000 }, { "epoch": 0.32, "learning_rate": 3.379442843419789e-05, "loss": 1.6847, "step": 507100 }, { "epoch": 0.32, "learning_rate": 3.3791226384886324e-05, "loss": 1.7109, "step": 507200 }, { "epoch": 0.32, "learning_rate": 3.378802433557477e-05, "loss": 1.7041, "step": 507300 }, { "epoch": 0.32, "learning_rate": 3.378482228626321e-05, "loss": 1.6797, "step": 507400 }, { "epoch": 0.32, "learning_rate": 3.378162023695165e-05, "loss": 1.6931, "step": 507500 }, { "epoch": 0.32, "learning_rate": 3.3778418187640096e-05, "loss": 1.6967, "step": 507600 }, { "epoch": 0.32, "learning_rate": 3.377521613832853e-05, "loss": 1.6848, "step": 507700 }, { "epoch": 0.32, "learning_rate": 3.3772014089016976e-05, "loss": 1.6851, "step": 507800 }, { "epoch": 0.33, "learning_rate": 3.376881203970541e-05, "loss": 1.6785, "step": 507900 }, { "epoch": 0.33, "learning_rate": 3.3765609990393855e-05, "loss": 1.7102, "step": 508000 }, { "epoch": 0.33, "eval_loss": 1.740316390991211, "eval_runtime": 94.8252, "eval_samples_per_second": 105.457, "eval_steps_per_second": 6.591, "step": 508000 }, { "epoch": 0.33, "learning_rate": 3.3762407941082295e-05, "loss": 1.6912, "step": 508100 }, { "epoch": 0.33, "learning_rate": 3.3759205891770734e-05, "loss": 1.6977, "step": 508200 }, { "epoch": 0.33, "learning_rate": 3.3756003842459174e-05, "loss": 1.6936, "step": 508300 }, { "epoch": 0.33, "learning_rate": 3.3752801793147614e-05, "loss": 1.6919, "step": 508400 }, { "epoch": 0.33, "learning_rate": 3.374959974383606e-05, "loss": 1.6782, "step": 508500 }, { "epoch": 0.33, "learning_rate": 3.3746397694524493e-05, "loss": 1.6902, "step": 508600 }, { "epoch": 0.33, "learning_rate": 3.374319564521294e-05, "loss": 1.699, "step": 508700 }, { "epoch": 0.33, "learning_rate": 3.373999359590137e-05, "loss": 1.6877, "step": 508800 }, { "epoch": 0.33, "learning_rate": 3.373679154658982e-05, "loss": 1.6705, "step": 508900 }, { "epoch": 0.33, "learning_rate": 3.373358949727826e-05, "loss": 1.6836, "step": 509000 }, { "epoch": 0.33, "eval_loss": 1.7428820133209229, "eval_runtime": 100.4643, "eval_samples_per_second": 99.538, "eval_steps_per_second": 6.221, "step": 509000 }, { "epoch": 0.33, "learning_rate": 3.37303874479667e-05, "loss": 1.6938, "step": 509100 }, { "epoch": 0.33, "learning_rate": 3.3727185398655145e-05, "loss": 1.6907, "step": 509200 }, { "epoch": 0.33, "learning_rate": 3.372398334934358e-05, "loss": 1.7024, "step": 509300 }, { "epoch": 0.33, "learning_rate": 3.3720781300032025e-05, "loss": 1.6801, "step": 509400 }, { "epoch": 0.33, "learning_rate": 3.371757925072046e-05, "loss": 1.6994, "step": 509500 }, { "epoch": 0.33, "learning_rate": 3.3714377201408904e-05, "loss": 1.6878, "step": 509600 }, { "epoch": 0.33, "learning_rate": 3.3711175152097344e-05, "loss": 1.6879, "step": 509700 }, { "epoch": 0.33, "learning_rate": 3.3707973102785784e-05, "loss": 1.6629, "step": 509800 }, { "epoch": 0.33, "learning_rate": 3.3704771053474224e-05, "loss": 1.6878, "step": 509900 }, { "epoch": 0.33, "learning_rate": 3.370156900416266e-05, "loss": 1.6726, "step": 510000 }, { "epoch": 0.33, "eval_loss": 1.742061972618103, "eval_runtime": 94.0889, "eval_samples_per_second": 106.282, "eval_steps_per_second": 6.643, "step": 510000 }, { "epoch": 0.33, "learning_rate": 3.369836695485111e-05, "loss": 1.6773, "step": 510100 }, { "epoch": 0.33, "learning_rate": 3.369516490553954e-05, "loss": 1.6986, "step": 510200 }, { "epoch": 0.33, "learning_rate": 3.369196285622799e-05, "loss": 1.6868, "step": 510300 }, { "epoch": 0.33, "learning_rate": 3.368876080691643e-05, "loss": 1.6731, "step": 510400 }, { "epoch": 0.33, "learning_rate": 3.368555875760487e-05, "loss": 1.6735, "step": 510500 }, { "epoch": 0.33, "learning_rate": 3.368235670829331e-05, "loss": 1.7034, "step": 510600 }, { "epoch": 0.33, "learning_rate": 3.367915465898175e-05, "loss": 1.6676, "step": 510700 }, { "epoch": 0.33, "learning_rate": 3.3675952609670195e-05, "loss": 1.673, "step": 510800 }, { "epoch": 0.33, "learning_rate": 3.367275056035863e-05, "loss": 1.7045, "step": 510900 }, { "epoch": 0.33, "learning_rate": 3.3669548511047074e-05, "loss": 1.6915, "step": 511000 }, { "epoch": 0.33, "eval_loss": 1.7422599792480469, "eval_runtime": 92.4114, "eval_samples_per_second": 108.212, "eval_steps_per_second": 6.763, "step": 511000 }, { "epoch": 0.33, "learning_rate": 3.3666346461735514e-05, "loss": 1.7056, "step": 511100 }, { "epoch": 0.33, "learning_rate": 3.3663144412423954e-05, "loss": 1.68, "step": 511200 }, { "epoch": 0.33, "learning_rate": 3.3659942363112393e-05, "loss": 1.6715, "step": 511300 }, { "epoch": 0.33, "learning_rate": 3.365674031380083e-05, "loss": 1.7024, "step": 511400 }, { "epoch": 0.33, "learning_rate": 3.365353826448927e-05, "loss": 1.6801, "step": 511500 }, { "epoch": 0.33, "learning_rate": 3.365033621517771e-05, "loss": 1.6941, "step": 511600 }, { "epoch": 0.33, "learning_rate": 3.364713416586616e-05, "loss": 1.6943, "step": 511700 }, { "epoch": 0.33, "learning_rate": 3.364393211655459e-05, "loss": 1.6939, "step": 511800 }, { "epoch": 0.33, "learning_rate": 3.364073006724304e-05, "loss": 1.6978, "step": 511900 }, { "epoch": 0.33, "learning_rate": 3.363752801793148e-05, "loss": 1.7121, "step": 512000 }, { "epoch": 0.33, "eval_loss": 1.739151954650879, "eval_runtime": 105.3863, "eval_samples_per_second": 94.889, "eval_steps_per_second": 5.931, "step": 512000 }, { "epoch": 0.33, "learning_rate": 3.363432596861992e-05, "loss": 1.6983, "step": 512100 }, { "epoch": 0.33, "learning_rate": 3.363112391930836e-05, "loss": 1.6915, "step": 512200 }, { "epoch": 0.33, "learning_rate": 3.36279218699968e-05, "loss": 1.7003, "step": 512300 }, { "epoch": 0.33, "learning_rate": 3.3624719820685244e-05, "loss": 1.6778, "step": 512400 }, { "epoch": 0.33, "learning_rate": 3.362151777137368e-05, "loss": 1.6892, "step": 512500 }, { "epoch": 0.33, "learning_rate": 3.3618315722062124e-05, "loss": 1.7016, "step": 512600 }, { "epoch": 0.33, "learning_rate": 3.361511367275056e-05, "loss": 1.6844, "step": 512700 }, { "epoch": 0.33, "learning_rate": 3.3611911623439e-05, "loss": 1.6727, "step": 512800 }, { "epoch": 0.33, "learning_rate": 3.360870957412744e-05, "loss": 1.6855, "step": 512900 }, { "epoch": 0.33, "learning_rate": 3.360550752481588e-05, "loss": 1.7082, "step": 513000 }, { "epoch": 0.33, "eval_loss": 1.7416062355041504, "eval_runtime": 104.2741, "eval_samples_per_second": 95.901, "eval_steps_per_second": 5.994, "step": 513000 }, { "epoch": 0.33, "learning_rate": 3.360230547550432e-05, "loss": 1.6833, "step": 513100 }, { "epoch": 0.33, "learning_rate": 3.359910342619276e-05, "loss": 1.6744, "step": 513200 }, { "epoch": 0.33, "learning_rate": 3.359590137688121e-05, "loss": 1.7005, "step": 513300 }, { "epoch": 0.33, "learning_rate": 3.359269932756965e-05, "loss": 1.6766, "step": 513400 }, { "epoch": 0.33, "learning_rate": 3.358949727825809e-05, "loss": 1.7006, "step": 513500 }, { "epoch": 0.33, "learning_rate": 3.358629522894653e-05, "loss": 1.692, "step": 513600 }, { "epoch": 0.33, "learning_rate": 3.358309317963497e-05, "loss": 1.6822, "step": 513700 }, { "epoch": 0.33, "learning_rate": 3.357989113032341e-05, "loss": 1.6811, "step": 513800 }, { "epoch": 0.33, "learning_rate": 3.357668908101185e-05, "loss": 1.6664, "step": 513900 }, { "epoch": 0.33, "learning_rate": 3.3573487031700294e-05, "loss": 1.675, "step": 514000 }, { "epoch": 0.33, "eval_loss": 1.7437293529510498, "eval_runtime": 103.0672, "eval_samples_per_second": 97.024, "eval_steps_per_second": 6.064, "step": 514000 }, { "epoch": 0.33, "learning_rate": 3.3570284982388727e-05, "loss": 1.675, "step": 514100 }, { "epoch": 0.33, "learning_rate": 3.356708293307717e-05, "loss": 1.6965, "step": 514200 }, { "epoch": 0.33, "learning_rate": 3.356388088376561e-05, "loss": 1.6856, "step": 514300 }, { "epoch": 0.33, "learning_rate": 3.356067883445405e-05, "loss": 1.6938, "step": 514400 }, { "epoch": 0.33, "learning_rate": 3.355747678514249e-05, "loss": 1.6839, "step": 514500 }, { "epoch": 0.33, "learning_rate": 3.355427473583093e-05, "loss": 1.6944, "step": 514600 }, { "epoch": 0.33, "learning_rate": 3.355107268651937e-05, "loss": 1.6872, "step": 514700 }, { "epoch": 0.33, "learning_rate": 3.354787063720781e-05, "loss": 1.6982, "step": 514800 }, { "epoch": 0.33, "learning_rate": 3.354466858789626e-05, "loss": 1.6867, "step": 514900 }, { "epoch": 0.33, "learning_rate": 3.35414665385847e-05, "loss": 1.6981, "step": 515000 }, { "epoch": 0.33, "eval_loss": 1.7422230243682861, "eval_runtime": 103.7904, "eval_samples_per_second": 96.348, "eval_steps_per_second": 6.022, "step": 515000 }, { "epoch": 0.33, "learning_rate": 3.353826448927314e-05, "loss": 1.6898, "step": 515100 }, { "epoch": 0.33, "learning_rate": 3.353506243996158e-05, "loss": 1.679, "step": 515200 }, { "epoch": 0.33, "learning_rate": 3.353186039065002e-05, "loss": 1.6928, "step": 515300 }, { "epoch": 0.33, "learning_rate": 3.352865834133846e-05, "loss": 1.6802, "step": 515400 }, { "epoch": 0.33, "learning_rate": 3.3525456292026896e-05, "loss": 1.6896, "step": 515500 }, { "epoch": 0.33, "learning_rate": 3.352225424271534e-05, "loss": 1.667, "step": 515600 }, { "epoch": 0.33, "learning_rate": 3.351905219340378e-05, "loss": 1.6847, "step": 515700 }, { "epoch": 0.33, "learning_rate": 3.351585014409222e-05, "loss": 1.6747, "step": 515800 }, { "epoch": 0.33, "learning_rate": 3.351264809478066e-05, "loss": 1.6802, "step": 515900 }, { "epoch": 0.33, "learning_rate": 3.35094460454691e-05, "loss": 1.6886, "step": 516000 }, { "epoch": 0.33, "eval_loss": 1.7405519485473633, "eval_runtime": 105.3409, "eval_samples_per_second": 94.93, "eval_steps_per_second": 5.933, "step": 516000 }, { "epoch": 0.33, "learning_rate": 3.350624399615754e-05, "loss": 1.6847, "step": 516100 }, { "epoch": 0.33, "learning_rate": 3.350304194684598e-05, "loss": 1.6918, "step": 516200 }, { "epoch": 0.33, "learning_rate": 3.349983989753442e-05, "loss": 1.6933, "step": 516300 }, { "epoch": 0.33, "learning_rate": 3.349663784822286e-05, "loss": 1.6994, "step": 516400 }, { "epoch": 0.33, "learning_rate": 3.349343579891131e-05, "loss": 1.7075, "step": 516500 }, { "epoch": 0.33, "learning_rate": 3.349023374959975e-05, "loss": 1.6852, "step": 516600 }, { "epoch": 0.33, "learning_rate": 3.348703170028819e-05, "loss": 1.6738, "step": 516700 }, { "epoch": 0.33, "learning_rate": 3.3483829650976627e-05, "loss": 1.6713, "step": 516800 }, { "epoch": 0.33, "learning_rate": 3.3480627601665066e-05, "loss": 1.6844, "step": 516900 }, { "epoch": 0.33, "learning_rate": 3.3477425552353506e-05, "loss": 1.6903, "step": 517000 }, { "epoch": 0.33, "eval_loss": 1.7422064542770386, "eval_runtime": 92.6364, "eval_samples_per_second": 107.949, "eval_steps_per_second": 6.747, "step": 517000 }, { "epoch": 0.33, "learning_rate": 3.3474223503041946e-05, "loss": 1.6939, "step": 517100 }, { "epoch": 0.33, "learning_rate": 3.347102145373039e-05, "loss": 1.692, "step": 517200 }, { "epoch": 0.33, "learning_rate": 3.346781940441883e-05, "loss": 1.6877, "step": 517300 }, { "epoch": 0.33, "learning_rate": 3.346461735510727e-05, "loss": 1.6789, "step": 517400 }, { "epoch": 0.33, "learning_rate": 3.346141530579571e-05, "loss": 1.6753, "step": 517500 }, { "epoch": 0.33, "learning_rate": 3.345821325648415e-05, "loss": 1.6777, "step": 517600 }, { "epoch": 0.33, "learning_rate": 3.345501120717259e-05, "loss": 1.6823, "step": 517700 }, { "epoch": 0.33, "learning_rate": 3.345180915786103e-05, "loss": 1.6892, "step": 517800 }, { "epoch": 0.33, "learning_rate": 3.344860710854947e-05, "loss": 1.6878, "step": 517900 }, { "epoch": 0.33, "learning_rate": 3.344540505923792e-05, "loss": 1.6975, "step": 518000 }, { "epoch": 0.33, "eval_loss": 1.7400257587432861, "eval_runtime": 115.53, "eval_samples_per_second": 86.558, "eval_steps_per_second": 5.41, "step": 518000 }, { "epoch": 0.33, "learning_rate": 3.344220300992636e-05, "loss": 1.6895, "step": 518100 }, { "epoch": 0.33, "learning_rate": 3.3439000960614796e-05, "loss": 1.6965, "step": 518200 }, { "epoch": 0.33, "learning_rate": 3.3435798911303236e-05, "loss": 1.707, "step": 518300 }, { "epoch": 0.33, "learning_rate": 3.3432596861991676e-05, "loss": 1.6848, "step": 518400 }, { "epoch": 0.33, "learning_rate": 3.3429394812680116e-05, "loss": 1.6889, "step": 518500 }, { "epoch": 0.33, "learning_rate": 3.3426192763368555e-05, "loss": 1.7123, "step": 518600 }, { "epoch": 0.33, "learning_rate": 3.3422990714056995e-05, "loss": 1.6829, "step": 518700 }, { "epoch": 0.33, "learning_rate": 3.341978866474544e-05, "loss": 1.6895, "step": 518800 }, { "epoch": 0.33, "learning_rate": 3.341658661543388e-05, "loss": 1.6895, "step": 518900 }, { "epoch": 0.33, "learning_rate": 3.341338456612232e-05, "loss": 1.6882, "step": 519000 }, { "epoch": 0.33, "eval_loss": 1.7391926050186157, "eval_runtime": 100.8141, "eval_samples_per_second": 99.193, "eval_steps_per_second": 6.2, "step": 519000 }, { "epoch": 0.33, "learning_rate": 3.341018251681076e-05, "loss": 1.6592, "step": 519100 }, { "epoch": 0.33, "learning_rate": 3.34069804674992e-05, "loss": 1.6786, "step": 519200 }, { "epoch": 0.33, "learning_rate": 3.340377841818764e-05, "loss": 1.6716, "step": 519300 }, { "epoch": 0.33, "learning_rate": 3.340057636887608e-05, "loss": 1.6688, "step": 519400 }, { "epoch": 0.33, "learning_rate": 3.339737431956452e-05, "loss": 1.6764, "step": 519500 }, { "epoch": 0.33, "learning_rate": 3.3394172270252966e-05, "loss": 1.6825, "step": 519600 }, { "epoch": 0.33, "learning_rate": 3.3390970220941406e-05, "loss": 1.69, "step": 519700 }, { "epoch": 0.33, "learning_rate": 3.3387768171629846e-05, "loss": 1.6926, "step": 519800 }, { "epoch": 0.33, "learning_rate": 3.3384566122318286e-05, "loss": 1.6788, "step": 519900 }, { "epoch": 0.33, "learning_rate": 3.3381364073006725e-05, "loss": 1.6914, "step": 520000 }, { "epoch": 0.33, "eval_loss": 1.7390999794006348, "eval_runtime": 98.5714, "eval_samples_per_second": 101.449, "eval_steps_per_second": 6.341, "step": 520000 }, { "epoch": 0.33, "learning_rate": 3.3378162023695165e-05, "loss": 1.6846, "step": 520100 }, { "epoch": 0.33, "learning_rate": 3.3374959974383605e-05, "loss": 1.6679, "step": 520200 }, { "epoch": 0.33, "learning_rate": 3.337175792507205e-05, "loss": 1.698, "step": 520300 }, { "epoch": 0.33, "learning_rate": 3.336855587576049e-05, "loss": 1.6839, "step": 520400 }, { "epoch": 0.33, "learning_rate": 3.336535382644893e-05, "loss": 1.6807, "step": 520500 }, { "epoch": 0.33, "learning_rate": 3.336215177713737e-05, "loss": 1.6906, "step": 520600 }, { "epoch": 0.33, "learning_rate": 3.335894972782581e-05, "loss": 1.6881, "step": 520700 }, { "epoch": 0.33, "learning_rate": 3.335574767851425e-05, "loss": 1.6822, "step": 520800 }, { "epoch": 0.33, "learning_rate": 3.335254562920269e-05, "loss": 1.688, "step": 520900 }, { "epoch": 0.33, "learning_rate": 3.3349343579891136e-05, "loss": 1.6757, "step": 521000 }, { "epoch": 0.33, "eval_loss": 1.7390830516815186, "eval_runtime": 105.0195, "eval_samples_per_second": 95.22, "eval_steps_per_second": 5.951, "step": 521000 }, { "epoch": 0.33, "learning_rate": 3.334614153057957e-05, "loss": 1.6935, "step": 521100 }, { "epoch": 0.33, "learning_rate": 3.3342939481268016e-05, "loss": 1.687, "step": 521200 }, { "epoch": 0.33, "learning_rate": 3.3339737431956455e-05, "loss": 1.6869, "step": 521300 }, { "epoch": 0.33, "learning_rate": 3.3336535382644895e-05, "loss": 1.6851, "step": 521400 }, { "epoch": 0.33, "learning_rate": 3.3333333333333335e-05, "loss": 1.6949, "step": 521500 }, { "epoch": 0.33, "learning_rate": 3.3330131284021775e-05, "loss": 1.6807, "step": 521600 }, { "epoch": 0.33, "learning_rate": 3.3326929234710214e-05, "loss": 1.6768, "step": 521700 }, { "epoch": 0.33, "learning_rate": 3.3323727185398654e-05, "loss": 1.6732, "step": 521800 }, { "epoch": 0.33, "learning_rate": 3.33205251360871e-05, "loss": 1.7069, "step": 521900 }, { "epoch": 0.33, "learning_rate": 3.331732308677554e-05, "loss": 1.6961, "step": 522000 }, { "epoch": 0.33, "eval_loss": 1.7395892143249512, "eval_runtime": 104.5874, "eval_samples_per_second": 95.614, "eval_steps_per_second": 5.976, "step": 522000 }, { "epoch": 0.33, "learning_rate": 3.331412103746398e-05, "loss": 1.6697, "step": 522100 }, { "epoch": 0.33, "learning_rate": 3.331091898815242e-05, "loss": 1.6659, "step": 522200 }, { "epoch": 0.33, "learning_rate": 3.330771693884086e-05, "loss": 1.6654, "step": 522300 }, { "epoch": 0.33, "learning_rate": 3.33045148895293e-05, "loss": 1.6686, "step": 522400 }, { "epoch": 0.33, "learning_rate": 3.330131284021774e-05, "loss": 1.676, "step": 522500 }, { "epoch": 0.33, "learning_rate": 3.3298110790906186e-05, "loss": 1.6655, "step": 522600 }, { "epoch": 0.33, "learning_rate": 3.329490874159462e-05, "loss": 1.6798, "step": 522700 }, { "epoch": 0.33, "learning_rate": 3.3291706692283065e-05, "loss": 1.708, "step": 522800 }, { "epoch": 0.33, "learning_rate": 3.3288504642971505e-05, "loss": 1.6879, "step": 522900 }, { "epoch": 0.33, "learning_rate": 3.3285302593659945e-05, "loss": 1.69, "step": 523000 }, { "epoch": 0.33, "eval_loss": 1.7431408166885376, "eval_runtime": 96.0286, "eval_samples_per_second": 104.136, "eval_steps_per_second": 6.508, "step": 523000 }, { "epoch": 0.33, "learning_rate": 3.3282100544348384e-05, "loss": 1.677, "step": 523100 }, { "epoch": 0.33, "learning_rate": 3.3278898495036824e-05, "loss": 1.677, "step": 523200 }, { "epoch": 0.33, "learning_rate": 3.327569644572527e-05, "loss": 1.6832, "step": 523300 }, { "epoch": 0.33, "learning_rate": 3.3272494396413704e-05, "loss": 1.6871, "step": 523400 }, { "epoch": 0.34, "learning_rate": 3.326929234710215e-05, "loss": 1.6778, "step": 523500 }, { "epoch": 0.34, "learning_rate": 3.326609029779059e-05, "loss": 1.6825, "step": 523600 }, { "epoch": 0.34, "learning_rate": 3.326288824847903e-05, "loss": 1.6729, "step": 523700 }, { "epoch": 0.34, "learning_rate": 3.325968619916747e-05, "loss": 1.6605, "step": 523800 }, { "epoch": 0.34, "learning_rate": 3.325648414985591e-05, "loss": 1.6829, "step": 523900 }, { "epoch": 0.34, "learning_rate": 3.325328210054435e-05, "loss": 1.6791, "step": 524000 }, { "epoch": 0.34, "eval_loss": 1.7410475015640259, "eval_runtime": 102.3765, "eval_samples_per_second": 97.679, "eval_steps_per_second": 6.105, "step": 524000 }, { "epoch": 0.34, "learning_rate": 3.325008005123279e-05, "loss": 1.6919, "step": 524100 }, { "epoch": 0.34, "learning_rate": 3.3246878001921235e-05, "loss": 1.672, "step": 524200 }, { "epoch": 0.34, "learning_rate": 3.324367595260967e-05, "loss": 1.6888, "step": 524300 }, { "epoch": 0.34, "learning_rate": 3.3240473903298114e-05, "loss": 1.6794, "step": 524400 }, { "epoch": 0.34, "learning_rate": 3.3237271853986554e-05, "loss": 1.691, "step": 524500 }, { "epoch": 0.34, "learning_rate": 3.3234069804674994e-05, "loss": 1.6973, "step": 524600 }, { "epoch": 0.34, "learning_rate": 3.3230867755363434e-05, "loss": 1.6906, "step": 524700 }, { "epoch": 0.34, "learning_rate": 3.3227665706051873e-05, "loss": 1.6722, "step": 524800 }, { "epoch": 0.34, "learning_rate": 3.322446365674032e-05, "loss": 1.6782, "step": 524900 }, { "epoch": 0.34, "learning_rate": 3.322126160742875e-05, "loss": 1.6835, "step": 525000 }, { "epoch": 0.34, "eval_loss": 1.7413116693496704, "eval_runtime": 100.5198, "eval_samples_per_second": 99.483, "eval_steps_per_second": 6.218, "step": 525000 }, { "epoch": 0.34, "learning_rate": 3.32180595581172e-05, "loss": 1.6724, "step": 525100 }, { "epoch": 0.34, "learning_rate": 3.321485750880563e-05, "loss": 1.6616, "step": 525200 }, { "epoch": 0.34, "learning_rate": 3.321165545949408e-05, "loss": 1.6725, "step": 525300 }, { "epoch": 0.34, "learning_rate": 3.320845341018252e-05, "loss": 1.6805, "step": 525400 }, { "epoch": 0.34, "learning_rate": 3.320525136087096e-05, "loss": 1.688, "step": 525500 }, { "epoch": 0.34, "learning_rate": 3.3202049311559405e-05, "loss": 1.6849, "step": 525600 }, { "epoch": 0.34, "learning_rate": 3.319884726224784e-05, "loss": 1.6898, "step": 525700 }, { "epoch": 0.34, "learning_rate": 3.3195645212936284e-05, "loss": 1.6789, "step": 525800 }, { "epoch": 0.34, "learning_rate": 3.319244316362472e-05, "loss": 1.6771, "step": 525900 }, { "epoch": 0.34, "learning_rate": 3.3189241114313164e-05, "loss": 1.7, "step": 526000 }, { "epoch": 0.34, "eval_loss": 1.7398755550384521, "eval_runtime": 96.492, "eval_samples_per_second": 103.636, "eval_steps_per_second": 6.477, "step": 526000 }, { "epoch": 0.34, "learning_rate": 3.3186039065001604e-05, "loss": 1.6717, "step": 526100 }, { "epoch": 0.34, "learning_rate": 3.318283701569004e-05, "loss": 1.6805, "step": 526200 }, { "epoch": 0.34, "learning_rate": 3.317963496637848e-05, "loss": 1.6768, "step": 526300 }, { "epoch": 0.34, "learning_rate": 3.317643291706692e-05, "loss": 1.6787, "step": 526400 }, { "epoch": 0.34, "learning_rate": 3.317323086775537e-05, "loss": 1.6726, "step": 526500 }, { "epoch": 0.34, "learning_rate": 3.31700288184438e-05, "loss": 1.6571, "step": 526600 }, { "epoch": 0.34, "learning_rate": 3.316682676913225e-05, "loss": 1.6734, "step": 526700 }, { "epoch": 0.34, "learning_rate": 3.316362471982068e-05, "loss": 1.704, "step": 526800 }, { "epoch": 0.34, "learning_rate": 3.316042267050913e-05, "loss": 1.6965, "step": 526900 }, { "epoch": 0.34, "learning_rate": 3.315722062119757e-05, "loss": 1.6724, "step": 527000 }, { "epoch": 0.34, "eval_loss": 1.7394349575042725, "eval_runtime": 102.361, "eval_samples_per_second": 97.693, "eval_steps_per_second": 6.106, "step": 527000 }, { "epoch": 0.34, "learning_rate": 3.315401857188601e-05, "loss": 1.6838, "step": 527100 }, { "epoch": 0.34, "learning_rate": 3.3150816522574454e-05, "loss": 1.6795, "step": 527200 }, { "epoch": 0.34, "learning_rate": 3.314761447326289e-05, "loss": 1.6815, "step": 527300 }, { "epoch": 0.34, "learning_rate": 3.3144412423951334e-05, "loss": 1.6794, "step": 527400 }, { "epoch": 0.34, "learning_rate": 3.314121037463977e-05, "loss": 1.6629, "step": 527500 }, { "epoch": 0.34, "learning_rate": 3.313800832532821e-05, "loss": 1.6854, "step": 527600 }, { "epoch": 0.34, "learning_rate": 3.313480627601665e-05, "loss": 1.6667, "step": 527700 }, { "epoch": 0.34, "learning_rate": 3.313160422670509e-05, "loss": 1.6725, "step": 527800 }, { "epoch": 0.34, "learning_rate": 3.312840217739354e-05, "loss": 1.6839, "step": 527900 }, { "epoch": 0.34, "learning_rate": 3.312520012808197e-05, "loss": 1.6782, "step": 528000 }, { "epoch": 0.34, "eval_loss": 1.7380974292755127, "eval_runtime": 93.0969, "eval_samples_per_second": 107.415, "eval_steps_per_second": 6.713, "step": 528000 }, { "epoch": 0.34, "learning_rate": 3.312199807877042e-05, "loss": 1.692, "step": 528100 }, { "epoch": 0.34, "learning_rate": 3.311879602945885e-05, "loss": 1.6774, "step": 528200 }, { "epoch": 0.34, "learning_rate": 3.31155939801473e-05, "loss": 1.6833, "step": 528300 }, { "epoch": 0.34, "learning_rate": 3.311239193083573e-05, "loss": 1.6658, "step": 528400 }, { "epoch": 0.34, "learning_rate": 3.310918988152418e-05, "loss": 1.6763, "step": 528500 }, { "epoch": 0.34, "learning_rate": 3.310598783221262e-05, "loss": 1.6831, "step": 528600 }, { "epoch": 0.34, "learning_rate": 3.310278578290106e-05, "loss": 1.6817, "step": 528700 }, { "epoch": 0.34, "learning_rate": 3.3099583733589504e-05, "loss": 1.6716, "step": 528800 }, { "epoch": 0.34, "learning_rate": 3.3096381684277937e-05, "loss": 1.6869, "step": 528900 }, { "epoch": 0.34, "learning_rate": 3.309317963496638e-05, "loss": 1.6847, "step": 529000 }, { "epoch": 0.34, "eval_loss": 1.738581657409668, "eval_runtime": 90.8132, "eval_samples_per_second": 110.116, "eval_steps_per_second": 6.882, "step": 529000 }, { "epoch": 0.34, "learning_rate": 3.3089977585654816e-05, "loss": 1.6887, "step": 529100 }, { "epoch": 0.34, "learning_rate": 3.308677553634326e-05, "loss": 1.6847, "step": 529200 }, { "epoch": 0.34, "learning_rate": 3.30835734870317e-05, "loss": 1.6843, "step": 529300 }, { "epoch": 0.34, "learning_rate": 3.308037143772014e-05, "loss": 1.697, "step": 529400 }, { "epoch": 0.34, "learning_rate": 3.307716938840859e-05, "loss": 1.667, "step": 529500 }, { "epoch": 0.34, "learning_rate": 3.307396733909702e-05, "loss": 1.6643, "step": 529600 }, { "epoch": 0.34, "learning_rate": 3.307076528978547e-05, "loss": 1.6971, "step": 529700 }, { "epoch": 0.34, "learning_rate": 3.30675632404739e-05, "loss": 1.7036, "step": 529800 }, { "epoch": 0.34, "learning_rate": 3.306436119116235e-05, "loss": 1.6715, "step": 529900 }, { "epoch": 0.34, "learning_rate": 3.306115914185078e-05, "loss": 1.6876, "step": 530000 }, { "epoch": 0.34, "eval_loss": 1.7392300367355347, "eval_runtime": 92.2413, "eval_samples_per_second": 108.411, "eval_steps_per_second": 6.776, "step": 530000 }, { "epoch": 0.34, "learning_rate": 3.305795709253923e-05, "loss": 1.6598, "step": 530100 }, { "epoch": 0.34, "learning_rate": 3.305475504322767e-05, "loss": 1.7026, "step": 530200 }, { "epoch": 0.34, "learning_rate": 3.3051552993916106e-05, "loss": 1.7041, "step": 530300 }, { "epoch": 0.34, "learning_rate": 3.304835094460455e-05, "loss": 1.671, "step": 530400 }, { "epoch": 0.34, "learning_rate": 3.3045148895292986e-05, "loss": 1.6995, "step": 530500 }, { "epoch": 0.34, "learning_rate": 3.304194684598143e-05, "loss": 1.6586, "step": 530600 }, { "epoch": 0.34, "learning_rate": 3.3038744796669865e-05, "loss": 1.6561, "step": 530700 }, { "epoch": 0.34, "learning_rate": 3.303554274735831e-05, "loss": 1.69, "step": 530800 }, { "epoch": 0.34, "learning_rate": 3.303234069804675e-05, "loss": 1.6463, "step": 530900 }, { "epoch": 0.34, "learning_rate": 3.302913864873519e-05, "loss": 1.7023, "step": 531000 }, { "epoch": 0.34, "eval_loss": 1.7411870956420898, "eval_runtime": 93.0804, "eval_samples_per_second": 107.434, "eval_steps_per_second": 6.715, "step": 531000 }, { "epoch": 0.34, "learning_rate": 3.302593659942364e-05, "loss": 1.6792, "step": 531100 }, { "epoch": 0.34, "learning_rate": 3.302273455011207e-05, "loss": 1.6805, "step": 531200 }, { "epoch": 0.34, "learning_rate": 3.301953250080052e-05, "loss": 1.6857, "step": 531300 }, { "epoch": 0.34, "learning_rate": 3.301633045148895e-05, "loss": 1.6825, "step": 531400 }, { "epoch": 0.34, "learning_rate": 3.30131284021774e-05, "loss": 1.7184, "step": 531500 }, { "epoch": 0.34, "learning_rate": 3.300992635286583e-05, "loss": 1.6704, "step": 531600 }, { "epoch": 0.34, "learning_rate": 3.3006724303554276e-05, "loss": 1.6696, "step": 531700 }, { "epoch": 0.34, "learning_rate": 3.3003522254242716e-05, "loss": 1.6787, "step": 531800 }, { "epoch": 0.34, "learning_rate": 3.3000320204931156e-05, "loss": 1.6777, "step": 531900 }, { "epoch": 0.34, "learning_rate": 3.29971181556196e-05, "loss": 1.684, "step": 532000 }, { "epoch": 0.34, "eval_loss": 1.7395613193511963, "eval_runtime": 90.4, "eval_samples_per_second": 110.619, "eval_steps_per_second": 6.914, "step": 532000 }, { "epoch": 0.34, "learning_rate": 3.2993916106308035e-05, "loss": 1.6717, "step": 532100 }, { "epoch": 0.34, "learning_rate": 3.299071405699648e-05, "loss": 1.6765, "step": 532200 }, { "epoch": 0.34, "learning_rate": 3.2987512007684915e-05, "loss": 1.6756, "step": 532300 }, { "epoch": 0.34, "learning_rate": 3.298430995837336e-05, "loss": 1.6908, "step": 532400 }, { "epoch": 0.34, "learning_rate": 3.29811079090618e-05, "loss": 1.6848, "step": 532500 }, { "epoch": 0.34, "learning_rate": 3.297790585975024e-05, "loss": 1.6933, "step": 532600 }, { "epoch": 0.34, "learning_rate": 3.297470381043869e-05, "loss": 1.6882, "step": 532700 }, { "epoch": 0.34, "learning_rate": 3.297150176112712e-05, "loss": 1.708, "step": 532800 }, { "epoch": 0.34, "learning_rate": 3.296829971181557e-05, "loss": 1.6958, "step": 532900 }, { "epoch": 0.34, "learning_rate": 3.2965097662504e-05, "loss": 1.69, "step": 533000 }, { "epoch": 0.34, "eval_loss": 1.7414807081222534, "eval_runtime": 90.2148, "eval_samples_per_second": 110.847, "eval_steps_per_second": 6.928, "step": 533000 }, { "epoch": 0.34, "learning_rate": 3.2961895613192446e-05, "loss": 1.6755, "step": 533100 }, { "epoch": 0.34, "learning_rate": 3.2958693563880886e-05, "loss": 1.6705, "step": 533200 }, { "epoch": 0.34, "learning_rate": 3.2955491514569326e-05, "loss": 1.6949, "step": 533300 }, { "epoch": 0.34, "learning_rate": 3.2952289465257765e-05, "loss": 1.6942, "step": 533400 }, { "epoch": 0.34, "learning_rate": 3.2949087415946205e-05, "loss": 1.6726, "step": 533500 }, { "epoch": 0.34, "learning_rate": 3.294588536663465e-05, "loss": 1.6818, "step": 533600 }, { "epoch": 0.34, "learning_rate": 3.2942683317323085e-05, "loss": 1.661, "step": 533700 }, { "epoch": 0.34, "learning_rate": 3.293948126801153e-05, "loss": 1.6859, "step": 533800 }, { "epoch": 0.34, "learning_rate": 3.2936279218699964e-05, "loss": 1.6956, "step": 533900 }, { "epoch": 0.34, "learning_rate": 3.293307716938841e-05, "loss": 1.6684, "step": 534000 }, { "epoch": 0.34, "eval_loss": 1.739929437637329, "eval_runtime": 92.3828, "eval_samples_per_second": 108.245, "eval_steps_per_second": 6.765, "step": 534000 }, { "epoch": 0.34, "learning_rate": 3.292987512007685e-05, "loss": 1.671, "step": 534100 }, { "epoch": 0.34, "learning_rate": 3.292667307076529e-05, "loss": 1.6877, "step": 534200 }, { "epoch": 0.34, "learning_rate": 3.292347102145374e-05, "loss": 1.688, "step": 534300 }, { "epoch": 0.34, "learning_rate": 3.292026897214217e-05, "loss": 1.6712, "step": 534400 }, { "epoch": 0.34, "learning_rate": 3.2917066922830616e-05, "loss": 1.6872, "step": 534500 }, { "epoch": 0.34, "learning_rate": 3.291386487351905e-05, "loss": 1.6731, "step": 534600 }, { "epoch": 0.34, "learning_rate": 3.2910662824207496e-05, "loss": 1.6728, "step": 534700 }, { "epoch": 0.34, "learning_rate": 3.2907460774895935e-05, "loss": 1.6615, "step": 534800 }, { "epoch": 0.34, "learning_rate": 3.2904258725584375e-05, "loss": 1.703, "step": 534900 }, { "epoch": 0.34, "learning_rate": 3.2901056676272815e-05, "loss": 1.6702, "step": 535000 }, { "epoch": 0.34, "eval_loss": 1.739702582359314, "eval_runtime": 93.6787, "eval_samples_per_second": 106.748, "eval_steps_per_second": 6.672, "step": 535000 }, { "epoch": 0.34, "learning_rate": 3.2897854626961255e-05, "loss": 1.6741, "step": 535100 }, { "epoch": 0.34, "learning_rate": 3.28946525776497e-05, "loss": 1.6823, "step": 535200 }, { "epoch": 0.34, "learning_rate": 3.2891450528338134e-05, "loss": 1.7003, "step": 535300 }, { "epoch": 0.34, "learning_rate": 3.288824847902658e-05, "loss": 1.6644, "step": 535400 }, { "epoch": 0.34, "learning_rate": 3.288504642971502e-05, "loss": 1.6712, "step": 535500 }, { "epoch": 0.34, "learning_rate": 3.288184438040346e-05, "loss": 1.6707, "step": 535600 }, { "epoch": 0.34, "learning_rate": 3.28786423310919e-05, "loss": 1.6627, "step": 535700 }, { "epoch": 0.34, "learning_rate": 3.287544028178034e-05, "loss": 1.7039, "step": 535800 }, { "epoch": 0.34, "learning_rate": 3.2872238232468786e-05, "loss": 1.671, "step": 535900 }, { "epoch": 0.34, "learning_rate": 3.286903618315722e-05, "loss": 1.6799, "step": 536000 }, { "epoch": 0.34, "eval_loss": 1.7391116619110107, "eval_runtime": 93.1385, "eval_samples_per_second": 107.367, "eval_steps_per_second": 6.71, "step": 536000 }, { "epoch": 0.34, "learning_rate": 3.2865834133845666e-05, "loss": 1.6889, "step": 536100 }, { "epoch": 0.34, "learning_rate": 3.28626320845341e-05, "loss": 1.6567, "step": 536200 }, { "epoch": 0.34, "learning_rate": 3.2859430035222545e-05, "loss": 1.6818, "step": 536300 }, { "epoch": 0.34, "learning_rate": 3.2856227985910985e-05, "loss": 1.7047, "step": 536400 }, { "epoch": 0.34, "learning_rate": 3.2853025936599424e-05, "loss": 1.6662, "step": 536500 }, { "epoch": 0.34, "learning_rate": 3.2849823887287864e-05, "loss": 1.6819, "step": 536600 }, { "epoch": 0.34, "learning_rate": 3.2846621837976304e-05, "loss": 1.6853, "step": 536700 }, { "epoch": 0.34, "learning_rate": 3.284341978866475e-05, "loss": 1.6839, "step": 536800 }, { "epoch": 0.34, "learning_rate": 3.2840217739353183e-05, "loss": 1.6867, "step": 536900 }, { "epoch": 0.34, "learning_rate": 3.283701569004163e-05, "loss": 1.7113, "step": 537000 }, { "epoch": 0.34, "eval_loss": 1.7396637201309204, "eval_runtime": 90.1045, "eval_samples_per_second": 110.982, "eval_steps_per_second": 6.936, "step": 537000 }, { "epoch": 0.34, "learning_rate": 3.283381364073007e-05, "loss": 1.6683, "step": 537100 }, { "epoch": 0.34, "learning_rate": 3.283061159141851e-05, "loss": 1.6802, "step": 537200 }, { "epoch": 0.34, "learning_rate": 3.282740954210695e-05, "loss": 1.6808, "step": 537300 }, { "epoch": 0.34, "learning_rate": 3.282420749279539e-05, "loss": 1.6666, "step": 537400 }, { "epoch": 0.34, "learning_rate": 3.2821005443483835e-05, "loss": 1.6699, "step": 537500 }, { "epoch": 0.34, "learning_rate": 3.281780339417227e-05, "loss": 1.6693, "step": 537600 }, { "epoch": 0.34, "learning_rate": 3.2814601344860715e-05, "loss": 1.6883, "step": 537700 }, { "epoch": 0.34, "learning_rate": 3.2811399295549155e-05, "loss": 1.6753, "step": 537800 }, { "epoch": 0.34, "learning_rate": 3.2808197246237594e-05, "loss": 1.6909, "step": 537900 }, { "epoch": 0.34, "learning_rate": 3.2804995196926034e-05, "loss": 1.6766, "step": 538000 }, { "epoch": 0.34, "eval_loss": 1.74301016330719, "eval_runtime": 92.8161, "eval_samples_per_second": 107.74, "eval_steps_per_second": 6.734, "step": 538000 }, { "epoch": 0.34, "learning_rate": 3.2801793147614474e-05, "loss": 1.6748, "step": 538100 }, { "epoch": 0.34, "learning_rate": 3.2798591098302914e-05, "loss": 1.6672, "step": 538200 }, { "epoch": 0.34, "learning_rate": 3.279538904899135e-05, "loss": 1.6697, "step": 538300 }, { "epoch": 0.34, "learning_rate": 3.27921869996798e-05, "loss": 1.6798, "step": 538400 }, { "epoch": 0.34, "learning_rate": 3.278898495036823e-05, "loss": 1.6947, "step": 538500 }, { "epoch": 0.34, "learning_rate": 3.278578290105668e-05, "loss": 1.6738, "step": 538600 }, { "epoch": 0.34, "learning_rate": 3.278258085174512e-05, "loss": 1.6709, "step": 538700 }, { "epoch": 0.34, "learning_rate": 3.277937880243356e-05, "loss": 1.6938, "step": 538800 }, { "epoch": 0.34, "learning_rate": 3.2776176753122e-05, "loss": 1.6813, "step": 538900 }, { "epoch": 0.34, "learning_rate": 3.277297470381044e-05, "loss": 1.6777, "step": 539000 }, { "epoch": 0.34, "eval_loss": 1.7388124465942383, "eval_runtime": 93.006, "eval_samples_per_second": 107.52, "eval_steps_per_second": 6.72, "step": 539000 }, { "epoch": 0.35, "learning_rate": 3.2769772654498885e-05, "loss": 1.6863, "step": 539100 }, { "epoch": 0.35, "learning_rate": 3.276657060518732e-05, "loss": 1.6699, "step": 539200 }, { "epoch": 0.35, "learning_rate": 3.2763368555875764e-05, "loss": 1.6859, "step": 539300 }, { "epoch": 0.35, "learning_rate": 3.2760166506564204e-05, "loss": 1.6612, "step": 539400 }, { "epoch": 0.35, "learning_rate": 3.2756964457252644e-05, "loss": 1.6707, "step": 539500 }, { "epoch": 0.35, "learning_rate": 3.2753762407941083e-05, "loss": 1.6928, "step": 539600 }, { "epoch": 0.35, "learning_rate": 3.275056035862952e-05, "loss": 1.6887, "step": 539700 }, { "epoch": 0.35, "learning_rate": 3.274735830931796e-05, "loss": 1.6738, "step": 539800 }, { "epoch": 0.35, "learning_rate": 3.27441562600064e-05, "loss": 1.669, "step": 539900 }, { "epoch": 0.35, "learning_rate": 3.274095421069485e-05, "loss": 1.6825, "step": 540000 }, { "epoch": 0.35, "eval_loss": 1.7420295476913452, "eval_runtime": 92.5859, "eval_samples_per_second": 108.008, "eval_steps_per_second": 6.75, "step": 540000 }, { "epoch": 0.35, "learning_rate": 3.273775216138329e-05, "loss": 1.6638, "step": 540100 }, { "epoch": 0.35, "learning_rate": 3.273455011207173e-05, "loss": 1.6823, "step": 540200 }, { "epoch": 0.35, "learning_rate": 3.273134806276017e-05, "loss": 1.6837, "step": 540300 }, { "epoch": 0.35, "learning_rate": 3.272814601344861e-05, "loss": 1.6802, "step": 540400 }, { "epoch": 0.35, "learning_rate": 3.272494396413705e-05, "loss": 1.6762, "step": 540500 }, { "epoch": 0.35, "learning_rate": 3.272174191482549e-05, "loss": 1.6826, "step": 540600 }, { "epoch": 0.35, "learning_rate": 3.2718539865513934e-05, "loss": 1.6773, "step": 540700 }, { "epoch": 0.35, "learning_rate": 3.2715337816202374e-05, "loss": 1.6638, "step": 540800 }, { "epoch": 0.35, "learning_rate": 3.2712135766890814e-05, "loss": 1.6579, "step": 540900 }, { "epoch": 0.35, "learning_rate": 3.270893371757925e-05, "loss": 1.6688, "step": 541000 }, { "epoch": 0.35, "eval_loss": 1.7429343461990356, "eval_runtime": 98.5068, "eval_samples_per_second": 101.516, "eval_steps_per_second": 6.345, "step": 541000 }, { "epoch": 0.35, "learning_rate": 3.270573166826769e-05, "loss": 1.6789, "step": 541100 }, { "epoch": 0.35, "learning_rate": 3.270252961895613e-05, "loss": 1.6762, "step": 541200 }, { "epoch": 0.35, "learning_rate": 3.269932756964457e-05, "loss": 1.6821, "step": 541300 }, { "epoch": 0.35, "learning_rate": 3.269612552033301e-05, "loss": 1.6747, "step": 541400 }, { "epoch": 0.35, "learning_rate": 3.269292347102145e-05, "loss": 1.6604, "step": 541500 }, { "epoch": 0.35, "learning_rate": 3.26897214217099e-05, "loss": 1.684, "step": 541600 }, { "epoch": 0.35, "learning_rate": 3.268651937239834e-05, "loss": 1.6706, "step": 541700 }, { "epoch": 0.35, "learning_rate": 3.268331732308678e-05, "loss": 1.6754, "step": 541800 }, { "epoch": 0.35, "learning_rate": 3.268011527377522e-05, "loss": 1.6755, "step": 541900 }, { "epoch": 0.35, "learning_rate": 3.267691322446366e-05, "loss": 1.6843, "step": 542000 }, { "epoch": 0.35, "eval_loss": 1.73605215549469, "eval_runtime": 90.3627, "eval_samples_per_second": 110.665, "eval_steps_per_second": 6.917, "step": 542000 }, { "epoch": 0.35, "learning_rate": 3.26737111751521e-05, "loss": 1.6795, "step": 542100 }, { "epoch": 0.35, "learning_rate": 3.267050912584054e-05, "loss": 1.6911, "step": 542200 }, { "epoch": 0.35, "learning_rate": 3.266730707652898e-05, "loss": 1.657, "step": 542300 }, { "epoch": 0.35, "learning_rate": 3.266410502721742e-05, "loss": 1.6955, "step": 542400 }, { "epoch": 0.35, "learning_rate": 3.266090297790586e-05, "loss": 1.6499, "step": 542500 }, { "epoch": 0.35, "learning_rate": 3.26577009285943e-05, "loss": 1.6913, "step": 542600 }, { "epoch": 0.35, "learning_rate": 3.265449887928274e-05, "loss": 1.6602, "step": 542700 }, { "epoch": 0.35, "learning_rate": 3.265129682997118e-05, "loss": 1.6793, "step": 542800 }, { "epoch": 0.35, "learning_rate": 3.264809478065962e-05, "loss": 1.6809, "step": 542900 }, { "epoch": 0.35, "learning_rate": 3.264489273134806e-05, "loss": 1.6615, "step": 543000 }, { "epoch": 0.35, "eval_loss": 1.7422709465026855, "eval_runtime": 92.3367, "eval_samples_per_second": 108.299, "eval_steps_per_second": 6.769, "step": 543000 }, { "epoch": 0.35, "learning_rate": 3.264169068203651e-05, "loss": 1.6736, "step": 543100 }, { "epoch": 0.35, "learning_rate": 3.263848863272495e-05, "loss": 1.6805, "step": 543200 }, { "epoch": 0.35, "learning_rate": 3.263528658341339e-05, "loss": 1.6601, "step": 543300 }, { "epoch": 0.35, "learning_rate": 3.263208453410183e-05, "loss": 1.6734, "step": 543400 }, { "epoch": 0.35, "learning_rate": 3.262888248479027e-05, "loss": 1.6836, "step": 543500 }, { "epoch": 0.35, "learning_rate": 3.262568043547871e-05, "loss": 1.6679, "step": 543600 }, { "epoch": 0.35, "learning_rate": 3.262247838616715e-05, "loss": 1.6972, "step": 543700 }, { "epoch": 0.35, "learning_rate": 3.2619276336855586e-05, "loss": 1.6835, "step": 543800 }, { "epoch": 0.35, "learning_rate": 3.2616074287544026e-05, "loss": 1.6785, "step": 543900 }, { "epoch": 0.35, "learning_rate": 3.261287223823247e-05, "loss": 1.6814, "step": 544000 }, { "epoch": 0.35, "eval_loss": 1.7440274953842163, "eval_runtime": 91.3186, "eval_samples_per_second": 109.507, "eval_steps_per_second": 6.844, "step": 544000 }, { "epoch": 0.35, "learning_rate": 3.260967018892091e-05, "loss": 1.6737, "step": 544100 }, { "epoch": 0.35, "learning_rate": 3.260646813960935e-05, "loss": 1.6801, "step": 544200 }, { "epoch": 0.35, "learning_rate": 3.260326609029779e-05, "loss": 1.6621, "step": 544300 }, { "epoch": 0.35, "learning_rate": 3.260006404098623e-05, "loss": 1.688, "step": 544400 }, { "epoch": 0.35, "learning_rate": 3.259686199167467e-05, "loss": 1.677, "step": 544500 }, { "epoch": 0.35, "learning_rate": 3.259365994236311e-05, "loss": 1.6898, "step": 544600 }, { "epoch": 0.35, "learning_rate": 3.259045789305156e-05, "loss": 1.664, "step": 544700 }, { "epoch": 0.35, "learning_rate": 3.258725584374e-05, "loss": 1.6927, "step": 544800 }, { "epoch": 0.35, "learning_rate": 3.258405379442844e-05, "loss": 1.688, "step": 544900 }, { "epoch": 0.35, "learning_rate": 3.258085174511688e-05, "loss": 1.6515, "step": 545000 }, { "epoch": 0.35, "eval_loss": 1.7410451173782349, "eval_runtime": 94.0874, "eval_samples_per_second": 106.284, "eval_steps_per_second": 6.643, "step": 545000 }, { "epoch": 0.35, "learning_rate": 3.2577649695805317e-05, "loss": 1.6642, "step": 545100 }, { "epoch": 0.35, "learning_rate": 3.2574447646493756e-05, "loss": 1.6822, "step": 545200 }, { "epoch": 0.35, "learning_rate": 3.2571245597182196e-05, "loss": 1.679, "step": 545300 }, { "epoch": 0.35, "learning_rate": 3.256804354787064e-05, "loss": 1.6556, "step": 545400 }, { "epoch": 0.35, "learning_rate": 3.2564841498559076e-05, "loss": 1.6689, "step": 545500 }, { "epoch": 0.35, "learning_rate": 3.256163944924752e-05, "loss": 1.6872, "step": 545600 }, { "epoch": 0.35, "learning_rate": 3.255843739993596e-05, "loss": 1.6918, "step": 545700 }, { "epoch": 0.35, "learning_rate": 3.25552353506244e-05, "loss": 1.6755, "step": 545800 }, { "epoch": 0.35, "learning_rate": 3.255203330131284e-05, "loss": 1.6632, "step": 545900 }, { "epoch": 0.35, "learning_rate": 3.254883125200128e-05, "loss": 1.6774, "step": 546000 }, { "epoch": 0.35, "eval_loss": 1.7422646284103394, "eval_runtime": 92.0043, "eval_samples_per_second": 108.691, "eval_steps_per_second": 6.793, "step": 546000 }, { "epoch": 0.35, "learning_rate": 3.254562920268972e-05, "loss": 1.67, "step": 546100 }, { "epoch": 0.35, "learning_rate": 3.254242715337816e-05, "loss": 1.6612, "step": 546200 }, { "epoch": 0.35, "learning_rate": 3.253922510406661e-05, "loss": 1.6811, "step": 546300 }, { "epoch": 0.35, "learning_rate": 3.253602305475505e-05, "loss": 1.6835, "step": 546400 }, { "epoch": 0.35, "learning_rate": 3.2532821005443486e-05, "loss": 1.6712, "step": 546500 }, { "epoch": 0.35, "learning_rate": 3.2529618956131926e-05, "loss": 1.6515, "step": 546600 }, { "epoch": 0.35, "learning_rate": 3.2526416906820366e-05, "loss": 1.6858, "step": 546700 }, { "epoch": 0.35, "learning_rate": 3.2523214857508806e-05, "loss": 1.6705, "step": 546800 }, { "epoch": 0.35, "learning_rate": 3.2520012808197245e-05, "loss": 1.6938, "step": 546900 }, { "epoch": 0.35, "learning_rate": 3.251681075888569e-05, "loss": 1.6761, "step": 547000 }, { "epoch": 0.35, "eval_loss": 1.7418359518051147, "eval_runtime": 91.807, "eval_samples_per_second": 108.924, "eval_steps_per_second": 6.808, "step": 547000 }, { "epoch": 0.35, "learning_rate": 3.2513608709574125e-05, "loss": 1.7055, "step": 547100 }, { "epoch": 0.35, "learning_rate": 3.251040666026257e-05, "loss": 1.6681, "step": 547200 }, { "epoch": 0.35, "learning_rate": 3.250720461095101e-05, "loss": 1.6694, "step": 547300 }, { "epoch": 0.35, "learning_rate": 3.250400256163945e-05, "loss": 1.6586, "step": 547400 }, { "epoch": 0.35, "learning_rate": 3.250080051232789e-05, "loss": 1.6769, "step": 547500 }, { "epoch": 0.35, "learning_rate": 3.249759846301633e-05, "loss": 1.6845, "step": 547600 }, { "epoch": 0.35, "learning_rate": 3.249439641370478e-05, "loss": 1.6816, "step": 547700 }, { "epoch": 0.35, "learning_rate": 3.249119436439321e-05, "loss": 1.6779, "step": 547800 }, { "epoch": 0.35, "learning_rate": 3.2487992315081656e-05, "loss": 1.6627, "step": 547900 }, { "epoch": 0.35, "learning_rate": 3.2484790265770096e-05, "loss": 1.6882, "step": 548000 }, { "epoch": 0.35, "eval_loss": 1.7420125007629395, "eval_runtime": 90.9579, "eval_samples_per_second": 109.941, "eval_steps_per_second": 6.871, "step": 548000 }, { "epoch": 0.35, "learning_rate": 3.2481588216458536e-05, "loss": 1.6746, "step": 548100 }, { "epoch": 0.35, "learning_rate": 3.2478386167146976e-05, "loss": 1.6722, "step": 548200 }, { "epoch": 0.35, "learning_rate": 3.2475184117835415e-05, "loss": 1.6835, "step": 548300 }, { "epoch": 0.35, "learning_rate": 3.2471982068523855e-05, "loss": 1.6655, "step": 548400 }, { "epoch": 0.35, "learning_rate": 3.2468780019212295e-05, "loss": 1.6522, "step": 548500 }, { "epoch": 0.35, "learning_rate": 3.246557796990074e-05, "loss": 1.6876, "step": 548600 }, { "epoch": 0.35, "learning_rate": 3.2462375920589174e-05, "loss": 1.6702, "step": 548700 }, { "epoch": 0.35, "learning_rate": 3.245917387127762e-05, "loss": 1.6641, "step": 548800 }, { "epoch": 0.35, "learning_rate": 3.245597182196606e-05, "loss": 1.6492, "step": 548900 }, { "epoch": 0.35, "learning_rate": 3.24527697726545e-05, "loss": 1.6587, "step": 549000 }, { "epoch": 0.35, "eval_loss": 1.7417964935302734, "eval_runtime": 90.3129, "eval_samples_per_second": 110.726, "eval_steps_per_second": 6.92, "step": 549000 }, { "epoch": 0.35, "learning_rate": 3.244956772334294e-05, "loss": 1.6802, "step": 549100 }, { "epoch": 0.35, "learning_rate": 3.244636567403138e-05, "loss": 1.6982, "step": 549200 }, { "epoch": 0.35, "learning_rate": 3.2443163624719826e-05, "loss": 1.6828, "step": 549300 }, { "epoch": 0.35, "learning_rate": 3.243996157540826e-05, "loss": 1.6846, "step": 549400 }, { "epoch": 0.35, "learning_rate": 3.2436759526096706e-05, "loss": 1.6845, "step": 549500 }, { "epoch": 0.35, "learning_rate": 3.2433557476785145e-05, "loss": 1.6783, "step": 549600 }, { "epoch": 0.35, "learning_rate": 3.2430355427473585e-05, "loss": 1.6735, "step": 549700 }, { "epoch": 0.35, "learning_rate": 3.2427153378162025e-05, "loss": 1.6593, "step": 549800 }, { "epoch": 0.35, "learning_rate": 3.2423951328850465e-05, "loss": 1.6749, "step": 549900 }, { "epoch": 0.35, "learning_rate": 3.242074927953891e-05, "loss": 1.6968, "step": 550000 }, { "epoch": 0.35, "eval_loss": 1.7401636838912964, "eval_runtime": 89.4198, "eval_samples_per_second": 111.832, "eval_steps_per_second": 6.99, "step": 550000 } ], "max_steps": 1562500, "num_train_epochs": 1, "total_flos": 1.5064660726719971e+19, "trial_name": null, "trial_params": null }