{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9646153910676615, "global_step": 690000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.998839665544078e-05, "loss": 1.4934, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.9976746711104215e-05, "loss": 1.3318, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.996509676676765e-05, "loss": 1.2797, "step": 1500 }, { "epoch": 0.0, "learning_rate": 4.995344682243108e-05, "loss": 1.244, "step": 2000 }, { "epoch": 0.0, "learning_rate": 4.994186677776054e-05, "loss": 1.2432, "step": 2500 }, { "epoch": 0.0, "learning_rate": 4.993026343320132e-05, "loss": 1.2192, "step": 3000 }, { "epoch": 0.0, "learning_rate": 4.991863678875343e-05, "loss": 1.4231, "step": 3500 }, { "epoch": 0.01, "learning_rate": 4.990701014430553e-05, "loss": 1.6854, "step": 4000 }, { "epoch": 0.01, "learning_rate": 4.989536019996897e-05, "loss": 1.3492, "step": 4500 }, { "epoch": 0.01, "learning_rate": 4.98837102556324e-05, "loss": 1.3758, "step": 5000 }, { "epoch": 0.01, "learning_rate": 4.987206031129584e-05, "loss": 1.2596, "step": 5500 }, { "epoch": 0.01, "learning_rate": 4.986041036695927e-05, "loss": 1.5168, "step": 6000 }, { "epoch": 0.01, "learning_rate": 4.9848760422622705e-05, "loss": 1.465, "step": 6500 }, { "epoch": 0.01, "learning_rate": 4.983713377817481e-05, "loss": 1.408, "step": 7000 }, { "epoch": 0.01, "learning_rate": 4.982550713372692e-05, "loss": 1.6209, "step": 7500 }, { "epoch": 0.01, "learning_rate": 4.981385718939035e-05, "loss": 1.681, "step": 8000 }, { "epoch": 0.01, "learning_rate": 4.980220724505378e-05, "loss": 1.6519, "step": 8500 }, { "epoch": 0.01, "learning_rate": 4.979055730071722e-05, "loss": 1.8553, "step": 9000 }, { "epoch": 0.01, "learning_rate": 4.9778907356380654e-05, "loss": 1.7466, "step": 9500 }, { "epoch": 0.01, "learning_rate": 4.976728071193276e-05, "loss": 1.7394, "step": 10000 }, { "epoch": 0.01, "learning_rate": 4.9755630767596196e-05, "loss": 1.648, "step": 10500 }, { "epoch": 0.02, "learning_rate": 4.974398082325963e-05, "loss": 1.788, "step": 11000 }, { "epoch": 0.02, "learning_rate": 4.973233087892306e-05, "loss": 2.0314, "step": 11500 }, { "epoch": 0.02, "learning_rate": 4.972068093458649e-05, "loss": 1.7554, "step": 12000 }, { "epoch": 0.02, "learning_rate": 4.97090542901386e-05, "loss": 1.66, "step": 12500 }, { "epoch": 0.02, "learning_rate": 4.969740434580204e-05, "loss": 1.5643, "step": 13000 }, { "epoch": 0.02, "learning_rate": 4.9685777701354144e-05, "loss": 1.4357, "step": 13500 }, { "epoch": 0.02, "learning_rate": 4.967412775701758e-05, "loss": 1.4221, "step": 14000 }, { "epoch": 0.02, "learning_rate": 4.9662477812681016e-05, "loss": 1.3905, "step": 14500 }, { "epoch": 0.02, "learning_rate": 4.965082786834445e-05, "loss": 1.5023, "step": 15000 }, { "epoch": 0.02, "learning_rate": 4.963917792400788e-05, "loss": 1.298, "step": 15500 }, { "epoch": 0.02, "learning_rate": 4.962752797967132e-05, "loss": 1.1518, "step": 16000 }, { "epoch": 0.02, "learning_rate": 4.961587803533475e-05, "loss": 1.1188, "step": 16500 }, { "epoch": 0.02, "learning_rate": 4.9604228090998185e-05, "loss": 1.1016, "step": 17000 }, { "epoch": 0.02, "learning_rate": 4.959257814666162e-05, "loss": 1.0946, "step": 17500 }, { "epoch": 0.03, "learning_rate": 4.958092820232505e-05, "loss": 1.0894, "step": 18000 }, { "epoch": 0.03, "learning_rate": 4.956927825798849e-05, "loss": 1.07, "step": 18500 }, { "epoch": 0.03, "learning_rate": 4.955762831365192e-05, "loss": 1.0765, "step": 19000 }, { "epoch": 0.03, "learning_rate": 4.954597836931535e-05, "loss": 1.1306, "step": 19500 }, { "epoch": 0.03, "learning_rate": 4.9534328424978786e-05, "loss": 1.0957, "step": 20000 }, { "epoch": 0.03, "learning_rate": 4.952267848064222e-05, "loss": 1.0914, "step": 20500 }, { "epoch": 0.03, "learning_rate": 4.951102853630565e-05, "loss": 1.0757, "step": 21000 }, { "epoch": 0.03, "learning_rate": 4.949937859196909e-05, "loss": 1.0464, "step": 21500 }, { "epoch": 0.03, "learning_rate": 4.948772864763253e-05, "loss": 1.0437, "step": 22000 }, { "epoch": 0.03, "learning_rate": 4.947610200318463e-05, "loss": 1.0503, "step": 22500 }, { "epoch": 0.03, "learning_rate": 4.9464452058848064e-05, "loss": 1.0416, "step": 23000 }, { "epoch": 0.03, "learning_rate": 4.9452802114511496e-05, "loss": 1.0405, "step": 23500 }, { "epoch": 0.03, "learning_rate": 4.944115217017493e-05, "loss": 1.0279, "step": 24000 }, { "epoch": 0.03, "learning_rate": 4.942950222583837e-05, "loss": 1.0252, "step": 24500 }, { "epoch": 0.03, "learning_rate": 4.94178522815018e-05, "loss": 1.0149, "step": 25000 }, { "epoch": 0.04, "learning_rate": 4.940620233716524e-05, "loss": 1.0163, "step": 25500 }, { "epoch": 0.04, "learning_rate": 4.939455239282867e-05, "loss": 1.0117, "step": 26000 }, { "epoch": 0.04, "learning_rate": 4.9382902448492104e-05, "loss": 1.0097, "step": 26500 }, { "epoch": 0.04, "learning_rate": 4.9371252504155536e-05, "loss": 1.0097, "step": 27000 }, { "epoch": 0.04, "learning_rate": 4.9359602559818975e-05, "loss": 1.0041, "step": 27500 }, { "epoch": 0.04, "learning_rate": 4.934795261548241e-05, "loss": 1.008, "step": 28000 }, { "epoch": 0.04, "learning_rate": 4.933630267114584e-05, "loss": 1.0131, "step": 28500 }, { "epoch": 0.04, "learning_rate": 4.932465272680927e-05, "loss": 1.0003, "step": 29000 }, { "epoch": 0.04, "learning_rate": 4.931302608236138e-05, "loss": 1.0064, "step": 29500 }, { "epoch": 0.04, "learning_rate": 4.9301376138024815e-05, "loss": 1.0111, "step": 30000 }, { "epoch": 0.04, "learning_rate": 4.928974949357692e-05, "loss": 1.0034, "step": 30500 }, { "epoch": 0.04, "learning_rate": 4.9278099549240357e-05, "loss": 1.0013, "step": 31000 }, { "epoch": 0.04, "learning_rate": 4.926644960490379e-05, "loss": 0.9905, "step": 31500 }, { "epoch": 0.04, "learning_rate": 4.92548229604559e-05, "loss": 0.992, "step": 32000 }, { "epoch": 0.05, "learning_rate": 4.924317301611933e-05, "loss": 0.9861, "step": 32500 }, { "epoch": 0.05, "learning_rate": 4.923152307178276e-05, "loss": 0.9846, "step": 33000 }, { "epoch": 0.05, "learning_rate": 4.9219873127446196e-05, "loss": 0.9831, "step": 33500 }, { "epoch": 0.05, "learning_rate": 4.920822318310963e-05, "loss": 0.985, "step": 34000 }, { "epoch": 0.05, "learning_rate": 4.919657323877307e-05, "loss": 0.9822, "step": 34500 }, { "epoch": 0.05, "learning_rate": 4.91849232944365e-05, "loss": 0.9923, "step": 35000 }, { "epoch": 0.05, "learning_rate": 4.917327335009994e-05, "loss": 0.9875, "step": 35500 }, { "epoch": 0.05, "learning_rate": 4.916162340576337e-05, "loss": 0.9812, "step": 36000 }, { "epoch": 0.05, "learning_rate": 4.9149973461426804e-05, "loss": 0.9751, "step": 36500 }, { "epoch": 0.05, "learning_rate": 4.913832351709024e-05, "loss": 0.9709, "step": 37000 }, { "epoch": 0.05, "learning_rate": 4.9126673572753675e-05, "loss": 0.984, "step": 37500 }, { "epoch": 0.05, "learning_rate": 4.911502362841711e-05, "loss": 0.9851, "step": 38000 }, { "epoch": 0.05, "learning_rate": 4.910337368408054e-05, "loss": 0.9652, "step": 38500 }, { "epoch": 0.05, "learning_rate": 4.909172373974397e-05, "loss": 0.9695, "step": 39000 }, { "epoch": 0.06, "learning_rate": 4.9080073795407405e-05, "loss": 0.9655, "step": 39500 }, { "epoch": 0.06, "learning_rate": 4.9068423851070844e-05, "loss": 0.9629, "step": 40000 }, { "epoch": 0.06, "learning_rate": 4.9056773906734276e-05, "loss": 0.956, "step": 40500 }, { "epoch": 0.06, "learning_rate": 4.904512396239771e-05, "loss": 0.9642, "step": 41000 }, { "epoch": 0.06, "learning_rate": 4.903347401806114e-05, "loss": 0.9581, "step": 41500 }, { "epoch": 0.06, "learning_rate": 4.902182407372457e-05, "loss": 0.9582, "step": 42000 }, { "epoch": 0.06, "learning_rate": 4.901017412938801e-05, "loss": 0.9511, "step": 42500 }, { "epoch": 0.06, "learning_rate": 4.8998524185051445e-05, "loss": 0.9475, "step": 43000 }, { "epoch": 0.06, "learning_rate": 4.8986944140380895e-05, "loss": 0.9716, "step": 43500 }, { "epoch": 0.06, "learning_rate": 4.8975317495933005e-05, "loss": 0.9773, "step": 44000 }, { "epoch": 0.06, "learning_rate": 4.8963667551596444e-05, "loss": 0.9565, "step": 44500 }, { "epoch": 0.06, "learning_rate": 4.8952017607259876e-05, "loss": 0.9489, "step": 45000 }, { "epoch": 0.06, "learning_rate": 4.8940390962811986e-05, "loss": 0.9542, "step": 45500 }, { "epoch": 0.06, "learning_rate": 4.892874101847542e-05, "loss": 0.9575, "step": 46000 }, { "epoch": 0.07, "learning_rate": 4.891711437402752e-05, "loss": 0.954, "step": 46500 }, { "epoch": 0.07, "learning_rate": 4.8905464429690954e-05, "loss": 0.9449, "step": 47000 }, { "epoch": 0.07, "learning_rate": 4.8893814485354386e-05, "loss": 0.9468, "step": 47500 }, { "epoch": 0.07, "learning_rate": 4.8882164541017825e-05, "loss": 0.9457, "step": 48000 }, { "epoch": 0.07, "learning_rate": 4.887051459668126e-05, "loss": 0.9456, "step": 48500 }, { "epoch": 0.07, "learning_rate": 4.88588646523447e-05, "loss": 0.9636, "step": 49000 }, { "epoch": 0.07, "learning_rate": 4.884721470800813e-05, "loss": 0.9583, "step": 49500 }, { "epoch": 0.07, "learning_rate": 4.883558806356023e-05, "loss": 0.9474, "step": 50000 }, { "epoch": 0.07, "learning_rate": 4.882396141911234e-05, "loss": 1.0342, "step": 50500 }, { "epoch": 0.07, "learning_rate": 4.8812311474775774e-05, "loss": 1.0187, "step": 51000 }, { "epoch": 0.07, "learning_rate": 4.880066153043921e-05, "loss": 0.9742, "step": 51500 }, { "epoch": 0.07, "learning_rate": 4.8789011586102645e-05, "loss": 0.9483, "step": 52000 }, { "epoch": 0.07, "learning_rate": 4.877736164176608e-05, "loss": 0.9584, "step": 52500 }, { "epoch": 0.07, "learning_rate": 4.876571169742951e-05, "loss": 0.959, "step": 53000 }, { "epoch": 0.07, "learning_rate": 4.875406175309294e-05, "loss": 0.9821, "step": 53500 }, { "epoch": 0.08, "learning_rate": 4.8742411808756375e-05, "loss": 0.9683, "step": 54000 }, { "epoch": 0.08, "learning_rate": 4.8730785164308485e-05, "loss": 0.9494, "step": 54500 }, { "epoch": 0.08, "learning_rate": 4.8719135219971924e-05, "loss": 0.9441, "step": 55000 }, { "epoch": 0.08, "learning_rate": 4.8707485275635356e-05, "loss": 0.9676, "step": 55500 }, { "epoch": 0.08, "learning_rate": 4.869583533129879e-05, "loss": 0.9357, "step": 56000 }, { "epoch": 0.08, "learning_rate": 4.868418538696222e-05, "loss": 0.9492, "step": 56500 }, { "epoch": 0.08, "learning_rate": 4.867253544262565e-05, "loss": 0.9378, "step": 57000 }, { "epoch": 0.08, "learning_rate": 4.866088549828909e-05, "loss": 0.9318, "step": 57500 }, { "epoch": 0.08, "learning_rate": 4.8649235553952525e-05, "loss": 0.9271, "step": 58000 }, { "epoch": 0.08, "learning_rate": 4.863758560961596e-05, "loss": 0.9432, "step": 58500 }, { "epoch": 0.08, "learning_rate": 4.8625935665279396e-05, "loss": 0.931, "step": 59000 }, { "epoch": 0.08, "learning_rate": 4.861428572094283e-05, "loss": 0.9315, "step": 59500 }, { "epoch": 0.08, "learning_rate": 4.860263577660626e-05, "loss": 0.9427, "step": 60000 }, { "epoch": 0.08, "learning_rate": 4.859100913215837e-05, "loss": 0.9496, "step": 60500 }, { "epoch": 0.09, "learning_rate": 4.85793591878218e-05, "loss": 0.9518, "step": 61000 }, { "epoch": 0.09, "learning_rate": 4.8567709243485235e-05, "loss": 0.933, "step": 61500 }, { "epoch": 0.09, "learning_rate": 4.855605929914867e-05, "loss": 0.9394, "step": 62000 }, { "epoch": 0.09, "learning_rate": 4.854443265470078e-05, "loss": 0.9464, "step": 62500 }, { "epoch": 0.09, "learning_rate": 4.853278271036421e-05, "loss": 0.9254, "step": 63000 }, { "epoch": 0.09, "learning_rate": 4.852113276602764e-05, "loss": 0.9263, "step": 63500 }, { "epoch": 0.09, "learning_rate": 4.850948282169108e-05, "loss": 0.9255, "step": 64000 }, { "epoch": 0.09, "learning_rate": 4.8497832877354514e-05, "loss": 0.9209, "step": 64500 }, { "epoch": 0.09, "learning_rate": 4.8486182933017946e-05, "loss": 0.9194, "step": 65000 }, { "epoch": 0.09, "learning_rate": 4.847453298868138e-05, "loss": 0.9198, "step": 65500 }, { "epoch": 0.09, "learning_rate": 4.846288304434482e-05, "loss": 0.9218, "step": 66000 }, { "epoch": 0.09, "learning_rate": 4.845123310000825e-05, "loss": 0.9217, "step": 66500 }, { "epoch": 0.09, "learning_rate": 4.843958315567169e-05, "loss": 0.9279, "step": 67000 }, { "epoch": 0.09, "learning_rate": 4.842793321133512e-05, "loss": 0.9383, "step": 67500 }, { "epoch": 0.1, "learning_rate": 4.8416283266998554e-05, "loss": 0.9232, "step": 68000 }, { "epoch": 0.1, "learning_rate": 4.8404633322661986e-05, "loss": 0.9182, "step": 68500 }, { "epoch": 0.1, "learning_rate": 4.839300667821409e-05, "loss": 0.9315, "step": 69000 }, { "epoch": 0.1, "learning_rate": 4.838135673387753e-05, "loss": 0.9224, "step": 69500 }, { "epoch": 0.1, "learning_rate": 4.836973008942963e-05, "loss": 0.9153, "step": 70000 }, { "epoch": 0.1, "learning_rate": 4.835808014509307e-05, "loss": 0.9168, "step": 70500 }, { "epoch": 0.1, "learning_rate": 4.83464302007565e-05, "loss": 0.9121, "step": 71000 }, { "epoch": 0.1, "learning_rate": 4.8334780256419935e-05, "loss": 0.9196, "step": 71500 }, { "epoch": 0.1, "learning_rate": 4.832313031208337e-05, "loss": 0.9037, "step": 72000 }, { "epoch": 0.1, "learning_rate": 4.8311480367746806e-05, "loss": 0.9065, "step": 72500 }, { "epoch": 0.1, "learning_rate": 4.829983042341024e-05, "loss": 0.9086, "step": 73000 }, { "epoch": 0.1, "learning_rate": 4.828818047907368e-05, "loss": 0.9017, "step": 73500 }, { "epoch": 0.1, "learning_rate": 4.827653053473711e-05, "loss": 0.9111, "step": 74000 }, { "epoch": 0.1, "learning_rate": 4.826488059040054e-05, "loss": 0.9074, "step": 74500 }, { "epoch": 0.1, "learning_rate": 4.8253230646063975e-05, "loss": 0.9048, "step": 75000 }, { "epoch": 0.11, "learning_rate": 4.824158070172741e-05, "loss": 0.9062, "step": 75500 }, { "epoch": 0.11, "learning_rate": 4.8229930757390847e-05, "loss": 0.9056, "step": 76000 }, { "epoch": 0.11, "learning_rate": 4.821830411294295e-05, "loss": 0.9076, "step": 76500 }, { "epoch": 0.11, "learning_rate": 4.820665416860639e-05, "loss": 0.9077, "step": 77000 }, { "epoch": 0.11, "learning_rate": 4.819502752415849e-05, "loss": 0.9056, "step": 77500 }, { "epoch": 0.11, "learning_rate": 4.8183377579821924e-05, "loss": 0.9144, "step": 78000 }, { "epoch": 0.11, "learning_rate": 4.8171727635485356e-05, "loss": 0.9062, "step": 78500 }, { "epoch": 0.11, "learning_rate": 4.816007769114879e-05, "loss": 0.9229, "step": 79000 }, { "epoch": 0.11, "learning_rate": 4.814842774681223e-05, "loss": 0.9129, "step": 79500 }, { "epoch": 0.11, "learning_rate": 4.813677780247566e-05, "loss": 0.9158, "step": 80000 }, { "epoch": 0.11, "learning_rate": 4.81251278581391e-05, "loss": 0.9036, "step": 80500 }, { "epoch": 0.11, "learning_rate": 4.81135012136912e-05, "loss": 0.9322, "step": 81000 }, { "epoch": 0.11, "learning_rate": 4.8101851269354634e-05, "loss": 0.9122, "step": 81500 }, { "epoch": 0.11, "learning_rate": 4.809020132501807e-05, "loss": 0.9021, "step": 82000 }, { "epoch": 0.12, "learning_rate": 4.8078551380681506e-05, "loss": 0.8997, "step": 82500 }, { "epoch": 0.12, "learning_rate": 4.806690143634494e-05, "loss": 0.901, "step": 83000 }, { "epoch": 0.12, "learning_rate": 4.805525149200837e-05, "loss": 0.8983, "step": 83500 }, { "epoch": 0.12, "learning_rate": 4.804360154767181e-05, "loss": 0.898, "step": 84000 }, { "epoch": 0.12, "learning_rate": 4.803195160333524e-05, "loss": 0.8959, "step": 84500 }, { "epoch": 0.12, "learning_rate": 4.802034825877602e-05, "loss": 0.9191, "step": 85000 }, { "epoch": 0.12, "learning_rate": 4.8008698314439455e-05, "loss": 0.9007, "step": 85500 }, { "epoch": 0.12, "learning_rate": 4.799704837010289e-05, "loss": 0.8908, "step": 86000 }, { "epoch": 0.12, "learning_rate": 4.7985398425766326e-05, "loss": 0.9045, "step": 86500 }, { "epoch": 0.12, "learning_rate": 4.797374848142976e-05, "loss": 0.8994, "step": 87000 }, { "epoch": 0.12, "learning_rate": 4.796209853709319e-05, "loss": 0.8981, "step": 87500 }, { "epoch": 0.12, "learning_rate": 4.795044859275662e-05, "loss": 0.8943, "step": 88000 }, { "epoch": 0.12, "learning_rate": 4.7938798648420056e-05, "loss": 0.9004, "step": 88500 }, { "epoch": 0.12, "learning_rate": 4.7927148704083495e-05, "loss": 0.8906, "step": 89000 }, { "epoch": 0.13, "learning_rate": 4.791549875974693e-05, "loss": 0.8881, "step": 89500 }, { "epoch": 0.13, "learning_rate": 4.790384881541036e-05, "loss": 0.889, "step": 90000 }, { "epoch": 0.13, "learning_rate": 4.78921988710738e-05, "loss": 0.8866, "step": 90500 }, { "epoch": 0.13, "learning_rate": 4.788054892673723e-05, "loss": 0.8868, "step": 91000 }, { "epoch": 0.13, "learning_rate": 4.7868922282289334e-05, "loss": 0.8909, "step": 91500 }, { "epoch": 0.13, "learning_rate": 4.785727233795277e-05, "loss": 0.8825, "step": 92000 }, { "epoch": 0.13, "learning_rate": 4.7845622393616205e-05, "loss": 0.884, "step": 92500 }, { "epoch": 0.13, "learning_rate": 4.783397244927964e-05, "loss": 0.8906, "step": 93000 }, { "epoch": 0.13, "learning_rate": 4.782232250494307e-05, "loss": 0.8841, "step": 93500 }, { "epoch": 0.13, "learning_rate": 4.781067256060651e-05, "loss": 0.8847, "step": 94000 }, { "epoch": 0.13, "learning_rate": 4.779902261626994e-05, "loss": 0.8936, "step": 94500 }, { "epoch": 0.13, "learning_rate": 4.7787395971822045e-05, "loss": 0.8905, "step": 95000 }, { "epoch": 0.13, "learning_rate": 4.7775769327374154e-05, "loss": 0.8817, "step": 95500 }, { "epoch": 0.13, "learning_rate": 4.776411938303759e-05, "loss": 0.8854, "step": 96000 }, { "epoch": 0.13, "learning_rate": 4.7752492738589696e-05, "loss": 0.8862, "step": 96500 }, { "epoch": 0.14, "learning_rate": 4.7740842794253135e-05, "loss": 0.8958, "step": 97000 }, { "epoch": 0.14, "learning_rate": 4.772919284991657e-05, "loss": 0.9013, "step": 97500 }, { "epoch": 0.14, "learning_rate": 4.771754290558e-05, "loss": 0.8853, "step": 98000 }, { "epoch": 0.14, "learning_rate": 4.770589296124343e-05, "loss": 0.8809, "step": 98500 }, { "epoch": 0.14, "learning_rate": 4.769424301690687e-05, "loss": 0.8817, "step": 99000 }, { "epoch": 0.14, "learning_rate": 4.7682593072570304e-05, "loss": 0.8817, "step": 99500 }, { "epoch": 0.14, "learning_rate": 4.7670943128233736e-05, "loss": 0.881, "step": 100000 }, { "epoch": 0.14, "learning_rate": 4.765929318389717e-05, "loss": 0.9004, "step": 100500 }, { "epoch": 0.14, "learning_rate": 4.76476432395606e-05, "loss": 0.8794, "step": 101000 }, { "epoch": 0.14, "learning_rate": 4.7635993295224033e-05, "loss": 0.8715, "step": 101500 }, { "epoch": 0.14, "learning_rate": 4.762434335088747e-05, "loss": 0.8809, "step": 102000 }, { "epoch": 0.14, "learning_rate": 4.7612693406550905e-05, "loss": 0.8814, "step": 102500 }, { "epoch": 0.14, "learning_rate": 4.760104346221434e-05, "loss": 0.8783, "step": 103000 }, { "epoch": 0.14, "learning_rate": 4.758939351787777e-05, "loss": 0.8761, "step": 103500 }, { "epoch": 0.15, "learning_rate": 4.757776687342988e-05, "loss": 0.8793, "step": 104000 }, { "epoch": 0.15, "learning_rate": 4.756611692909331e-05, "loss": 0.8768, "step": 104500 }, { "epoch": 0.15, "learning_rate": 4.755446698475675e-05, "loss": 0.8768, "step": 105000 }, { "epoch": 0.15, "learning_rate": 4.754281704042018e-05, "loss": 0.8745, "step": 105500 }, { "epoch": 0.15, "learning_rate": 4.7531167096083616e-05, "loss": 0.8727, "step": 106000 }, { "epoch": 0.15, "learning_rate": 4.7519563751524396e-05, "loss": 0.8746, "step": 106500 }, { "epoch": 0.15, "learning_rate": 4.7507913807187835e-05, "loss": 0.8743, "step": 107000 }, { "epoch": 0.15, "learning_rate": 4.749626386285127e-05, "loss": 0.8775, "step": 107500 }, { "epoch": 0.15, "learning_rate": 4.74846139185147e-05, "loss": 0.8712, "step": 108000 }, { "epoch": 0.15, "learning_rate": 4.747296397417814e-05, "loss": 0.8725, "step": 108500 }, { "epoch": 0.15, "learning_rate": 4.746133732973024e-05, "loss": 0.8777, "step": 109000 }, { "epoch": 0.15, "learning_rate": 4.7449687385393674e-05, "loss": 0.8891, "step": 109500 }, { "epoch": 0.15, "learning_rate": 4.7438037441057106e-05, "loss": 0.8747, "step": 110000 }, { "epoch": 0.15, "learning_rate": 4.7426387496720546e-05, "loss": 0.8706, "step": 110500 }, { "epoch": 0.16, "learning_rate": 4.741476085227265e-05, "loss": 0.8772, "step": 111000 }, { "epoch": 0.16, "learning_rate": 4.740311090793608e-05, "loss": 0.8666, "step": 111500 }, { "epoch": 0.16, "learning_rate": 4.739146096359951e-05, "loss": 0.8743, "step": 112000 }, { "epoch": 0.16, "learning_rate": 4.737981101926295e-05, "loss": 0.8679, "step": 112500 }, { "epoch": 0.16, "learning_rate": 4.736818437481506e-05, "loss": 0.8814, "step": 113000 }, { "epoch": 0.16, "learning_rate": 4.7356534430478494e-05, "loss": 0.8725, "step": 113500 }, { "epoch": 0.16, "learning_rate": 4.734488448614193e-05, "loss": 0.8693, "step": 114000 }, { "epoch": 0.16, "learning_rate": 4.733323454180536e-05, "loss": 0.8713, "step": 114500 }, { "epoch": 0.16, "learning_rate": 4.732158459746879e-05, "loss": 0.8712, "step": 115000 }, { "epoch": 0.16, "learning_rate": 4.73099579530209e-05, "loss": 0.8651, "step": 115500 }, { "epoch": 0.16, "learning_rate": 4.729830800868434e-05, "loss": 0.8671, "step": 116000 }, { "epoch": 0.16, "learning_rate": 4.728665806434777e-05, "loss": 0.8665, "step": 116500 }, { "epoch": 0.16, "learning_rate": 4.7275008120011205e-05, "loss": 0.8701, "step": 117000 }, { "epoch": 0.16, "learning_rate": 4.726335817567464e-05, "loss": 0.8673, "step": 117500 }, { "epoch": 0.16, "learning_rate": 4.725170823133807e-05, "loss": 0.8647, "step": 118000 }, { "epoch": 0.17, "learning_rate": 4.724005828700151e-05, "loss": 0.8629, "step": 118500 }, { "epoch": 0.17, "learning_rate": 4.722840834266494e-05, "loss": 0.866, "step": 119000 }, { "epoch": 0.17, "learning_rate": 4.7216758398328374e-05, "loss": 0.8644, "step": 119500 }, { "epoch": 0.17, "learning_rate": 4.7205108453991806e-05, "loss": 0.8627, "step": 120000 }, { "epoch": 0.17, "learning_rate": 4.7193481809543916e-05, "loss": 0.8642, "step": 120500 }, { "epoch": 0.17, "learning_rate": 4.718183186520735e-05, "loss": 0.8631, "step": 121000 }, { "epoch": 0.17, "learning_rate": 4.717018192087078e-05, "loss": 0.8719, "step": 121500 }, { "epoch": 0.17, "learning_rate": 4.715853197653422e-05, "loss": 0.876, "step": 122000 }, { "epoch": 0.17, "learning_rate": 4.714688203219765e-05, "loss": 0.8672, "step": 122500 }, { "epoch": 0.17, "learning_rate": 4.713525538774976e-05, "loss": 0.871, "step": 123000 }, { "epoch": 0.17, "learning_rate": 4.7123605443413194e-05, "loss": 0.8581, "step": 123500 }, { "epoch": 0.17, "learning_rate": 4.7111978798965303e-05, "loss": 0.867, "step": 124000 }, { "epoch": 0.17, "learning_rate": 4.7100328854628736e-05, "loss": 0.87, "step": 124500 }, { "epoch": 0.17, "learning_rate": 4.708870221018084e-05, "loss": 0.8602, "step": 125000 }, { "epoch": 0.18, "learning_rate": 4.707705226584427e-05, "loss": 0.8628, "step": 125500 }, { "epoch": 0.18, "learning_rate": 4.706540232150771e-05, "loss": 0.8648, "step": 126000 }, { "epoch": 0.18, "learning_rate": 4.705375237717114e-05, "loss": 0.8581, "step": 126500 }, { "epoch": 0.18, "learning_rate": 4.704210243283458e-05, "loss": 0.861, "step": 127000 }, { "epoch": 0.18, "learning_rate": 4.7030452488498014e-05, "loss": 0.8733, "step": 127500 }, { "epoch": 0.18, "learning_rate": 4.7018802544161446e-05, "loss": 0.8674, "step": 128000 }, { "epoch": 0.18, "learning_rate": 4.7007152599824886e-05, "loss": 0.86, "step": 128500 }, { "epoch": 0.18, "learning_rate": 4.699550265548832e-05, "loss": 0.8552, "step": 129000 }, { "epoch": 0.18, "learning_rate": 4.698385271115175e-05, "loss": 0.8596, "step": 129500 }, { "epoch": 0.18, "learning_rate": 4.697220276681518e-05, "loss": 0.8566, "step": 130000 }, { "epoch": 0.18, "learning_rate": 4.6960552822478615e-05, "loss": 0.8625, "step": 130500 }, { "epoch": 0.18, "learning_rate": 4.6948926178030725e-05, "loss": 0.8611, "step": 131000 }, { "epoch": 0.18, "learning_rate": 4.693727623369416e-05, "loss": 0.8611, "step": 131500 }, { "epoch": 0.18, "learning_rate": 4.6925626289357596e-05, "loss": 0.8582, "step": 132000 }, { "epoch": 0.19, "learning_rate": 4.691397634502103e-05, "loss": 0.8596, "step": 132500 }, { "epoch": 0.19, "learning_rate": 4.690232640068446e-05, "loss": 0.8542, "step": 133000 }, { "epoch": 0.19, "learning_rate": 4.689067645634789e-05, "loss": 0.8568, "step": 133500 }, { "epoch": 0.19, "learning_rate": 4.68790498119e-05, "loss": 0.8594, "step": 134000 }, { "epoch": 0.19, "learning_rate": 4.6867399867563435e-05, "loss": 0.8529, "step": 134500 }, { "epoch": 0.19, "learning_rate": 4.6855749923226874e-05, "loss": 0.8501, "step": 135000 }, { "epoch": 0.19, "learning_rate": 4.684409997889031e-05, "loss": 0.8544, "step": 135500 }, { "epoch": 0.19, "learning_rate": 4.683245003455374e-05, "loss": 0.8526, "step": 136000 }, { "epoch": 0.19, "learning_rate": 4.682080009021717e-05, "loss": 0.8649, "step": 136500 }, { "epoch": 0.19, "learning_rate": 4.6809150145880604e-05, "loss": 0.852, "step": 137000 }, { "epoch": 0.19, "learning_rate": 4.6797500201544036e-05, "loss": 0.8574, "step": 137500 }, { "epoch": 0.19, "learning_rate": 4.6785850257207476e-05, "loss": 0.8558, "step": 138000 }, { "epoch": 0.19, "learning_rate": 4.677420031287091e-05, "loss": 0.8509, "step": 138500 }, { "epoch": 0.19, "learning_rate": 4.676255036853434e-05, "loss": 0.8587, "step": 139000 }, { "epoch": 0.2, "learning_rate": 4.675090042419777e-05, "loss": 0.8534, "step": 139500 }, { "epoch": 0.2, "learning_rate": 4.6739250479861205e-05, "loss": 0.8504, "step": 140000 }, { "epoch": 0.2, "learning_rate": 4.6727600535524644e-05, "loss": 0.8564, "step": 140500 }, { "epoch": 0.2, "learning_rate": 4.6715973891076754e-05, "loss": 0.8495, "step": 141000 }, { "epoch": 0.2, "learning_rate": 4.6704323946740186e-05, "loss": 0.8519, "step": 141500 }, { "epoch": 0.2, "learning_rate": 4.669267400240362e-05, "loss": 0.8472, "step": 142000 }, { "epoch": 0.2, "learning_rate": 4.668102405806705e-05, "loss": 0.8479, "step": 142500 }, { "epoch": 0.2, "learning_rate": 4.666937411373048e-05, "loss": 0.8503, "step": 143000 }, { "epoch": 0.2, "learning_rate": 4.6657724169393916e-05, "loss": 0.8516, "step": 143500 }, { "epoch": 0.2, "learning_rate": 4.664609752494603e-05, "loss": 0.849, "step": 144000 }, { "epoch": 0.2, "learning_rate": 4.6634470880498135e-05, "loss": 0.8481, "step": 144500 }, { "epoch": 0.2, "learning_rate": 4.6622844236050244e-05, "loss": 0.8456, "step": 145000 }, { "epoch": 0.2, "learning_rate": 4.661119429171368e-05, "loss": 0.8506, "step": 145500 }, { "epoch": 0.2, "learning_rate": 4.659954434737711e-05, "loss": 0.8504, "step": 146000 }, { "epoch": 0.2, "learning_rate": 4.658789440304054e-05, "loss": 0.848, "step": 146500 }, { "epoch": 0.21, "learning_rate": 4.6576244458703974e-05, "loss": 0.8492, "step": 147000 }, { "epoch": 0.21, "learning_rate": 4.656459451436741e-05, "loss": 0.849, "step": 147500 }, { "epoch": 0.21, "learning_rate": 4.6552944570030846e-05, "loss": 0.8546, "step": 148000 }, { "epoch": 0.21, "learning_rate": 4.6541294625694285e-05, "loss": 0.8427, "step": 148500 }, { "epoch": 0.21, "learning_rate": 4.652964468135772e-05, "loss": 0.8508, "step": 149000 }, { "epoch": 0.21, "learning_rate": 4.651799473702115e-05, "loss": 0.8483, "step": 149500 }, { "epoch": 0.21, "learning_rate": 4.650634479268458e-05, "loss": 0.848, "step": 150000 }, { "epoch": 0.21, "learning_rate": 4.649469484834802e-05, "loss": 0.8455, "step": 150500 }, { "epoch": 0.21, "learning_rate": 4.6483091503788794e-05, "loss": 0.8497, "step": 151000 }, { "epoch": 0.21, "learning_rate": 4.6471441559452233e-05, "loss": 0.852, "step": 151500 }, { "epoch": 0.21, "learning_rate": 4.6459791615115666e-05, "loss": 0.8497, "step": 152000 }, { "epoch": 0.21, "learning_rate": 4.64481416707791e-05, "loss": 0.8444, "step": 152500 }, { "epoch": 0.21, "learning_rate": 4.643649172644253e-05, "loss": 0.8655, "step": 153000 }, { "epoch": 0.21, "learning_rate": 4.642484178210596e-05, "loss": 0.843, "step": 153500 }, { "epoch": 0.22, "learning_rate": 4.6413191837769395e-05, "loss": 0.8427, "step": 154000 }, { "epoch": 0.22, "learning_rate": 4.6401541893432834e-05, "loss": 0.8416, "step": 154500 }, { "epoch": 0.22, "learning_rate": 4.6389891949096274e-05, "loss": 0.8401, "step": 155000 }, { "epoch": 0.22, "learning_rate": 4.6378242004759706e-05, "loss": 0.8429, "step": 155500 }, { "epoch": 0.22, "learning_rate": 4.636659206042314e-05, "loss": 0.8458, "step": 156000 }, { "epoch": 0.22, "learning_rate": 4.635494211608657e-05, "loss": 0.8574, "step": 156500 }, { "epoch": 0.22, "learning_rate": 4.634329217175001e-05, "loss": 0.845, "step": 157000 }, { "epoch": 0.22, "learning_rate": 4.633168882719078e-05, "loss": 0.8415, "step": 157500 }, { "epoch": 0.22, "learning_rate": 4.632003888285422e-05, "loss": 0.8424, "step": 158000 }, { "epoch": 0.22, "learning_rate": 4.6308388938517655e-05, "loss": 0.8431, "step": 158500 }, { "epoch": 0.22, "learning_rate": 4.629673899418109e-05, "loss": 0.8437, "step": 159000 }, { "epoch": 0.22, "learning_rate": 4.628508904984452e-05, "loss": 0.8367, "step": 159500 }, { "epoch": 0.22, "learning_rate": 4.627343910550795e-05, "loss": 0.8404, "step": 160000 }, { "epoch": 0.22, "learning_rate": 4.626181246106006e-05, "loss": 0.8385, "step": 160500 }, { "epoch": 0.23, "learning_rate": 4.62501625167235e-05, "loss": 0.8422, "step": 161000 }, { "epoch": 0.23, "learning_rate": 4.623851257238693e-05, "loss": 0.84, "step": 161500 }, { "epoch": 0.23, "learning_rate": 4.6226862628050365e-05, "loss": 0.8419, "step": 162000 }, { "epoch": 0.23, "learning_rate": 4.62152126837138e-05, "loss": 0.8398, "step": 162500 }, { "epoch": 0.23, "learning_rate": 4.620356273937723e-05, "loss": 0.8415, "step": 163000 }, { "epoch": 0.23, "learning_rate": 4.619191279504066e-05, "loss": 0.8439, "step": 163500 }, { "epoch": 0.23, "learning_rate": 4.61802628507041e-05, "loss": 0.8414, "step": 164000 }, { "epoch": 0.23, "learning_rate": 4.6168612906367534e-05, "loss": 0.8402, "step": 164500 }, { "epoch": 0.23, "learning_rate": 4.6156962962030966e-05, "loss": 0.8433, "step": 165000 }, { "epoch": 0.23, "learning_rate": 4.6145313017694405e-05, "loss": 0.8403, "step": 165500 }, { "epoch": 0.23, "learning_rate": 4.613368637324651e-05, "loss": 0.8376, "step": 166000 }, { "epoch": 0.23, "learning_rate": 4.612203642890994e-05, "loss": 0.8375, "step": 166500 }, { "epoch": 0.23, "learning_rate": 4.611038648457338e-05, "loss": 0.8396, "step": 167000 }, { "epoch": 0.23, "learning_rate": 4.609873654023681e-05, "loss": 0.8345, "step": 167500 }, { "epoch": 0.23, "learning_rate": 4.6087086595900245e-05, "loss": 0.8361, "step": 168000 }, { "epoch": 0.24, "learning_rate": 4.6075436651563684e-05, "loss": 0.8351, "step": 168500 }, { "epoch": 0.24, "learning_rate": 4.6063786707227116e-05, "loss": 0.8379, "step": 169000 }, { "epoch": 0.24, "learning_rate": 4.605213676289055e-05, "loss": 0.8309, "step": 169500 }, { "epoch": 0.24, "learning_rate": 4.604048681855399e-05, "loss": 0.8381, "step": 170000 }, { "epoch": 0.24, "learning_rate": 4.602883687421742e-05, "loss": 0.832, "step": 170500 }, { "epoch": 0.24, "learning_rate": 4.601718692988085e-05, "loss": 0.8333, "step": 171000 }, { "epoch": 0.24, "learning_rate": 4.6005536985544285e-05, "loss": 0.8338, "step": 171500 }, { "epoch": 0.24, "learning_rate": 4.599388704120772e-05, "loss": 0.8335, "step": 172000 }, { "epoch": 0.24, "learning_rate": 4.5982237096871156e-05, "loss": 0.8351, "step": 172500 }, { "epoch": 0.24, "learning_rate": 4.597058715253459e-05, "loss": 0.8336, "step": 173000 }, { "epoch": 0.24, "learning_rate": 4.595893720819802e-05, "loss": 0.8344, "step": 173500 }, { "epoch": 0.24, "learning_rate": 4.5947287263861453e-05, "loss": 0.8373, "step": 174000 }, { "epoch": 0.24, "learning_rate": 4.593570721919091e-05, "loss": 0.8386, "step": 174500 }, { "epoch": 0.24, "learning_rate": 4.592405727485434e-05, "loss": 0.8302, "step": 175000 }, { "epoch": 0.25, "learning_rate": 4.5912407330517775e-05, "loss": 0.8354, "step": 175500 }, { "epoch": 0.25, "learning_rate": 4.590075738618121e-05, "loss": 0.8339, "step": 176000 }, { "epoch": 0.25, "learning_rate": 4.588913074173332e-05, "loss": 0.8419, "step": 176500 }, { "epoch": 0.25, "learning_rate": 4.587750409728542e-05, "loss": 0.8406, "step": 177000 }, { "epoch": 0.25, "learning_rate": 4.586585415294886e-05, "loss": 0.835, "step": 177500 }, { "epoch": 0.25, "learning_rate": 4.585420420861229e-05, "loss": 0.8351, "step": 178000 }, { "epoch": 0.25, "learning_rate": 4.584255426427573e-05, "loss": 0.8365, "step": 178500 }, { "epoch": 0.25, "learning_rate": 4.5830927619827834e-05, "loss": 0.8316, "step": 179000 }, { "epoch": 0.25, "learning_rate": 4.5819277675491266e-05, "loss": 0.8244, "step": 179500 }, { "epoch": 0.25, "learning_rate": 4.58076277311547e-05, "loss": 0.8276, "step": 180000 }, { "epoch": 0.25, "learning_rate": 4.579597778681814e-05, "loss": 0.8323, "step": 180500 }, { "epoch": 0.25, "learning_rate": 4.578432784248157e-05, "loss": 0.8283, "step": 181000 }, { "epoch": 0.25, "learning_rate": 4.5772677898145e-05, "loss": 0.829, "step": 181500 }, { "epoch": 0.25, "learning_rate": 4.576102795380844e-05, "loss": 0.832, "step": 182000 }, { "epoch": 0.26, "learning_rate": 4.5749378009471874e-05, "loss": 0.8349, "step": 182500 }, { "epoch": 0.26, "learning_rate": 4.5737728065135306e-05, "loss": 0.8335, "step": 183000 }, { "epoch": 0.26, "learning_rate": 4.5726078120798746e-05, "loss": 0.8316, "step": 183500 }, { "epoch": 0.26, "learning_rate": 4.571442817646218e-05, "loss": 0.8307, "step": 184000 }, { "epoch": 0.26, "learning_rate": 4.570277823212561e-05, "loss": 0.8273, "step": 184500 }, { "epoch": 0.26, "learning_rate": 4.569112828778904e-05, "loss": 0.8277, "step": 185000 }, { "epoch": 0.26, "learning_rate": 4.5679478343452475e-05, "loss": 0.8278, "step": 185500 }, { "epoch": 0.26, "learning_rate": 4.5667828399115914e-05, "loss": 0.8298, "step": 186000 }, { "epoch": 0.26, "learning_rate": 4.5656178454779347e-05, "loss": 0.8277, "step": 186500 }, { "epoch": 0.26, "learning_rate": 4.5644551810331456e-05, "loss": 0.8248, "step": 187000 }, { "epoch": 0.26, "learning_rate": 4.563290186599489e-05, "loss": 0.8233, "step": 187500 }, { "epoch": 0.26, "learning_rate": 4.562125192165832e-05, "loss": 0.8258, "step": 188000 }, { "epoch": 0.26, "learning_rate": 4.560960197732175e-05, "loss": 0.8208, "step": 188500 }, { "epoch": 0.26, "learning_rate": 4.5597952032985186e-05, "loss": 0.823, "step": 189000 }, { "epoch": 0.26, "learning_rate": 4.5586302088648625e-05, "loss": 0.8265, "step": 189500 }, { "epoch": 0.27, "learning_rate": 4.557465214431206e-05, "loss": 0.8266, "step": 190000 }, { "epoch": 0.27, "learning_rate": 4.556300219997549e-05, "loss": 0.8288, "step": 190500 }, { "epoch": 0.27, "learning_rate": 4.55513755555276e-05, "loss": 0.8261, "step": 191000 }, { "epoch": 0.27, "learning_rate": 4.553972561119103e-05, "loss": 0.827, "step": 191500 }, { "epoch": 0.27, "learning_rate": 4.5528075666854464e-05, "loss": 0.8253, "step": 192000 }, { "epoch": 0.27, "learning_rate": 4.5516449022406574e-05, "loss": 0.824, "step": 192500 }, { "epoch": 0.27, "learning_rate": 4.550479907807001e-05, "loss": 0.8229, "step": 193000 }, { "epoch": 0.27, "learning_rate": 4.5493149133733445e-05, "loss": 0.8278, "step": 193500 }, { "epoch": 0.27, "learning_rate": 4.548149918939688e-05, "loss": 0.823, "step": 194000 }, { "epoch": 0.27, "learning_rate": 4.546984924506031e-05, "loss": 0.8241, "step": 194500 }, { "epoch": 0.27, "learning_rate": 4.545819930072374e-05, "loss": 0.8194, "step": 195000 }, { "epoch": 0.27, "learning_rate": 4.5446549356387175e-05, "loss": 0.8217, "step": 195500 }, { "epoch": 0.27, "learning_rate": 4.5434899412050614e-05, "loss": 0.8224, "step": 196000 }, { "epoch": 0.27, "learning_rate": 4.542327276760272e-05, "loss": 0.8192, "step": 196500 }, { "epoch": 0.28, "learning_rate": 4.5411646123154826e-05, "loss": 0.8228, "step": 197000 }, { "epoch": 0.28, "learning_rate": 4.539999617881826e-05, "loss": 0.8213, "step": 197500 }, { "epoch": 0.28, "learning_rate": 4.538836953437037e-05, "loss": 0.8341, "step": 198000 }, { "epoch": 0.28, "learning_rate": 4.53767195900338e-05, "loss": 0.8326, "step": 198500 }, { "epoch": 0.28, "learning_rate": 4.536506964569723e-05, "loss": 0.8581, "step": 199000 }, { "epoch": 0.28, "learning_rate": 4.535344300124934e-05, "loss": 0.8591, "step": 199500 }, { "epoch": 0.28, "learning_rate": 4.5341793056912775e-05, "loss": 0.8319, "step": 200000 }, { "epoch": 0.28, "learning_rate": 4.5330143112576214e-05, "loss": 0.8294, "step": 200500 }, { "epoch": 0.28, "learning_rate": 4.5318493168239646e-05, "loss": 0.8245, "step": 201000 }, { "epoch": 0.28, "learning_rate": 4.530684322390308e-05, "loss": 0.8229, "step": 201500 }, { "epoch": 0.28, "learning_rate": 4.529519327956651e-05, "loss": 0.8261, "step": 202000 }, { "epoch": 0.28, "learning_rate": 4.5283543335229944e-05, "loss": 0.8417, "step": 202500 }, { "epoch": 0.28, "learning_rate": 4.527189339089338e-05, "loss": 0.8273, "step": 203000 }, { "epoch": 0.28, "learning_rate": 4.5260243446556815e-05, "loss": 0.825, "step": 203500 }, { "epoch": 0.29, "learning_rate": 4.5248616802108925e-05, "loss": 0.8281, "step": 204000 }, { "epoch": 0.29, "learning_rate": 4.523699015766103e-05, "loss": 0.8238, "step": 204500 }, { "epoch": 0.29, "learning_rate": 4.522534021332446e-05, "loss": 0.8215, "step": 205000 }, { "epoch": 0.29, "learning_rate": 4.52136902689879e-05, "loss": 0.8256, "step": 205500 }, { "epoch": 0.29, "learning_rate": 4.520204032465133e-05, "loss": 0.8219, "step": 206000 }, { "epoch": 0.29, "learning_rate": 4.519039038031477e-05, "loss": 0.8211, "step": 206500 }, { "epoch": 0.29, "learning_rate": 4.51787404359782e-05, "loss": 0.8247, "step": 207000 }, { "epoch": 0.29, "learning_rate": 4.5167090491641635e-05, "loss": 0.8256, "step": 207500 }, { "epoch": 0.29, "learning_rate": 4.515546384719374e-05, "loss": 0.8391, "step": 208000 }, { "epoch": 0.29, "learning_rate": 4.514381390285717e-05, "loss": 0.8362, "step": 208500 }, { "epoch": 0.29, "learning_rate": 4.513216395852061e-05, "loss": 0.8285, "step": 209000 }, { "epoch": 0.29, "learning_rate": 4.512051401418404e-05, "loss": 0.8232, "step": 209500 }, { "epoch": 0.29, "learning_rate": 4.510886406984748e-05, "loss": 0.8222, "step": 210000 }, { "epoch": 0.29, "learning_rate": 4.5097214125510914e-05, "loss": 0.8231, "step": 210500 }, { "epoch": 0.29, "learning_rate": 4.5085564181174346e-05, "loss": 0.8229, "step": 211000 }, { "epoch": 0.3, "learning_rate": 4.507391423683778e-05, "loss": 0.8194, "step": 211500 }, { "epoch": 0.3, "learning_rate": 4.506226429250121e-05, "loss": 0.8189, "step": 212000 }, { "epoch": 0.3, "learning_rate": 4.505061434816465e-05, "loss": 0.8164, "step": 212500 }, { "epoch": 0.3, "learning_rate": 4.503896440382808e-05, "loss": 0.822, "step": 213000 }, { "epoch": 0.3, "learning_rate": 4.5027314459491515e-05, "loss": 0.8151, "step": 213500 }, { "epoch": 0.3, "learning_rate": 4.501566451515495e-05, "loss": 0.8195, "step": 214000 }, { "epoch": 0.3, "learning_rate": 4.500403787070706e-05, "loss": 0.8306, "step": 214500 }, { "epoch": 0.3, "learning_rate": 4.499238792637049e-05, "loss": 0.8205, "step": 215000 }, { "epoch": 0.3, "learning_rate": 4.49807612819226e-05, "loss": 0.8271, "step": 215500 }, { "epoch": 0.3, "learning_rate": 4.496911133758603e-05, "loss": 0.8212, "step": 216000 }, { "epoch": 0.3, "learning_rate": 4.495746139324947e-05, "loss": 0.825, "step": 216500 }, { "epoch": 0.3, "learning_rate": 4.49458114489129e-05, "loss": 0.817, "step": 217000 }, { "epoch": 0.3, "learning_rate": 4.4934161504576335e-05, "loss": 0.8181, "step": 217500 }, { "epoch": 0.3, "learning_rate": 4.492251156023977e-05, "loss": 0.8214, "step": 218000 }, { "epoch": 0.31, "learning_rate": 4.49108616159032e-05, "loss": 0.8148, "step": 218500 }, { "epoch": 0.31, "learning_rate": 4.489921167156664e-05, "loss": 0.8189, "step": 219000 }, { "epoch": 0.31, "learning_rate": 4.488756172723007e-05, "loss": 0.8179, "step": 219500 }, { "epoch": 0.31, "learning_rate": 4.4875911782893504e-05, "loss": 0.8149, "step": 220000 }, { "epoch": 0.31, "learning_rate": 4.4864261838556936e-05, "loss": 0.82, "step": 220500 }, { "epoch": 0.31, "learning_rate": 4.4852635194109046e-05, "loss": 0.8152, "step": 221000 }, { "epoch": 0.31, "learning_rate": 4.484098524977248e-05, "loss": 0.8125, "step": 221500 }, { "epoch": 0.31, "learning_rate": 4.482933530543592e-05, "loss": 0.8136, "step": 222000 }, { "epoch": 0.31, "learning_rate": 4.481768536109935e-05, "loss": 0.8138, "step": 222500 }, { "epoch": 0.31, "learning_rate": 4.480603541676278e-05, "loss": 0.8107, "step": 223000 }, { "epoch": 0.31, "learning_rate": 4.4794385472426214e-05, "loss": 0.8158, "step": 223500 }, { "epoch": 0.31, "learning_rate": 4.4782735528089647e-05, "loss": 0.8135, "step": 224000 }, { "epoch": 0.31, "learning_rate": 4.4771108883641756e-05, "loss": 0.816, "step": 224500 }, { "epoch": 0.31, "learning_rate": 4.475945893930519e-05, "loss": 0.8199, "step": 225000 }, { "epoch": 0.32, "learning_rate": 4.474780899496863e-05, "loss": 0.8142, "step": 225500 }, { "epoch": 0.32, "learning_rate": 4.473615905063206e-05, "loss": 0.8129, "step": 226000 }, { "epoch": 0.32, "learning_rate": 4.472450910629549e-05, "loss": 0.8195, "step": 226500 }, { "epoch": 0.32, "learning_rate": 4.4712859161958925e-05, "loss": 0.8131, "step": 227000 }, { "epoch": 0.32, "learning_rate": 4.470120921762236e-05, "loss": 0.8128, "step": 227500 }, { "epoch": 0.32, "learning_rate": 4.4689559273285796e-05, "loss": 0.8113, "step": 228000 }, { "epoch": 0.32, "learning_rate": 4.4677932628837906e-05, "loss": 0.8127, "step": 228500 }, { "epoch": 0.32, "learning_rate": 4.466628268450134e-05, "loss": 0.814, "step": 229000 }, { "epoch": 0.32, "learning_rate": 4.465463274016477e-05, "loss": 0.8139, "step": 229500 }, { "epoch": 0.32, "learning_rate": 4.464300609571688e-05, "loss": 0.8151, "step": 230000 }, { "epoch": 0.32, "learning_rate": 4.463135615138031e-05, "loss": 0.8121, "step": 230500 }, { "epoch": 0.32, "learning_rate": 4.4619706207043745e-05, "loss": 0.8115, "step": 231000 }, { "epoch": 0.32, "learning_rate": 4.460805626270718e-05, "loss": 0.8128, "step": 231500 }, { "epoch": 0.32, "learning_rate": 4.4596406318370617e-05, "loss": 0.8125, "step": 232000 }, { "epoch": 0.33, "learning_rate": 4.458475637403405e-05, "loss": 0.8092, "step": 232500 }, { "epoch": 0.33, "learning_rate": 4.457310642969748e-05, "loss": 0.8085, "step": 233000 }, { "epoch": 0.33, "learning_rate": 4.4561456485360914e-05, "loss": 0.8093, "step": 233500 }, { "epoch": 0.33, "learning_rate": 4.4549806541024346e-05, "loss": 0.816, "step": 234000 }, { "epoch": 0.33, "learning_rate": 4.4538203196465126e-05, "loss": 0.8086, "step": 234500 }, { "epoch": 0.33, "learning_rate": 4.452655325212856e-05, "loss": 0.8163, "step": 235000 }, { "epoch": 0.33, "learning_rate": 4.4514903307792e-05, "loss": 0.8117, "step": 235500 }, { "epoch": 0.33, "learning_rate": 4.450325336345543e-05, "loss": 0.8066, "step": 236000 }, { "epoch": 0.33, "learning_rate": 4.449160341911886e-05, "loss": 0.815, "step": 236500 }, { "epoch": 0.33, "learning_rate": 4.44799534747823e-05, "loss": 0.8116, "step": 237000 }, { "epoch": 0.33, "learning_rate": 4.4468326830334404e-05, "loss": 0.8091, "step": 237500 }, { "epoch": 0.33, "learning_rate": 4.445667688599784e-05, "loss": 0.8184, "step": 238000 }, { "epoch": 0.33, "learning_rate": 4.4445026941661276e-05, "loss": 0.8122, "step": 238500 }, { "epoch": 0.33, "learning_rate": 4.4433400297213386e-05, "loss": 0.8081, "step": 239000 }, { "epoch": 0.33, "learning_rate": 4.442175035287682e-05, "loss": 0.8166, "step": 239500 }, { "epoch": 0.34, "learning_rate": 4.441012370842893e-05, "loss": 0.8095, "step": 240000 }, { "epoch": 0.34, "learning_rate": 4.439847376409236e-05, "loss": 0.8067, "step": 240500 }, { "epoch": 0.34, "learning_rate": 4.438682381975579e-05, "loss": 0.8133, "step": 241000 }, { "epoch": 0.34, "learning_rate": 4.4375173875419225e-05, "loss": 0.8031, "step": 241500 }, { "epoch": 0.34, "learning_rate": 4.436352393108266e-05, "loss": 0.8135, "step": 242000 }, { "epoch": 0.34, "learning_rate": 4.4351873986746096e-05, "loss": 0.8104, "step": 242500 }, { "epoch": 0.34, "learning_rate": 4.434022404240953e-05, "loss": 0.8069, "step": 243000 }, { "epoch": 0.34, "learning_rate": 4.432857409807296e-05, "loss": 0.8095, "step": 243500 }, { "epoch": 0.34, "learning_rate": 4.431692415373639e-05, "loss": 0.8065, "step": 244000 }, { "epoch": 0.34, "learning_rate": 4.4305274209399826e-05, "loss": 0.8072, "step": 244500 }, { "epoch": 0.34, "learning_rate": 4.4293647564951935e-05, "loss": 0.8085, "step": 245000 }, { "epoch": 0.34, "learning_rate": 4.4281997620615374e-05, "loss": 0.8125, "step": 245500 }, { "epoch": 0.34, "learning_rate": 4.427034767627881e-05, "loss": 0.8063, "step": 246000 }, { "epoch": 0.34, "learning_rate": 4.425869773194224e-05, "loss": 0.8096, "step": 246500 }, { "epoch": 0.35, "learning_rate": 4.424704778760567e-05, "loss": 0.8092, "step": 247000 }, { "epoch": 0.35, "learning_rate": 4.4235397843269104e-05, "loss": 0.8079, "step": 247500 }, { "epoch": 0.35, "learning_rate": 4.4223771198821214e-05, "loss": 0.8065, "step": 248000 }, { "epoch": 0.35, "learning_rate": 4.421212125448465e-05, "loss": 0.8095, "step": 248500 }, { "epoch": 0.35, "learning_rate": 4.4200471310148085e-05, "loss": 0.8067, "step": 249000 }, { "epoch": 0.35, "learning_rate": 4.418882136581152e-05, "loss": 0.8038, "step": 249500 }, { "epoch": 0.35, "learning_rate": 4.417719472136363e-05, "loss": 0.8113, "step": 250000 }, { "epoch": 0.35, "learning_rate": 4.416554477702706e-05, "loss": 0.8055, "step": 250500 }, { "epoch": 0.35, "learning_rate": 4.415389483269049e-05, "loss": 0.803, "step": 251000 }, { "epoch": 0.35, "learning_rate": 4.4142244888353924e-05, "loss": 0.8099, "step": 251500 }, { "epoch": 0.35, "learning_rate": 4.4130594944017363e-05, "loss": 0.8102, "step": 252000 }, { "epoch": 0.35, "learning_rate": 4.4118968299569466e-05, "loss": 0.8033, "step": 252500 }, { "epoch": 0.35, "learning_rate": 4.4107341655121576e-05, "loss": 0.8094, "step": 253000 }, { "epoch": 0.35, "learning_rate": 4.409569171078501e-05, "loss": 0.8063, "step": 253500 }, { "epoch": 0.36, "learning_rate": 4.408404176644844e-05, "loss": 0.8095, "step": 254000 }, { "epoch": 0.36, "learning_rate": 4.407239182211187e-05, "loss": 0.8048, "step": 254500 }, { "epoch": 0.36, "learning_rate": 4.4060741877775305e-05, "loss": 0.8051, "step": 255000 }, { "epoch": 0.36, "learning_rate": 4.4049091933438745e-05, "loss": 0.8058, "step": 255500 }, { "epoch": 0.36, "learning_rate": 4.403744198910218e-05, "loss": 0.8087, "step": 256000 }, { "epoch": 0.36, "learning_rate": 4.402579204476561e-05, "loss": 0.8039, "step": 256500 }, { "epoch": 0.36, "learning_rate": 4.401414210042905e-05, "loss": 0.7978, "step": 257000 }, { "epoch": 0.36, "learning_rate": 4.400249215609248e-05, "loss": 0.8056, "step": 257500 }, { "epoch": 0.36, "learning_rate": 4.399084221175591e-05, "loss": 0.7997, "step": 258000 }, { "epoch": 0.36, "learning_rate": 4.397919226741935e-05, "loss": 0.803, "step": 258500 }, { "epoch": 0.36, "learning_rate": 4.3967542323082785e-05, "loss": 0.802, "step": 259000 }, { "epoch": 0.36, "learning_rate": 4.395589237874622e-05, "loss": 0.8048, "step": 259500 }, { "epoch": 0.36, "learning_rate": 4.394424243440965e-05, "loss": 0.801, "step": 260000 }, { "epoch": 0.36, "learning_rate": 4.393259249007308e-05, "loss": 0.8046, "step": 260500 }, { "epoch": 0.36, "learning_rate": 4.392094254573652e-05, "loss": 0.8051, "step": 261000 }, { "epoch": 0.37, "learning_rate": 4.390931590128863e-05, "loss": 0.8089, "step": 261500 }, { "epoch": 0.37, "learning_rate": 4.389766595695206e-05, "loss": 0.8077, "step": 262000 }, { "epoch": 0.37, "learning_rate": 4.3886016012615495e-05, "loss": 0.803, "step": 262500 }, { "epoch": 0.37, "learning_rate": 4.387436606827893e-05, "loss": 0.8043, "step": 263000 }, { "epoch": 0.37, "learning_rate": 4.386271612394236e-05, "loss": 0.8027, "step": 263500 }, { "epoch": 0.37, "learning_rate": 4.38510661796058e-05, "loss": 0.8022, "step": 264000 }, { "epoch": 0.37, "learning_rate": 4.383941623526923e-05, "loss": 0.8056, "step": 264500 }, { "epoch": 0.37, "learning_rate": 4.382778959082134e-05, "loss": 0.8055, "step": 265000 }, { "epoch": 0.37, "learning_rate": 4.3816139646484774e-05, "loss": 0.802, "step": 265500 }, { "epoch": 0.37, "learning_rate": 4.3804489702148206e-05, "loss": 0.7967, "step": 266000 }, { "epoch": 0.37, "learning_rate": 4.379283975781164e-05, "loss": 0.8052, "step": 266500 }, { "epoch": 0.37, "learning_rate": 4.378118981347507e-05, "loss": 0.7993, "step": 267000 }, { "epoch": 0.37, "learning_rate": 4.376953986913851e-05, "loss": 0.7974, "step": 267500 }, { "epoch": 0.37, "learning_rate": 4.375788992480194e-05, "loss": 0.8015, "step": 268000 }, { "epoch": 0.38, "learning_rate": 4.374626328035405e-05, "loss": 0.8023, "step": 268500 }, { "epoch": 0.38, "learning_rate": 4.3734613336017484e-05, "loss": 0.7947, "step": 269000 }, { "epoch": 0.38, "learning_rate": 4.3722963391680917e-05, "loss": 0.8031, "step": 269500 }, { "epoch": 0.38, "learning_rate": 4.371131344734435e-05, "loss": 0.801, "step": 270000 }, { "epoch": 0.38, "learning_rate": 4.369966350300779e-05, "loss": 0.7999, "step": 270500 }, { "epoch": 0.38, "learning_rate": 4.368803685855989e-05, "loss": 0.8036, "step": 271000 }, { "epoch": 0.38, "learning_rate": 4.367638691422333e-05, "loss": 0.8005, "step": 271500 }, { "epoch": 0.38, "learning_rate": 4.366473696988676e-05, "loss": 0.7988, "step": 272000 }, { "epoch": 0.38, "learning_rate": 4.3653087025550195e-05, "loss": 0.7989, "step": 272500 }, { "epoch": 0.38, "learning_rate": 4.364143708121363e-05, "loss": 0.7995, "step": 273000 }, { "epoch": 0.38, "learning_rate": 4.362978713687706e-05, "loss": 0.8034, "step": 273500 }, { "epoch": 0.38, "learning_rate": 4.36181371925405e-05, "loss": 0.8013, "step": 274000 }, { "epoch": 0.38, "learning_rate": 4.360648724820393e-05, "loss": 0.7954, "step": 274500 }, { "epoch": 0.38, "learning_rate": 4.359486060375604e-05, "loss": 0.8002, "step": 275000 }, { "epoch": 0.39, "learning_rate": 4.358321065941947e-05, "loss": 0.8041, "step": 275500 }, { "epoch": 0.39, "learning_rate": 4.3571560715082905e-05, "loss": 0.7984, "step": 276000 }, { "epoch": 0.39, "learning_rate": 4.355991077074634e-05, "loss": 0.7967, "step": 276500 }, { "epoch": 0.39, "learning_rate": 4.354826082640978e-05, "loss": 0.7998, "step": 277000 }, { "epoch": 0.39, "learning_rate": 4.353661088207321e-05, "loss": 0.7988, "step": 277500 }, { "epoch": 0.39, "learning_rate": 4.352496093773664e-05, "loss": 0.7952, "step": 278000 }, { "epoch": 0.39, "learning_rate": 4.3513310993400074e-05, "loss": 0.7984, "step": 278500 }, { "epoch": 0.39, "learning_rate": 4.350173094872953e-05, "loss": 0.7991, "step": 279000 }, { "epoch": 0.39, "learning_rate": 4.3490081004392964e-05, "loss": 0.7961, "step": 279500 }, { "epoch": 0.39, "learning_rate": 4.3478431060056396e-05, "loss": 0.8015, "step": 280000 }, { "epoch": 0.39, "learning_rate": 4.346678111571983e-05, "loss": 0.7988, "step": 280500 }, { "epoch": 0.39, "learning_rate": 4.345513117138327e-05, "loss": 0.7987, "step": 281000 }, { "epoch": 0.39, "learning_rate": 4.34434812270467e-05, "loss": 0.7979, "step": 281500 }, { "epoch": 0.39, "learning_rate": 4.343183128271013e-05, "loss": 0.7958, "step": 282000 }, { "epoch": 0.39, "learning_rate": 4.3420181338373565e-05, "loss": 0.7923, "step": 282500 }, { "epoch": 0.4, "learning_rate": 4.3408531394037e-05, "loss": 0.8003, "step": 283000 }, { "epoch": 0.4, "learning_rate": 4.339688144970043e-05, "loss": 0.7944, "step": 283500 }, { "epoch": 0.4, "learning_rate": 4.338523150536387e-05, "loss": 0.7981, "step": 284000 }, { "epoch": 0.4, "learning_rate": 4.33735815610273e-05, "loss": 0.7953, "step": 284500 }, { "epoch": 0.4, "learning_rate": 4.336195491657941e-05, "loss": 0.7931, "step": 285000 }, { "epoch": 0.4, "learning_rate": 4.335030497224284e-05, "loss": 0.7919, "step": 285500 }, { "epoch": 0.4, "learning_rate": 4.3338655027906276e-05, "loss": 0.796, "step": 286000 }, { "epoch": 0.4, "learning_rate": 4.332700508356971e-05, "loss": 0.7923, "step": 286500 }, { "epoch": 0.4, "learning_rate": 4.331537843912182e-05, "loss": 0.7909, "step": 287000 }, { "epoch": 0.4, "learning_rate": 4.330372849478526e-05, "loss": 0.8002, "step": 287500 }, { "epoch": 0.4, "learning_rate": 4.329207855044869e-05, "loss": 0.7961, "step": 288000 }, { "epoch": 0.4, "learning_rate": 4.328042860611212e-05, "loss": 0.7997, "step": 288500 }, { "epoch": 0.4, "learning_rate": 4.3268778661775554e-05, "loss": 0.7937, "step": 289000 }, { "epoch": 0.4, "learning_rate": 4.3257175317216334e-05, "loss": 0.7928, "step": 289500 }, { "epoch": 0.41, "learning_rate": 4.3245525372879766e-05, "loss": 0.7977, "step": 290000 }, { "epoch": 0.41, "learning_rate": 4.3233875428543205e-05, "loss": 0.7929, "step": 290500 }, { "epoch": 0.41, "learning_rate": 4.322222548420664e-05, "loss": 0.7884, "step": 291000 }, { "epoch": 0.41, "learning_rate": 4.321057553987008e-05, "loss": 0.7985, "step": 291500 }, { "epoch": 0.41, "learning_rate": 4.319894889542218e-05, "loss": 0.7942, "step": 292000 }, { "epoch": 0.41, "learning_rate": 4.318732225097429e-05, "loss": 0.7968, "step": 292500 }, { "epoch": 0.41, "learning_rate": 4.317567230663772e-05, "loss": 0.797, "step": 293000 }, { "epoch": 0.41, "learning_rate": 4.3164022362301154e-05, "loss": 0.7997, "step": 293500 }, { "epoch": 0.41, "learning_rate": 4.3152372417964586e-05, "loss": 0.809, "step": 294000 }, { "epoch": 0.41, "learning_rate": 4.3140745773516696e-05, "loss": 0.7955, "step": 294500 }, { "epoch": 0.41, "learning_rate": 4.3129095829180135e-05, "loss": 0.7933, "step": 295000 }, { "epoch": 0.41, "learning_rate": 4.311744588484357e-05, "loss": 0.7966, "step": 295500 }, { "epoch": 0.41, "learning_rate": 4.3105795940507e-05, "loss": 0.7945, "step": 296000 }, { "epoch": 0.41, "learning_rate": 4.309414599617043e-05, "loss": 0.7947, "step": 296500 }, { "epoch": 0.42, "learning_rate": 4.3082496051833865e-05, "loss": 0.797, "step": 297000 }, { "epoch": 0.42, "learning_rate": 4.3070869407385974e-05, "loss": 0.7984, "step": 297500 }, { "epoch": 0.42, "learning_rate": 4.305921946304941e-05, "loss": 0.821, "step": 298000 }, { "epoch": 0.42, "learning_rate": 4.3047569518712846e-05, "loss": 0.8131, "step": 298500 }, { "epoch": 0.42, "learning_rate": 4.303591957437628e-05, "loss": 0.806, "step": 299000 }, { "epoch": 0.42, "learning_rate": 4.302426963003971e-05, "loss": 0.8243, "step": 299500 }, { "epoch": 0.42, "learning_rate": 4.301261968570314e-05, "loss": 0.8011, "step": 300000 }, { "epoch": 0.42, "learning_rate": 4.300099304125525e-05, "loss": 0.7979, "step": 300500 }, { "epoch": 0.42, "learning_rate": 4.2989343096918685e-05, "loss": 0.8148, "step": 301000 }, { "epoch": 0.42, "learning_rate": 4.2977693152582124e-05, "loss": 0.8092, "step": 301500 }, { "epoch": 0.42, "learning_rate": 4.296606650813423e-05, "loss": 0.8071, "step": 302000 }, { "epoch": 0.42, "learning_rate": 4.295441656379766e-05, "loss": 0.8033, "step": 302500 }, { "epoch": 0.42, "learning_rate": 4.294276661946109e-05, "loss": 0.8039, "step": 303000 }, { "epoch": 0.42, "learning_rate": 4.293111667512453e-05, "loss": 0.7987, "step": 303500 }, { "epoch": 0.42, "learning_rate": 4.291946673078796e-05, "loss": 0.7926, "step": 304000 }, { "epoch": 0.43, "learning_rate": 4.29078167864514e-05, "loss": 0.7963, "step": 304500 }, { "epoch": 0.43, "learning_rate": 4.2896166842114835e-05, "loss": 0.8042, "step": 305000 }, { "epoch": 0.43, "learning_rate": 4.288451689777827e-05, "loss": 0.8003, "step": 305500 }, { "epoch": 0.43, "learning_rate": 4.28728669534417e-05, "loss": 0.7982, "step": 306000 }, { "epoch": 0.43, "learning_rate": 4.286121700910513e-05, "loss": 0.7946, "step": 306500 }, { "epoch": 0.43, "learning_rate": 4.284959036465724e-05, "loss": 0.7949, "step": 307000 }, { "epoch": 0.43, "learning_rate": 4.2837940420320674e-05, "loss": 0.7978, "step": 307500 }, { "epoch": 0.43, "learning_rate": 4.282631377587278e-05, "loss": 0.7875, "step": 308000 }, { "epoch": 0.43, "learning_rate": 4.2814663831536216e-05, "loss": 0.7979, "step": 308500 }, { "epoch": 0.43, "learning_rate": 4.280301388719965e-05, "loss": 0.8013, "step": 309000 }, { "epoch": 0.43, "learning_rate": 4.279136394286308e-05, "loss": 0.8133, "step": 309500 }, { "epoch": 0.43, "learning_rate": 4.277971399852651e-05, "loss": 0.8316, "step": 310000 }, { "epoch": 0.43, "learning_rate": 4.276808735407862e-05, "loss": 0.8268, "step": 310500 }, { "epoch": 0.43, "learning_rate": 4.2756437409742055e-05, "loss": 0.8176, "step": 311000 }, { "epoch": 0.44, "learning_rate": 4.2744787465405494e-05, "loss": 0.8166, "step": 311500 }, { "epoch": 0.44, "learning_rate": 4.2733137521068927e-05, "loss": 0.8111, "step": 312000 }, { "epoch": 0.44, "learning_rate": 4.272148757673236e-05, "loss": 0.8183, "step": 312500 }, { "epoch": 0.44, "learning_rate": 4.270983763239579e-05, "loss": 0.8156, "step": 313000 }, { "epoch": 0.44, "learning_rate": 4.2698187688059224e-05, "loss": 0.8149, "step": 313500 }, { "epoch": 0.44, "learning_rate": 4.268653774372266e-05, "loss": 0.8104, "step": 314000 }, { "epoch": 0.44, "learning_rate": 4.2674887799386095e-05, "loss": 0.8024, "step": 314500 }, { "epoch": 0.44, "learning_rate": 4.2663237855049534e-05, "loss": 0.8, "step": 315000 }, { "epoch": 0.44, "learning_rate": 4.265158791071297e-05, "loss": 0.8098, "step": 315500 }, { "epoch": 0.44, "learning_rate": 4.263996126626507e-05, "loss": 0.7983, "step": 316000 }, { "epoch": 0.44, "learning_rate": 4.26283113219285e-05, "loss": 0.8053, "step": 316500 }, { "epoch": 0.44, "learning_rate": 4.2616661377591934e-05, "loss": 0.8107, "step": 317000 }, { "epoch": 0.44, "learning_rate": 4.2605034733144044e-05, "loss": 0.8356, "step": 317500 }, { "epoch": 0.44, "learning_rate": 4.259338478880748e-05, "loss": 0.8317, "step": 318000 }, { "epoch": 0.45, "learning_rate": 4.2581734844470915e-05, "loss": 0.8162, "step": 318500 }, { "epoch": 0.45, "learning_rate": 4.257008490013435e-05, "loss": 0.8457, "step": 319000 }, { "epoch": 0.45, "learning_rate": 4.255845825568646e-05, "loss": 0.8166, "step": 319500 }, { "epoch": 0.45, "learning_rate": 4.254680831134989e-05, "loss": 0.8169, "step": 320000 }, { "epoch": 0.45, "learning_rate": 4.253515836701332e-05, "loss": 0.8308, "step": 320500 }, { "epoch": 0.45, "learning_rate": 4.252353172256543e-05, "loss": 0.8338, "step": 321000 }, { "epoch": 0.45, "learning_rate": 4.251188177822887e-05, "loss": 0.8385, "step": 321500 }, { "epoch": 0.45, "learning_rate": 4.25002318338923e-05, "loss": 0.8332, "step": 322000 }, { "epoch": 0.45, "learning_rate": 4.2488581889555736e-05, "loss": 0.8459, "step": 322500 }, { "epoch": 0.45, "learning_rate": 4.247693194521917e-05, "loss": 0.7985, "step": 323000 }, { "epoch": 0.45, "learning_rate": 4.24652820008826e-05, "loss": 0.7977, "step": 323500 }, { "epoch": 0.45, "learning_rate": 4.245363205654603e-05, "loss": 0.8423, "step": 324000 }, { "epoch": 0.45, "learning_rate": 4.244198211220947e-05, "loss": 0.8291, "step": 324500 }, { "epoch": 0.45, "learning_rate": 4.2430332167872904e-05, "loss": 0.8161, "step": 325000 }, { "epoch": 0.46, "learning_rate": 4.2418705523425014e-05, "loss": 0.8181, "step": 325500 }, { "epoch": 0.46, "learning_rate": 4.2407055579088446e-05, "loss": 0.8091, "step": 326000 }, { "epoch": 0.46, "learning_rate": 4.239540563475188e-05, "loss": 0.839, "step": 326500 }, { "epoch": 0.46, "learning_rate": 4.238375569041531e-05, "loss": 0.8544, "step": 327000 }, { "epoch": 0.46, "learning_rate": 4.237210574607875e-05, "loss": 0.8582, "step": 327500 }, { "epoch": 0.46, "learning_rate": 4.236047910163086e-05, "loss": 0.8648, "step": 328000 }, { "epoch": 0.46, "learning_rate": 4.234882915729429e-05, "loss": 0.8724, "step": 328500 }, { "epoch": 0.46, "learning_rate": 4.2337179212957725e-05, "loss": 0.8575, "step": 329000 }, { "epoch": 0.46, "learning_rate": 4.232552926862116e-05, "loss": 0.8607, "step": 329500 }, { "epoch": 0.46, "learning_rate": 4.231387932428459e-05, "loss": 0.8543, "step": 330000 }, { "epoch": 0.46, "learning_rate": 4.230222937994803e-05, "loss": 0.8287, "step": 330500 }, { "epoch": 0.46, "learning_rate": 4.229057943561146e-05, "loss": 0.8257, "step": 331000 }, { "epoch": 0.46, "learning_rate": 4.227895279116357e-05, "loss": 0.8382, "step": 331500 }, { "epoch": 0.46, "learning_rate": 4.2267302846827e-05, "loss": 0.8311, "step": 332000 }, { "epoch": 0.46, "learning_rate": 4.2255652902490435e-05, "loss": 0.8336, "step": 332500 }, { "epoch": 0.47, "learning_rate": 4.224400295815387e-05, "loss": 0.826, "step": 333000 }, { "epoch": 0.47, "learning_rate": 4.22323530138173e-05, "loss": 0.8268, "step": 333500 }, { "epoch": 0.47, "learning_rate": 4.222072636936941e-05, "loss": 0.829, "step": 334000 }, { "epoch": 0.47, "learning_rate": 4.220907642503284e-05, "loss": 0.829, "step": 334500 }, { "epoch": 0.47, "learning_rate": 4.219742648069628e-05, "loss": 0.8276, "step": 335000 }, { "epoch": 0.47, "learning_rate": 4.2185776536359714e-05, "loss": 0.8348, "step": 335500 }, { "epoch": 0.47, "learning_rate": 4.2174126592023146e-05, "loss": 0.8374, "step": 336000 }, { "epoch": 0.47, "learning_rate": 4.216247664768658e-05, "loss": 0.8373, "step": 336500 }, { "epoch": 0.47, "learning_rate": 4.215082670335002e-05, "loss": 0.8385, "step": 337000 }, { "epoch": 0.47, "learning_rate": 4.213917675901345e-05, "loss": 0.8631, "step": 337500 }, { "epoch": 0.47, "learning_rate": 4.212755011456555e-05, "loss": 0.8468, "step": 338000 }, { "epoch": 0.47, "learning_rate": 4.211590017022899e-05, "loss": 0.8287, "step": 338500 }, { "epoch": 0.47, "learning_rate": 4.2104250225892424e-05, "loss": 0.8369, "step": 339000 }, { "epoch": 0.47, "learning_rate": 4.2092600281555857e-05, "loss": 0.8269, "step": 339500 }, { "epoch": 0.48, "learning_rate": 4.208095033721929e-05, "loss": 0.8374, "step": 340000 }, { "epoch": 0.48, "learning_rate": 4.206930039288273e-05, "loss": 0.8268, "step": 340500 }, { "epoch": 0.48, "learning_rate": 4.205767374843483e-05, "loss": 0.8253, "step": 341000 }, { "epoch": 0.48, "learning_rate": 4.204602380409827e-05, "loss": 0.823, "step": 341500 }, { "epoch": 0.48, "learning_rate": 4.203439715965037e-05, "loss": 0.8634, "step": 342000 }, { "epoch": 0.48, "learning_rate": 4.2022747215313805e-05, "loss": 0.8651, "step": 342500 }, { "epoch": 0.48, "learning_rate": 4.2011120570865915e-05, "loss": 0.8343, "step": 343000 }, { "epoch": 0.48, "learning_rate": 4.199949392641802e-05, "loss": 0.8262, "step": 343500 }, { "epoch": 0.48, "learning_rate": 4.198784398208146e-05, "loss": 0.8061, "step": 344000 }, { "epoch": 0.48, "learning_rate": 4.197619403774489e-05, "loss": 0.8069, "step": 344500 }, { "epoch": 0.48, "learning_rate": 4.196454409340833e-05, "loss": 0.8068, "step": 345000 }, { "epoch": 0.48, "learning_rate": 4.195289414907176e-05, "loss": 0.8182, "step": 345500 }, { "epoch": 0.48, "learning_rate": 4.194124420473519e-05, "loss": 0.8081, "step": 346000 }, { "epoch": 0.48, "learning_rate": 4.1929594260398626e-05, "loss": 0.8062, "step": 346500 }, { "epoch": 0.49, "learning_rate": 4.191794431606206e-05, "loss": 0.8123, "step": 347000 }, { "epoch": 0.49, "learning_rate": 4.19062943717255e-05, "loss": 0.8168, "step": 347500 }, { "epoch": 0.49, "learning_rate": 4.189464442738893e-05, "loss": 0.8224, "step": 348000 }, { "epoch": 0.49, "learning_rate": 4.188299448305236e-05, "loss": 0.8218, "step": 348500 }, { "epoch": 0.49, "learning_rate": 4.1871344538715794e-05, "loss": 0.8066, "step": 349000 }, { "epoch": 0.49, "learning_rate": 4.1859694594379227e-05, "loss": 0.8019, "step": 349500 }, { "epoch": 0.49, "learning_rate": 4.184804465004266e-05, "loss": 0.8052, "step": 350000 }, { "epoch": 0.49, "learning_rate": 4.18363947057061e-05, "loss": 0.7851, "step": 350500 }, { "epoch": 0.49, "learning_rate": 4.182474476136953e-05, "loss": 0.7908, "step": 351000 }, { "epoch": 0.49, "learning_rate": 4.181309481703296e-05, "loss": 0.7895, "step": 351500 }, { "epoch": 0.49, "learning_rate": 4.18014448726964e-05, "loss": 0.786, "step": 352000 }, { "epoch": 0.49, "learning_rate": 4.1789818228248505e-05, "loss": 0.7863, "step": 352500 }, { "epoch": 0.49, "learning_rate": 4.177816828391194e-05, "loss": 0.7956, "step": 353000 }, { "epoch": 0.49, "learning_rate": 4.176654163946405e-05, "loss": 0.7947, "step": 353500 }, { "epoch": 0.49, "learning_rate": 4.1754891695127486e-05, "loss": 0.7881, "step": 354000 }, { "epoch": 0.5, "learning_rate": 4.174324175079092e-05, "loss": 0.7873, "step": 354500 }, { "epoch": 0.5, "learning_rate": 4.173159180645435e-05, "loss": 0.7989, "step": 355000 }, { "epoch": 0.5, "learning_rate": 4.171994186211778e-05, "loss": 0.7978, "step": 355500 }, { "epoch": 0.5, "learning_rate": 4.1708291917781215e-05, "loss": 0.7814, "step": 356000 }, { "epoch": 0.5, "learning_rate": 4.1696641973444655e-05, "loss": 0.7898, "step": 356500 }, { "epoch": 0.5, "learning_rate": 4.1685015328996764e-05, "loss": 0.7904, "step": 357000 }, { "epoch": 0.5, "learning_rate": 4.1673365384660197e-05, "loss": 0.7859, "step": 357500 }, { "epoch": 0.5, "learning_rate": 4.166171544032363e-05, "loss": 0.79, "step": 358000 }, { "epoch": 0.5, "learning_rate": 4.165006549598706e-05, "loss": 0.7886, "step": 358500 }, { "epoch": 0.5, "learning_rate": 4.1638415551650494e-05, "loss": 0.7877, "step": 359000 }, { "epoch": 0.5, "learning_rate": 4.1626765607313926e-05, "loss": 0.7856, "step": 359500 }, { "epoch": 0.5, "learning_rate": 4.1615115662977365e-05, "loss": 0.7807, "step": 360000 }, { "epoch": 0.5, "learning_rate": 4.16034657186408e-05, "loss": 0.7847, "step": 360500 }, { "epoch": 0.5, "learning_rate": 4.159181577430423e-05, "loss": 0.787, "step": 361000 }, { "epoch": 0.51, "learning_rate": 4.158016582996766e-05, "loss": 0.7851, "step": 361500 }, { "epoch": 0.51, "learning_rate": 4.15685158856311e-05, "loss": 0.787, "step": 362000 }, { "epoch": 0.51, "learning_rate": 4.1556865941294534e-05, "loss": 0.7851, "step": 362500 }, { "epoch": 0.51, "learning_rate": 4.154521599695797e-05, "loss": 0.7885, "step": 363000 }, { "epoch": 0.51, "learning_rate": 4.1533566052621405e-05, "loss": 0.7823, "step": 363500 }, { "epoch": 0.51, "learning_rate": 4.152191610828484e-05, "loss": 0.7876, "step": 364000 }, { "epoch": 0.51, "learning_rate": 4.151026616394827e-05, "loss": 0.7874, "step": 364500 }, { "epoch": 0.51, "learning_rate": 4.14986162196117e-05, "loss": 0.7853, "step": 365000 }, { "epoch": 0.51, "learning_rate": 4.148698957516381e-05, "loss": 0.7946, "step": 365500 }, { "epoch": 0.51, "learning_rate": 4.147533963082725e-05, "loss": 0.7879, "step": 366000 }, { "epoch": 0.51, "learning_rate": 4.1463689686490684e-05, "loss": 0.788, "step": 366500 }, { "epoch": 0.51, "learning_rate": 4.1452063042042787e-05, "loss": 0.7834, "step": 367000 }, { "epoch": 0.51, "learning_rate": 4.144041309770622e-05, "loss": 0.7832, "step": 367500 }, { "epoch": 0.51, "learning_rate": 4.142876315336965e-05, "loss": 0.7851, "step": 368000 }, { "epoch": 0.52, "learning_rate": 4.141711320903309e-05, "loss": 0.7814, "step": 368500 }, { "epoch": 0.52, "learning_rate": 4.140546326469652e-05, "loss": 0.7828, "step": 369000 }, { "epoch": 0.52, "learning_rate": 4.139381332035996e-05, "loss": 0.7884, "step": 369500 }, { "epoch": 0.52, "learning_rate": 4.1382186675912065e-05, "loss": 0.787, "step": 370000 }, { "epoch": 0.52, "learning_rate": 4.13705367315755e-05, "loss": 0.7932, "step": 370500 }, { "epoch": 0.52, "learning_rate": 4.135888678723893e-05, "loss": 0.788, "step": 371000 }, { "epoch": 0.52, "learning_rate": 4.134723684290236e-05, "loss": 0.7815, "step": 371500 }, { "epoch": 0.52, "learning_rate": 4.13355868985658e-05, "loss": 0.7907, "step": 372000 }, { "epoch": 0.52, "learning_rate": 4.132396025411791e-05, "loss": 0.7947, "step": 372500 }, { "epoch": 0.52, "learning_rate": 4.131231030978134e-05, "loss": 0.7902, "step": 373000 }, { "epoch": 0.52, "learning_rate": 4.1300660365444775e-05, "loss": 0.7888, "step": 373500 }, { "epoch": 0.52, "learning_rate": 4.128901042110821e-05, "loss": 0.786, "step": 374000 }, { "epoch": 0.52, "learning_rate": 4.127738377666032e-05, "loss": 0.7844, "step": 374500 }, { "epoch": 0.52, "learning_rate": 4.126573383232375e-05, "loss": 0.7838, "step": 375000 }, { "epoch": 0.52, "learning_rate": 4.125408388798718e-05, "loss": 0.7809, "step": 375500 }, { "epoch": 0.53, "learning_rate": 4.124243394365062e-05, "loss": 0.7837, "step": 376000 }, { "epoch": 0.53, "learning_rate": 4.123080729920273e-05, "loss": 0.7841, "step": 376500 }, { "epoch": 0.53, "learning_rate": 4.121915735486616e-05, "loss": 0.7862, "step": 377000 }, { "epoch": 0.53, "learning_rate": 4.1207507410529596e-05, "loss": 0.7818, "step": 377500 }, { "epoch": 0.53, "learning_rate": 4.119585746619303e-05, "loss": 0.7807, "step": 378000 }, { "epoch": 0.53, "learning_rate": 4.118420752185646e-05, "loss": 0.7823, "step": 378500 }, { "epoch": 0.53, "learning_rate": 4.11725575775199e-05, "loss": 0.7818, "step": 379000 }, { "epoch": 0.53, "learning_rate": 4.116090763318333e-05, "loss": 0.7872, "step": 379500 }, { "epoch": 0.53, "learning_rate": 4.114928098873544e-05, "loss": 0.7896, "step": 380000 }, { "epoch": 0.53, "learning_rate": 4.1137631044398874e-05, "loss": 0.7799, "step": 380500 }, { "epoch": 0.53, "learning_rate": 4.1125981100062306e-05, "loss": 0.7825, "step": 381000 }, { "epoch": 0.53, "learning_rate": 4.111433115572574e-05, "loss": 0.803, "step": 381500 }, { "epoch": 0.53, "learning_rate": 4.110268121138917e-05, "loss": 0.8095, "step": 382000 }, { "epoch": 0.53, "learning_rate": 4.109103126705261e-05, "loss": 0.8068, "step": 382500 }, { "epoch": 0.54, "learning_rate": 4.107938132271604e-05, "loss": 0.7902, "step": 383000 }, { "epoch": 0.54, "learning_rate": 4.1067731378379475e-05, "loss": 0.7808, "step": 383500 }, { "epoch": 0.54, "learning_rate": 4.105608143404291e-05, "loss": 0.7997, "step": 384000 }, { "epoch": 0.54, "learning_rate": 4.104445478959502e-05, "loss": 0.8161, "step": 384500 }, { "epoch": 0.54, "learning_rate": 4.103280484525845e-05, "loss": 0.7839, "step": 385000 }, { "epoch": 0.54, "learning_rate": 4.102115490092189e-05, "loss": 0.7872, "step": 385500 }, { "epoch": 0.54, "learning_rate": 4.100950495658532e-05, "loss": 0.7847, "step": 386000 }, { "epoch": 0.54, "learning_rate": 4.099785501224875e-05, "loss": 0.78, "step": 386500 }, { "epoch": 0.54, "learning_rate": 4.098622836780086e-05, "loss": 0.7824, "step": 387000 }, { "epoch": 0.54, "learning_rate": 4.0974578423464295e-05, "loss": 0.7872, "step": 387500 }, { "epoch": 0.54, "learning_rate": 4.09629517790164e-05, "loss": 0.7835, "step": 388000 }, { "epoch": 0.54, "learning_rate": 4.095130183467984e-05, "loss": 0.7871, "step": 388500 }, { "epoch": 0.54, "learning_rate": 4.093965189034327e-05, "loss": 0.7863, "step": 389000 }, { "epoch": 0.54, "learning_rate": 4.092800194600671e-05, "loss": 0.7843, "step": 389500 }, { "epoch": 0.55, "learning_rate": 4.091635200167014e-05, "loss": 0.7778, "step": 390000 }, { "epoch": 0.55, "learning_rate": 4.0904702057333573e-05, "loss": 0.7853, "step": 390500 }, { "epoch": 0.55, "learning_rate": 4.0893052112997006e-05, "loss": 0.7824, "step": 391000 }, { "epoch": 0.55, "learning_rate": 4.088140216866044e-05, "loss": 0.7862, "step": 391500 }, { "epoch": 0.55, "learning_rate": 4.086975222432388e-05, "loss": 0.7814, "step": 392000 }, { "epoch": 0.55, "learning_rate": 4.085810227998731e-05, "loss": 0.7866, "step": 392500 }, { "epoch": 0.55, "learning_rate": 4.084645233565074e-05, "loss": 0.7844, "step": 393000 }, { "epoch": 0.55, "learning_rate": 4.0834802391314174e-05, "loss": 0.7828, "step": 393500 }, { "epoch": 0.55, "learning_rate": 4.082315244697761e-05, "loss": 0.7886, "step": 394000 }, { "epoch": 0.55, "learning_rate": 4.0811525802529716e-05, "loss": 0.7858, "step": 394500 }, { "epoch": 0.55, "learning_rate": 4.0799875858193156e-05, "loss": 0.7856, "step": 395000 }, { "epoch": 0.55, "learning_rate": 4.078822591385659e-05, "loss": 0.786, "step": 395500 }, { "epoch": 0.55, "learning_rate": 4.077657596952002e-05, "loss": 0.7984, "step": 396000 }, { "epoch": 0.55, "learning_rate": 4.076492602518345e-05, "loss": 0.7928, "step": 396500 }, { "epoch": 0.56, "learning_rate": 4.0753276080846885e-05, "loss": 0.806, "step": 397000 }, { "epoch": 0.56, "learning_rate": 4.0741649436398995e-05, "loss": 0.7992, "step": 397500 }, { "epoch": 0.56, "learning_rate": 4.072999949206243e-05, "loss": 0.8015, "step": 398000 }, { "epoch": 0.56, "learning_rate": 4.0718349547725866e-05, "loss": 0.8038, "step": 398500 }, { "epoch": 0.56, "learning_rate": 4.07066996033893e-05, "loss": 0.7918, "step": 399000 }, { "epoch": 0.56, "learning_rate": 4.069504965905273e-05, "loss": 0.7883, "step": 399500 }, { "epoch": 0.56, "learning_rate": 4.0683399714716163e-05, "loss": 0.7783, "step": 400000 }, { "epoch": 0.56, "learning_rate": 4.0671749770379596e-05, "loss": 0.7752, "step": 400500 }, { "epoch": 0.56, "learning_rate": 4.0660099826043035e-05, "loss": 0.7721, "step": 401000 }, { "epoch": 0.56, "learning_rate": 4.064844988170647e-05, "loss": 0.7773, "step": 401500 }, { "epoch": 0.56, "learning_rate": 4.06367999373699e-05, "loss": 0.7706, "step": 402000 }, { "epoch": 0.56, "learning_rate": 4.062514999303333e-05, "loss": 0.7756, "step": 402500 }, { "epoch": 0.56, "learning_rate": 4.061352334858544e-05, "loss": 0.7757, "step": 403000 }, { "epoch": 0.56, "learning_rate": 4.0601873404248874e-05, "loss": 0.7725, "step": 403500 }, { "epoch": 0.56, "learning_rate": 4.059022345991231e-05, "loss": 0.7725, "step": 404000 }, { "epoch": 0.57, "learning_rate": 4.0578573515575746e-05, "loss": 0.7754, "step": 404500 }, { "epoch": 0.57, "learning_rate": 4.056692357123918e-05, "loss": 0.7733, "step": 405000 }, { "epoch": 0.57, "learning_rate": 4.055529692679129e-05, "loss": 0.7706, "step": 405500 }, { "epoch": 0.57, "learning_rate": 4.054367028234339e-05, "loss": 0.7704, "step": 406000 }, { "epoch": 0.57, "learning_rate": 4.053202033800683e-05, "loss": 0.7765, "step": 406500 }, { "epoch": 0.57, "learning_rate": 4.052037039367026e-05, "loss": 0.7811, "step": 407000 }, { "epoch": 0.57, "learning_rate": 4.0508743749222365e-05, "loss": 0.7806, "step": 407500 }, { "epoch": 0.57, "learning_rate": 4.04970938048858e-05, "loss": 0.7753, "step": 408000 }, { "epoch": 0.57, "learning_rate": 4.0485443860549236e-05, "loss": 0.7761, "step": 408500 }, { "epoch": 0.57, "learning_rate": 4.0473840515990016e-05, "loss": 0.8088, "step": 409000 }, { "epoch": 0.57, "learning_rate": 4.0462190571653456e-05, "loss": 0.7892, "step": 409500 }, { "epoch": 0.57, "learning_rate": 4.045054062731689e-05, "loss": 0.7851, "step": 410000 }, { "epoch": 0.57, "learning_rate": 4.043889068298032e-05, "loss": 0.7802, "step": 410500 }, { "epoch": 0.57, "learning_rate": 4.042724073864375e-05, "loss": 0.7767, "step": 411000 }, { "epoch": 0.58, "learning_rate": 4.0415590794307185e-05, "loss": 0.7801, "step": 411500 }, { "epoch": 0.58, "learning_rate": 4.0403940849970624e-05, "loss": 0.7739, "step": 412000 }, { "epoch": 0.58, "learning_rate": 4.039231420552273e-05, "loss": 0.776, "step": 412500 }, { "epoch": 0.58, "learning_rate": 4.0380664261186166e-05, "loss": 0.7782, "step": 413000 }, { "epoch": 0.58, "learning_rate": 4.03690143168496e-05, "loss": 0.7742, "step": 413500 }, { "epoch": 0.58, "learning_rate": 4.035736437251303e-05, "loss": 0.781, "step": 414000 }, { "epoch": 0.58, "learning_rate": 4.034571442817646e-05, "loss": 0.7775, "step": 414500 }, { "epoch": 0.58, "learning_rate": 4.03340644838399e-05, "loss": 0.7733, "step": 415000 }, { "epoch": 0.58, "learning_rate": 4.0322414539503335e-05, "loss": 0.7693, "step": 415500 }, { "epoch": 0.58, "learning_rate": 4.031076459516677e-05, "loss": 0.7832, "step": 416000 }, { "epoch": 0.58, "learning_rate": 4.02991146508302e-05, "loss": 0.7757, "step": 416500 }, { "epoch": 0.58, "learning_rate": 4.028746470649363e-05, "loss": 0.7746, "step": 417000 }, { "epoch": 0.58, "learning_rate": 4.0275814762157064e-05, "loss": 0.7743, "step": 417500 }, { "epoch": 0.58, "learning_rate": 4.0264164817820503e-05, "loss": 0.7696, "step": 418000 }, { "epoch": 0.59, "learning_rate": 4.0252514873483936e-05, "loss": 0.7748, "step": 418500 }, { "epoch": 0.59, "learning_rate": 4.0240888229036045e-05, "loss": 0.7715, "step": 419000 }, { "epoch": 0.59, "learning_rate": 4.022926158458815e-05, "loss": 0.776, "step": 419500 }, { "epoch": 0.59, "learning_rate": 4.021761164025159e-05, "loss": 0.7788, "step": 420000 }, { "epoch": 0.59, "learning_rate": 4.020596169591502e-05, "loss": 0.8024, "step": 420500 }, { "epoch": 0.59, "learning_rate": 4.019431175157845e-05, "loss": 0.7791, "step": 421000 }, { "epoch": 0.59, "learning_rate": 4.018266180724189e-05, "loss": 0.7831, "step": 421500 }, { "epoch": 0.59, "learning_rate": 4.0171011862905324e-05, "loss": 0.7785, "step": 422000 }, { "epoch": 0.59, "learning_rate": 4.0159385218457427e-05, "loss": 0.7784, "step": 422500 }, { "epoch": 0.59, "learning_rate": 4.0147735274120866e-05, "loss": 0.7714, "step": 423000 }, { "epoch": 0.59, "learning_rate": 4.01360853297843e-05, "loss": 0.7827, "step": 423500 }, { "epoch": 0.59, "learning_rate": 4.01244586853364e-05, "loss": 0.7722, "step": 424000 }, { "epoch": 0.59, "learning_rate": 4.011280874099983e-05, "loss": 0.7777, "step": 424500 }, { "epoch": 0.59, "learning_rate": 4.010115879666327e-05, "loss": 0.7688, "step": 425000 }, { "epoch": 0.59, "learning_rate": 4.0089508852326705e-05, "loss": 0.7771, "step": 425500 }, { "epoch": 0.6, "learning_rate": 4.007785890799014e-05, "loss": 0.7729, "step": 426000 }, { "epoch": 0.6, "learning_rate": 4.0066208963653576e-05, "loss": 0.7732, "step": 426500 }, { "epoch": 0.6, "learning_rate": 4.005455901931701e-05, "loss": 0.7754, "step": 427000 }, { "epoch": 0.6, "learning_rate": 4.004290907498044e-05, "loss": 0.7773, "step": 427500 }, { "epoch": 0.6, "learning_rate": 4.003125913064388e-05, "loss": 0.7773, "step": 428000 }, { "epoch": 0.6, "learning_rate": 4.001963248619598e-05, "loss": 0.7772, "step": 428500 }, { "epoch": 0.6, "learning_rate": 4.0007982541859415e-05, "loss": 0.7723, "step": 429000 }, { "epoch": 0.6, "learning_rate": 3.9996355897411525e-05, "loss": 0.7829, "step": 429500 }, { "epoch": 0.6, "learning_rate": 3.998470595307496e-05, "loss": 0.7803, "step": 430000 }, { "epoch": 0.6, "learning_rate": 3.997305600873839e-05, "loss": 0.7911, "step": 430500 }, { "epoch": 0.6, "learning_rate": 3.996140606440182e-05, "loss": 0.7775, "step": 431000 }, { "epoch": 0.6, "learning_rate": 3.994975612006526e-05, "loss": 0.7801, "step": 431500 }, { "epoch": 0.6, "learning_rate": 3.9938106175728694e-05, "loss": 0.7775, "step": 432000 }, { "epoch": 0.6, "learning_rate": 3.9926456231392126e-05, "loss": 0.7758, "step": 432500 }, { "epoch": 0.61, "learning_rate": 3.991480628705556e-05, "loss": 0.7754, "step": 433000 }, { "epoch": 0.61, "learning_rate": 3.9903156342719e-05, "loss": 0.7784, "step": 433500 }, { "epoch": 0.61, "learning_rate": 3.989150639838243e-05, "loss": 0.7754, "step": 434000 }, { "epoch": 0.61, "learning_rate": 3.987987975393454e-05, "loss": 0.7753, "step": 434500 }, { "epoch": 0.61, "learning_rate": 3.986822980959797e-05, "loss": 0.7779, "step": 435000 }, { "epoch": 0.61, "learning_rate": 3.9856579865261404e-05, "loss": 0.7766, "step": 435500 }, { "epoch": 0.61, "learning_rate": 3.984492992092484e-05, "loss": 0.7806, "step": 436000 }, { "epoch": 0.61, "learning_rate": 3.983327997658827e-05, "loss": 0.7814, "step": 436500 }, { "epoch": 0.61, "learning_rate": 3.982163003225171e-05, "loss": 0.7784, "step": 437000 }, { "epoch": 0.61, "learning_rate": 3.980998008791515e-05, "loss": 0.7692, "step": 437500 }, { "epoch": 0.61, "learning_rate": 3.979833014357858e-05, "loss": 0.7721, "step": 438000 }, { "epoch": 0.61, "learning_rate": 3.978668019924201e-05, "loss": 0.7697, "step": 438500 }, { "epoch": 0.61, "learning_rate": 3.9775030254905445e-05, "loss": 0.7706, "step": 439000 }, { "epoch": 0.61, "learning_rate": 3.976338031056888e-05, "loss": 0.7728, "step": 439500 }, { "epoch": 0.62, "learning_rate": 3.975173036623231e-05, "loss": 0.774, "step": 440000 }, { "epoch": 0.62, "learning_rate": 3.974008042189575e-05, "loss": 0.7686, "step": 440500 }, { "epoch": 0.62, "learning_rate": 3.972847707733653e-05, "loss": 0.7732, "step": 441000 }, { "epoch": 0.62, "learning_rate": 3.971682713299996e-05, "loss": 0.7715, "step": 441500 }, { "epoch": 0.62, "learning_rate": 3.970517718866339e-05, "loss": 0.773, "step": 442000 }, { "epoch": 0.62, "learning_rate": 3.9693527244326826e-05, "loss": 0.7728, "step": 442500 }, { "epoch": 0.62, "learning_rate": 3.968187729999026e-05, "loss": 0.7694, "step": 443000 }, { "epoch": 0.62, "learning_rate": 3.96702273556537e-05, "loss": 0.773, "step": 443500 }, { "epoch": 0.62, "learning_rate": 3.965857741131713e-05, "loss": 0.7704, "step": 444000 }, { "epoch": 0.62, "learning_rate": 3.964692746698057e-05, "loss": 0.776, "step": 444500 }, { "epoch": 0.62, "learning_rate": 3.9635277522644e-05, "loss": 0.7741, "step": 445000 }, { "epoch": 0.62, "learning_rate": 3.9623627578307433e-05, "loss": 0.7656, "step": 445500 }, { "epoch": 0.62, "learning_rate": 3.9611977633970866e-05, "loss": 0.7709, "step": 446000 }, { "epoch": 0.62, "learning_rate": 3.9600374289411646e-05, "loss": 0.7688, "step": 446500 }, { "epoch": 0.62, "learning_rate": 3.958872434507508e-05, "loss": 0.7756, "step": 447000 }, { "epoch": 0.63, "learning_rate": 3.957707440073852e-05, "loss": 0.7668, "step": 447500 }, { "epoch": 0.63, "learning_rate": 3.956542445640195e-05, "loss": 0.7665, "step": 448000 }, { "epoch": 0.63, "learning_rate": 3.955377451206538e-05, "loss": 0.7719, "step": 448500 }, { "epoch": 0.63, "learning_rate": 3.9542124567728815e-05, "loss": 0.7662, "step": 449000 }, { "epoch": 0.63, "learning_rate": 3.953047462339225e-05, "loss": 0.7666, "step": 449500 }, { "epoch": 0.63, "learning_rate": 3.9518847978944357e-05, "loss": 0.7697, "step": 450000 }, { "epoch": 0.63, "learning_rate": 3.9507198034607796e-05, "loss": 0.769, "step": 450500 }, { "epoch": 0.63, "learning_rate": 3.949554809027123e-05, "loss": 0.7706, "step": 451000 }, { "epoch": 0.63, "learning_rate": 3.948389814593466e-05, "loss": 0.7684, "step": 451500 }, { "epoch": 0.63, "learning_rate": 3.947224820159809e-05, "loss": 0.7676, "step": 452000 }, { "epoch": 0.63, "learning_rate": 3.9460598257261525e-05, "loss": 0.771, "step": 452500 }, { "epoch": 0.63, "learning_rate": 3.944894831292496e-05, "loss": 0.7712, "step": 453000 }, { "epoch": 0.63, "learning_rate": 3.94372983685884e-05, "loss": 0.7688, "step": 453500 }, { "epoch": 0.63, "learning_rate": 3.942564842425183e-05, "loss": 0.771, "step": 454000 }, { "epoch": 0.64, "learning_rate": 3.941399847991527e-05, "loss": 0.7656, "step": 454500 }, { "epoch": 0.64, "learning_rate": 3.940237183546737e-05, "loss": 0.7647, "step": 455000 }, { "epoch": 0.64, "learning_rate": 3.9390721891130803e-05, "loss": 0.7624, "step": 455500 }, { "epoch": 0.64, "learning_rate": 3.9379071946794236e-05, "loss": 0.7673, "step": 456000 }, { "epoch": 0.64, "learning_rate": 3.9367422002457675e-05, "loss": 0.7634, "step": 456500 }, { "epoch": 0.64, "learning_rate": 3.935577205812111e-05, "loss": 0.7618, "step": 457000 }, { "epoch": 0.64, "learning_rate": 3.934412211378454e-05, "loss": 0.7607, "step": 457500 }, { "epoch": 0.64, "learning_rate": 3.933247216944798e-05, "loss": 0.7593, "step": 458000 }, { "epoch": 0.64, "learning_rate": 3.932082222511141e-05, "loss": 0.7635, "step": 458500 }, { "epoch": 0.64, "learning_rate": 3.9309172280774844e-05, "loss": 0.757, "step": 459000 }, { "epoch": 0.64, "learning_rate": 3.929752233643828e-05, "loss": 0.7646, "step": 459500 }, { "epoch": 0.64, "learning_rate": 3.9285895691990386e-05, "loss": 0.761, "step": 460000 }, { "epoch": 0.64, "learning_rate": 3.927424574765382e-05, "loss": 0.7659, "step": 460500 }, { "epoch": 0.64, "learning_rate": 3.926261910320593e-05, "loss": 0.7607, "step": 461000 }, { "epoch": 0.65, "learning_rate": 3.925096915886936e-05, "loss": 0.766, "step": 461500 }, { "epoch": 0.65, "learning_rate": 3.923931921453279e-05, "loss": 0.7602, "step": 462000 }, { "epoch": 0.65, "learning_rate": 3.9227669270196225e-05, "loss": 0.766, "step": 462500 }, { "epoch": 0.65, "learning_rate": 3.9216042625748334e-05, "loss": 0.7637, "step": 463000 }, { "epoch": 0.65, "learning_rate": 3.9204392681411773e-05, "loss": 0.7652, "step": 463500 }, { "epoch": 0.65, "learning_rate": 3.9192742737075206e-05, "loss": 0.7648, "step": 464000 }, { "epoch": 0.65, "learning_rate": 3.918109279273864e-05, "loss": 0.7639, "step": 464500 }, { "epoch": 0.65, "learning_rate": 3.916944284840207e-05, "loss": 0.766, "step": 465000 }, { "epoch": 0.65, "learning_rate": 3.91577929040655e-05, "loss": 0.769, "step": 465500 }, { "epoch": 0.65, "learning_rate": 3.914616625961761e-05, "loss": 0.7665, "step": 466000 }, { "epoch": 0.65, "learning_rate": 3.913451631528105e-05, "loss": 0.7663, "step": 466500 }, { "epoch": 0.65, "learning_rate": 3.9122866370944484e-05, "loss": 0.7672, "step": 467000 }, { "epoch": 0.65, "learning_rate": 3.9111216426607916e-05, "loss": 0.7622, "step": 467500 }, { "epoch": 0.65, "learning_rate": 3.9099613082048697e-05, "loss": 0.7655, "step": 468000 }, { "epoch": 0.65, "learning_rate": 3.908796313771213e-05, "loss": 0.7534, "step": 468500 }, { "epoch": 0.66, "learning_rate": 3.907631319337556e-05, "loss": 0.7596, "step": 469000 }, { "epoch": 0.66, "learning_rate": 3.9064663249038994e-05, "loss": 0.7588, "step": 469500 }, { "epoch": 0.66, "learning_rate": 3.9053013304702426e-05, "loss": 0.7607, "step": 470000 }, { "epoch": 0.66, "learning_rate": 3.9041363360365865e-05, "loss": 0.765, "step": 470500 }, { "epoch": 0.66, "learning_rate": 3.90297134160293e-05, "loss": 0.7609, "step": 471000 }, { "epoch": 0.66, "learning_rate": 3.901806347169274e-05, "loss": 0.7629, "step": 471500 }, { "epoch": 0.66, "learning_rate": 3.900641352735617e-05, "loss": 0.7656, "step": 472000 }, { "epoch": 0.66, "learning_rate": 3.89947635830196e-05, "loss": 0.7621, "step": 472500 }, { "epoch": 0.66, "learning_rate": 3.898311363868304e-05, "loss": 0.7606, "step": 473000 }, { "epoch": 0.66, "learning_rate": 3.897146369434647e-05, "loss": 0.7645, "step": 473500 }, { "epoch": 0.66, "learning_rate": 3.8959813750009905e-05, "loss": 0.7626, "step": 474000 }, { "epoch": 0.66, "learning_rate": 3.894816380567334e-05, "loss": 0.758, "step": 474500 }, { "epoch": 0.66, "learning_rate": 3.893651386133677e-05, "loss": 0.762, "step": 475000 }, { "epoch": 0.66, "learning_rate": 3.892488721688888e-05, "loss": 0.7597, "step": 475500 }, { "epoch": 0.67, "learning_rate": 3.891323727255231e-05, "loss": 0.7611, "step": 476000 }, { "epoch": 0.67, "learning_rate": 3.890158732821575e-05, "loss": 0.7588, "step": 476500 }, { "epoch": 0.67, "learning_rate": 3.8889937383879184e-05, "loss": 0.7607, "step": 477000 }, { "epoch": 0.67, "learning_rate": 3.8878287439542616e-05, "loss": 0.7602, "step": 477500 }, { "epoch": 0.67, "learning_rate": 3.886663749520605e-05, "loss": 0.7598, "step": 478000 }, { "epoch": 0.67, "learning_rate": 3.885501085075816e-05, "loss": 0.7653, "step": 478500 }, { "epoch": 0.67, "learning_rate": 3.884338420631026e-05, "loss": 0.7645, "step": 479000 }, { "epoch": 0.67, "learning_rate": 3.883175756186237e-05, "loss": 0.7676, "step": 479500 }, { "epoch": 0.67, "learning_rate": 3.88201076175258e-05, "loss": 0.768, "step": 480000 }, { "epoch": 0.67, "learning_rate": 3.880845767318924e-05, "loss": 0.7636, "step": 480500 }, { "epoch": 0.67, "learning_rate": 3.8796807728852674e-05, "loss": 0.7617, "step": 481000 }, { "epoch": 0.67, "learning_rate": 3.878515778451611e-05, "loss": 0.7571, "step": 481500 }, { "epoch": 0.67, "learning_rate": 3.877350784017954e-05, "loss": 0.7628, "step": 482000 }, { "epoch": 0.67, "learning_rate": 3.876185789584297e-05, "loss": 0.7676, "step": 482500 }, { "epoch": 0.68, "learning_rate": 3.875020795150641e-05, "loss": 0.7659, "step": 483000 }, { "epoch": 0.68, "learning_rate": 3.873858130705852e-05, "loss": 0.7647, "step": 483500 }, { "epoch": 0.68, "learning_rate": 3.872695466261062e-05, "loss": 0.779, "step": 484000 }, { "epoch": 0.68, "learning_rate": 3.871532801816273e-05, "loss": 0.7699, "step": 484500 }, { "epoch": 0.68, "learning_rate": 3.8703678073826165e-05, "loss": 0.7635, "step": 485000 }, { "epoch": 0.68, "learning_rate": 3.86920281294896e-05, "loss": 0.76, "step": 485500 }, { "epoch": 0.68, "learning_rate": 3.868037818515303e-05, "loss": 0.7626, "step": 486000 }, { "epoch": 0.68, "learning_rate": 3.866875154070514e-05, "loss": 0.7594, "step": 486500 }, { "epoch": 0.68, "learning_rate": 3.865710159636857e-05, "loss": 0.7585, "step": 487000 }, { "epoch": 0.68, "learning_rate": 3.864545165203201e-05, "loss": 0.7597, "step": 487500 }, { "epoch": 0.68, "learning_rate": 3.863380170769544e-05, "loss": 0.7558, "step": 488000 }, { "epoch": 0.68, "learning_rate": 3.8622151763358876e-05, "loss": 0.7594, "step": 488500 }, { "epoch": 0.68, "learning_rate": 3.861050181902231e-05, "loss": 0.7607, "step": 489000 }, { "epoch": 0.68, "learning_rate": 3.859885187468574e-05, "loss": 0.7572, "step": 489500 }, { "epoch": 0.69, "learning_rate": 3.858720193034917e-05, "loss": 0.7626, "step": 490000 }, { "epoch": 0.69, "learning_rate": 3.857555198601261e-05, "loss": 0.7603, "step": 490500 }, { "epoch": 0.69, "learning_rate": 3.8563902041676044e-05, "loss": 0.7659, "step": 491000 }, { "epoch": 0.69, "learning_rate": 3.8552252097339484e-05, "loss": 0.7642, "step": 491500 }, { "epoch": 0.69, "learning_rate": 3.8540625452891586e-05, "loss": 0.7595, "step": 492000 }, { "epoch": 0.69, "learning_rate": 3.852897550855502e-05, "loss": 0.7585, "step": 492500 }, { "epoch": 0.69, "learning_rate": 3.851732556421845e-05, "loss": 0.7628, "step": 493000 }, { "epoch": 0.69, "learning_rate": 3.850567561988189e-05, "loss": 0.7599, "step": 493500 }, { "epoch": 0.69, "learning_rate": 3.849402567554532e-05, "loss": 0.7542, "step": 494000 }, { "epoch": 0.69, "learning_rate": 3.848237573120876e-05, "loss": 0.7566, "step": 494500 }, { "epoch": 0.69, "learning_rate": 3.8470725786872194e-05, "loss": 0.7602, "step": 495000 }, { "epoch": 0.69, "learning_rate": 3.8459075842535627e-05, "loss": 0.7634, "step": 495500 }, { "epoch": 0.69, "learning_rate": 3.844742589819906e-05, "loss": 0.7639, "step": 496000 }, { "epoch": 0.69, "learning_rate": 3.843579925375117e-05, "loss": 0.7817, "step": 496500 }, { "epoch": 0.69, "learning_rate": 3.84241493094146e-05, "loss": 0.765, "step": 497000 }, { "epoch": 0.7, "learning_rate": 3.841249936507803e-05, "loss": 0.7583, "step": 497500 }, { "epoch": 0.7, "learning_rate": 3.840084942074147e-05, "loss": 0.7596, "step": 498000 }, { "epoch": 0.7, "learning_rate": 3.8389199476404905e-05, "loss": 0.7565, "step": 498500 }, { "epoch": 0.7, "learning_rate": 3.837757283195701e-05, "loss": 0.7573, "step": 499000 }, { "epoch": 0.7, "learning_rate": 3.836592288762044e-05, "loss": 0.7626, "step": 499500 }, { "epoch": 0.7, "learning_rate": 3.835427294328388e-05, "loss": 0.7613, "step": 500000 }, { "epoch": 0.7, "learning_rate": 3.834262299894731e-05, "loss": 0.7626, "step": 500500 }, { "epoch": 0.7, "learning_rate": 3.833099635449942e-05, "loss": 0.7662, "step": 501000 }, { "epoch": 0.7, "learning_rate": 3.8319346410162854e-05, "loss": 0.7616, "step": 501500 }, { "epoch": 0.7, "learning_rate": 3.8307696465826286e-05, "loss": 0.7705, "step": 502000 }, { "epoch": 0.7, "learning_rate": 3.8296069821378396e-05, "loss": 0.789, "step": 502500 }, { "epoch": 0.7, "learning_rate": 3.828441987704183e-05, "loss": 0.7674, "step": 503000 }, { "epoch": 0.7, "learning_rate": 3.827279323259394e-05, "loss": 0.7605, "step": 503500 }, { "epoch": 0.7, "learning_rate": 3.826114328825737e-05, "loss": 0.7618, "step": 504000 }, { "epoch": 0.71, "learning_rate": 3.824949334392081e-05, "loss": 0.7926, "step": 504500 }, { "epoch": 0.71, "learning_rate": 3.823784339958424e-05, "loss": 0.7731, "step": 505000 }, { "epoch": 0.71, "learning_rate": 3.8226193455247674e-05, "loss": 0.7662, "step": 505500 }, { "epoch": 0.71, "learning_rate": 3.8214543510911106e-05, "loss": 0.7591, "step": 506000 }, { "epoch": 0.71, "learning_rate": 3.820289356657454e-05, "loss": 0.7589, "step": 506500 }, { "epoch": 0.71, "learning_rate": 3.819124362223798e-05, "loss": 0.7617, "step": 507000 }, { "epoch": 0.71, "learning_rate": 3.817959367790141e-05, "loss": 0.7713, "step": 507500 }, { "epoch": 0.71, "learning_rate": 3.816796703345352e-05, "loss": 0.7646, "step": 508000 }, { "epoch": 0.71, "learning_rate": 3.815631708911695e-05, "loss": 0.7692, "step": 508500 }, { "epoch": 0.71, "learning_rate": 3.8144690444669055e-05, "loss": 0.7763, "step": 509000 }, { "epoch": 0.71, "learning_rate": 3.813304050033249e-05, "loss": 0.7661, "step": 509500 }, { "epoch": 0.71, "learning_rate": 3.812139055599592e-05, "loss": 0.7688, "step": 510000 }, { "epoch": 0.71, "learning_rate": 3.810974061165936e-05, "loss": 0.7744, "step": 510500 }, { "epoch": 0.71, "learning_rate": 3.809809066732279e-05, "loss": 0.8432, "step": 511000 }, { "epoch": 0.72, "learning_rate": 3.808644072298623e-05, "loss": 0.7715, "step": 511500 }, { "epoch": 0.72, "learning_rate": 3.807479077864966e-05, "loss": 0.77, "step": 512000 }, { "epoch": 0.72, "learning_rate": 3.8063140834313095e-05, "loss": 0.7677, "step": 512500 }, { "epoch": 0.72, "learning_rate": 3.80515141898652e-05, "loss": 0.7692, "step": 513000 }, { "epoch": 0.72, "learning_rate": 3.803986424552864e-05, "loss": 0.7634, "step": 513500 }, { "epoch": 0.72, "learning_rate": 3.802821430119207e-05, "loss": 0.7664, "step": 514000 }, { "epoch": 0.72, "learning_rate": 3.80165643568555e-05, "loss": 0.7629, "step": 514500 }, { "epoch": 0.72, "learning_rate": 3.800491441251894e-05, "loss": 0.761, "step": 515000 }, { "epoch": 0.72, "learning_rate": 3.799326446818237e-05, "loss": 0.7586, "step": 515500 }, { "epoch": 0.72, "learning_rate": 3.7981614523845806e-05, "loss": 0.7567, "step": 516000 }, { "epoch": 0.72, "learning_rate": 3.7969964579509245e-05, "loss": 0.7585, "step": 516500 }, { "epoch": 0.72, "learning_rate": 3.795831463517268e-05, "loss": 0.7553, "step": 517000 }, { "epoch": 0.72, "learning_rate": 3.794666469083611e-05, "loss": 0.7562, "step": 517500 }, { "epoch": 0.72, "learning_rate": 3.793501474649954e-05, "loss": 0.7585, "step": 518000 }, { "epoch": 0.72, "learning_rate": 3.792338810205165e-05, "loss": 0.7655, "step": 518500 }, { "epoch": 0.73, "learning_rate": 3.7911738157715084e-05, "loss": 0.7572, "step": 519000 }, { "epoch": 0.73, "learning_rate": 3.790008821337852e-05, "loss": 0.7553, "step": 519500 }, { "epoch": 0.73, "learning_rate": 3.7888438269041956e-05, "loss": 0.7555, "step": 520000 }, { "epoch": 0.73, "learning_rate": 3.787678832470539e-05, "loss": 0.7544, "step": 520500 }, { "epoch": 0.73, "learning_rate": 3.786513838036882e-05, "loss": 0.7523, "step": 521000 }, { "epoch": 0.73, "learning_rate": 3.785351173592093e-05, "loss": 0.7588, "step": 521500 }, { "epoch": 0.73, "learning_rate": 3.784188509147303e-05, "loss": 0.7585, "step": 522000 }, { "epoch": 0.73, "learning_rate": 3.7830235147136465e-05, "loss": 0.762, "step": 522500 }, { "epoch": 0.73, "learning_rate": 3.7818585202799904e-05, "loss": 0.7634, "step": 523000 }, { "epoch": 0.73, "learning_rate": 3.7806981858240684e-05, "loss": 0.809, "step": 523500 }, { "epoch": 0.73, "learning_rate": 3.779533191390412e-05, "loss": 0.7722, "step": 524000 }, { "epoch": 0.73, "learning_rate": 3.7783681969567556e-05, "loss": 0.7643, "step": 524500 }, { "epoch": 0.73, "learning_rate": 3.777203202523099e-05, "loss": 0.7642, "step": 525000 }, { "epoch": 0.73, "learning_rate": 3.776038208089442e-05, "loss": 0.7601, "step": 525500 }, { "epoch": 0.74, "learning_rate": 3.774873213655785e-05, "loss": 0.7588, "step": 526000 }, { "epoch": 0.74, "learning_rate": 3.7737105492109956e-05, "loss": 0.7585, "step": 526500 }, { "epoch": 0.74, "learning_rate": 3.7725455547773395e-05, "loss": 0.7676, "step": 527000 }, { "epoch": 0.74, "learning_rate": 3.771380560343683e-05, "loss": 0.7628, "step": 527500 }, { "epoch": 0.74, "learning_rate": 3.7702155659100266e-05, "loss": 0.7688, "step": 528000 }, { "epoch": 0.74, "learning_rate": 3.76905057147637e-05, "loss": 0.7663, "step": 528500 }, { "epoch": 0.74, "learning_rate": 3.767885577042713e-05, "loss": 0.7603, "step": 529000 }, { "epoch": 0.74, "learning_rate": 3.7667205826090564e-05, "loss": 0.7616, "step": 529500 }, { "epoch": 0.74, "learning_rate": 3.7655555881754e-05, "loss": 0.7589, "step": 530000 }, { "epoch": 0.74, "learning_rate": 3.7643905937417435e-05, "loss": 0.7646, "step": 530500 }, { "epoch": 0.74, "learning_rate": 3.763227929296954e-05, "loss": 0.768, "step": 531000 }, { "epoch": 0.74, "learning_rate": 3.762062934863298e-05, "loss": 0.7853, "step": 531500 }, { "epoch": 0.74, "learning_rate": 3.760897940429641e-05, "loss": 0.8162, "step": 532000 }, { "epoch": 0.74, "learning_rate": 3.759732945995984e-05, "loss": 0.7718, "step": 532500 }, { "epoch": 0.75, "learning_rate": 3.758567951562328e-05, "loss": 0.7669, "step": 533000 }, { "epoch": 0.75, "learning_rate": 3.7574029571286713e-05, "loss": 0.7724, "step": 533500 }, { "epoch": 0.75, "learning_rate": 3.7562402926838816e-05, "loss": 0.7688, "step": 534000 }, { "epoch": 0.75, "learning_rate": 3.755075298250225e-05, "loss": 0.7658, "step": 534500 }, { "epoch": 0.75, "learning_rate": 3.753910303816569e-05, "loss": 0.759, "step": 535000 }, { "epoch": 0.75, "learning_rate": 3.752745309382912e-05, "loss": 0.7589, "step": 535500 }, { "epoch": 0.75, "learning_rate": 3.751580314949255e-05, "loss": 0.7532, "step": 536000 }, { "epoch": 0.75, "learning_rate": 3.750415320515599e-05, "loss": 0.7542, "step": 536500 }, { "epoch": 0.75, "learning_rate": 3.7492526560708095e-05, "loss": 0.7556, "step": 537000 }, { "epoch": 0.75, "learning_rate": 3.748087661637153e-05, "loss": 0.7538, "step": 537500 }, { "epoch": 0.75, "learning_rate": 3.7469226672034966e-05, "loss": 0.7576, "step": 538000 }, { "epoch": 0.75, "learning_rate": 3.74575767276984e-05, "loss": 0.7719, "step": 538500 }, { "epoch": 0.75, "learning_rate": 3.74459500832505e-05, "loss": 0.7708, "step": 539000 }, { "epoch": 0.75, "learning_rate": 3.743432343880261e-05, "loss": 0.7702, "step": 539500 }, { "epoch": 0.75, "learning_rate": 3.742267349446604e-05, "loss": 0.7647, "step": 540000 }, { "epoch": 0.76, "learning_rate": 3.741102355012948e-05, "loss": 0.7661, "step": 540500 }, { "epoch": 0.76, "learning_rate": 3.7399373605792915e-05, "loss": 0.7664, "step": 541000 }, { "epoch": 0.76, "learning_rate": 3.738772366145635e-05, "loss": 0.7601, "step": 541500 }, { "epoch": 0.76, "learning_rate": 3.737607371711978e-05, "loss": 0.7611, "step": 542000 }, { "epoch": 0.76, "learning_rate": 3.736442377278321e-05, "loss": 0.7577, "step": 542500 }, { "epoch": 0.76, "learning_rate": 3.735277382844665e-05, "loss": 0.7568, "step": 543000 }, { "epoch": 0.76, "learning_rate": 3.7341123884110083e-05, "loss": 0.7585, "step": 543500 }, { "epoch": 0.76, "learning_rate": 3.7329473939773516e-05, "loss": 0.7563, "step": 544000 }, { "epoch": 0.76, "learning_rate": 3.731782399543695e-05, "loss": 0.7664, "step": 544500 }, { "epoch": 0.76, "learning_rate": 3.730617405110039e-05, "loss": 0.761, "step": 545000 }, { "epoch": 0.76, "learning_rate": 3.729452410676382e-05, "loss": 0.7524, "step": 545500 }, { "epoch": 0.76, "learning_rate": 3.728287416242726e-05, "loss": 0.7572, "step": 546000 }, { "epoch": 0.76, "learning_rate": 3.727122421809069e-05, "loss": 0.7625, "step": 546500 }, { "epoch": 0.76, "learning_rate": 3.7259574273754124e-05, "loss": 0.7609, "step": 547000 }, { "epoch": 0.77, "learning_rate": 3.7247924329417556e-05, "loss": 0.7564, "step": 547500 }, { "epoch": 0.77, "learning_rate": 3.723627438508099e-05, "loss": 0.7585, "step": 548000 }, { "epoch": 0.77, "learning_rate": 3.722462444074443e-05, "loss": 0.7576, "step": 548500 }, { "epoch": 0.77, "learning_rate": 3.721297449640786e-05, "loss": 0.7591, "step": 549000 }, { "epoch": 0.77, "learning_rate": 3.720132455207129e-05, "loss": 0.7579, "step": 549500 }, { "epoch": 0.77, "learning_rate": 3.718972120751207e-05, "loss": 0.7559, "step": 550000 }, { "epoch": 0.77, "learning_rate": 3.717809456306418e-05, "loss": 0.7588, "step": 550500 }, { "epoch": 0.77, "learning_rate": 3.7166444618727614e-05, "loss": 0.755, "step": 551000 }, { "epoch": 0.77, "learning_rate": 3.7154817974279724e-05, "loss": 0.7565, "step": 551500 }, { "epoch": 0.77, "learning_rate": 3.7143168029943156e-05, "loss": 0.7546, "step": 552000 }, { "epoch": 0.77, "learning_rate": 3.713151808560659e-05, "loss": 0.7571, "step": 552500 }, { "epoch": 0.77, "learning_rate": 3.711986814127003e-05, "loss": 0.7535, "step": 553000 }, { "epoch": 0.77, "learning_rate": 3.710821819693346e-05, "loss": 0.7545, "step": 553500 }, { "epoch": 0.77, "learning_rate": 3.709659155248556e-05, "loss": 0.7536, "step": 554000 }, { "epoch": 0.78, "learning_rate": 3.7084941608148995e-05, "loss": 0.7565, "step": 554500 }, { "epoch": 0.78, "learning_rate": 3.7073291663812435e-05, "loss": 0.7509, "step": 555000 }, { "epoch": 0.78, "learning_rate": 3.706164171947587e-05, "loss": 0.7554, "step": 555500 }, { "epoch": 0.78, "learning_rate": 3.70499917751393e-05, "loss": 0.7624, "step": 556000 }, { "epoch": 0.78, "learning_rate": 3.703834183080274e-05, "loss": 0.7503, "step": 556500 }, { "epoch": 0.78, "learning_rate": 3.702669188646617e-05, "loss": 0.7528, "step": 557000 }, { "epoch": 0.78, "learning_rate": 3.70150419421296e-05, "loss": 0.7537, "step": 557500 }, { "epoch": 0.78, "learning_rate": 3.7003391997793036e-05, "loss": 0.7563, "step": 558000 }, { "epoch": 0.78, "learning_rate": 3.699174205345647e-05, "loss": 0.7533, "step": 558500 }, { "epoch": 0.78, "learning_rate": 3.698011540900858e-05, "loss": 0.7503, "step": 559000 }, { "epoch": 0.78, "learning_rate": 3.696846546467202e-05, "loss": 0.7522, "step": 559500 }, { "epoch": 0.78, "learning_rate": 3.695681552033545e-05, "loss": 0.7426, "step": 560000 }, { "epoch": 0.78, "learning_rate": 3.694516557599888e-05, "loss": 0.7523, "step": 560500 }, { "epoch": 0.78, "learning_rate": 3.6933538931550984e-05, "loss": 0.7557, "step": 561000 }, { "epoch": 0.78, "learning_rate": 3.6921912287103094e-05, "loss": 0.7583, "step": 561500 }, { "epoch": 0.79, "learning_rate": 3.6910262342766526e-05, "loss": 0.7595, "step": 562000 }, { "epoch": 0.79, "learning_rate": 3.689861239842996e-05, "loss": 0.7562, "step": 562500 }, { "epoch": 0.79, "learning_rate": 3.688696245409339e-05, "loss": 0.7552, "step": 563000 }, { "epoch": 0.79, "learning_rate": 3.687531250975683e-05, "loss": 0.7538, "step": 563500 }, { "epoch": 0.79, "learning_rate": 3.686366256542026e-05, "loss": 0.7509, "step": 564000 }, { "epoch": 0.79, "learning_rate": 3.6852012621083695e-05, "loss": 0.7508, "step": 564500 }, { "epoch": 0.79, "learning_rate": 3.6840362676747134e-05, "loss": 0.7509, "step": 565000 }, { "epoch": 0.79, "learning_rate": 3.6828712732410566e-05, "loss": 0.7525, "step": 565500 }, { "epoch": 0.79, "learning_rate": 3.6817062788074006e-05, "loss": 0.75, "step": 566000 }, { "epoch": 0.79, "learning_rate": 3.680541284373744e-05, "loss": 0.7504, "step": 566500 }, { "epoch": 0.79, "learning_rate": 3.679376289940087e-05, "loss": 0.7468, "step": 567000 }, { "epoch": 0.79, "learning_rate": 3.67821129550643e-05, "loss": 0.7523, "step": 567500 }, { "epoch": 0.79, "learning_rate": 3.6770463010727735e-05, "loss": 0.7502, "step": 568000 }, { "epoch": 0.79, "learning_rate": 3.675881306639117e-05, "loss": 0.7525, "step": 568500 }, { "epoch": 0.8, "learning_rate": 3.674716312205461e-05, "loss": 0.7487, "step": 569000 }, { "epoch": 0.8, "learning_rate": 3.673551317771804e-05, "loss": 0.7443, "step": 569500 }, { "epoch": 0.8, "learning_rate": 3.672388653327015e-05, "loss": 0.7508, "step": 570000 }, { "epoch": 0.8, "learning_rate": 3.671223658893358e-05, "loss": 0.7477, "step": 570500 }, { "epoch": 0.8, "learning_rate": 3.6700586644597013e-05, "loss": 0.7492, "step": 571000 }, { "epoch": 0.8, "learning_rate": 3.6688936700260446e-05, "loss": 0.7507, "step": 571500 }, { "epoch": 0.8, "learning_rate": 3.6677286755923885e-05, "loss": 0.7468, "step": 572000 }, { "epoch": 0.8, "learning_rate": 3.666563681158732e-05, "loss": 0.7429, "step": 572500 }, { "epoch": 0.8, "learning_rate": 3.665398686725075e-05, "loss": 0.7482, "step": 573000 }, { "epoch": 0.8, "learning_rate": 3.664233692291418e-05, "loss": 0.7509, "step": 573500 }, { "epoch": 0.8, "learning_rate": 3.6630686978577614e-05, "loss": 0.748, "step": 574000 }, { "epoch": 0.8, "learning_rate": 3.6619060334129724e-05, "loss": 0.7517, "step": 574500 }, { "epoch": 0.8, "learning_rate": 3.6607433689681834e-05, "loss": 0.7454, "step": 575000 }, { "epoch": 0.8, "learning_rate": 3.6595783745345266e-05, "loss": 0.7448, "step": 575500 }, { "epoch": 0.81, "learning_rate": 3.6584133801008705e-05, "loss": 0.7461, "step": 576000 }, { "epoch": 0.81, "learning_rate": 3.657248385667214e-05, "loss": 0.7464, "step": 576500 }, { "epoch": 0.81, "learning_rate": 3.656083391233557e-05, "loss": 0.7469, "step": 577000 }, { "epoch": 0.81, "learning_rate": 3.6549183967999e-05, "loss": 0.743, "step": 577500 }, { "epoch": 0.81, "learning_rate": 3.6537534023662435e-05, "loss": 0.7531, "step": 578000 }, { "epoch": 0.81, "learning_rate": 3.6525884079325874e-05, "loss": 0.7455, "step": 578500 }, { "epoch": 0.81, "learning_rate": 3.6514234134989306e-05, "loss": 0.746, "step": 579000 }, { "epoch": 0.81, "learning_rate": 3.650258419065274e-05, "loss": 0.743, "step": 579500 }, { "epoch": 0.81, "learning_rate": 3.649093424631617e-05, "loss": 0.7446, "step": 580000 }, { "epoch": 0.81, "learning_rate": 3.64792843019796e-05, "loss": 0.7436, "step": 580500 }, { "epoch": 0.81, "learning_rate": 3.646765765753171e-05, "loss": 0.7461, "step": 581000 }, { "epoch": 0.81, "learning_rate": 3.645600771319515e-05, "loss": 0.7433, "step": 581500 }, { "epoch": 0.81, "learning_rate": 3.6444357768858584e-05, "loss": 0.7447, "step": 582000 }, { "epoch": 0.81, "learning_rate": 3.643270782452202e-05, "loss": 0.7462, "step": 582500 }, { "epoch": 0.82, "learning_rate": 3.642105788018545e-05, "loss": 0.7442, "step": 583000 }, { "epoch": 0.82, "learning_rate": 3.640943123573756e-05, "loss": 0.7412, "step": 583500 }, { "epoch": 0.82, "learning_rate": 3.639778129140099e-05, "loss": 0.742, "step": 584000 }, { "epoch": 0.82, "learning_rate": 3.6386131347064424e-05, "loss": 0.7478, "step": 584500 }, { "epoch": 0.82, "learning_rate": 3.637450470261653e-05, "loss": 0.7434, "step": 585000 }, { "epoch": 0.82, "learning_rate": 3.636287805816864e-05, "loss": 0.7487, "step": 585500 }, { "epoch": 0.82, "learning_rate": 3.6351228113832075e-05, "loss": 0.744, "step": 586000 }, { "epoch": 0.82, "learning_rate": 3.6339601469384185e-05, "loss": 0.7445, "step": 586500 }, { "epoch": 0.82, "learning_rate": 3.632795152504762e-05, "loss": 0.7438, "step": 587000 }, { "epoch": 0.82, "learning_rate": 3.631630158071105e-05, "loss": 0.7505, "step": 587500 }, { "epoch": 0.82, "learning_rate": 3.630465163637448e-05, "loss": 0.7444, "step": 588000 }, { "epoch": 0.82, "learning_rate": 3.6293001692037914e-05, "loss": 0.7484, "step": 588500 }, { "epoch": 0.82, "learning_rate": 3.6281351747701353e-05, "loss": 0.75, "step": 589000 }, { "epoch": 0.82, "learning_rate": 3.6269701803364786e-05, "loss": 0.7474, "step": 589500 }, { "epoch": 0.82, "learning_rate": 3.6258075158916895e-05, "loss": 0.7424, "step": 590000 }, { "epoch": 0.83, "learning_rate": 3.6246448514469e-05, "loss": 0.7483, "step": 590500 }, { "epoch": 0.83, "learning_rate": 3.623479857013243e-05, "loss": 0.7471, "step": 591000 }, { "epoch": 0.83, "learning_rate": 3.622314862579586e-05, "loss": 0.7421, "step": 591500 }, { "epoch": 0.83, "learning_rate": 3.621152198134797e-05, "loss": 0.7468, "step": 592000 }, { "epoch": 0.83, "learning_rate": 3.6199872037011405e-05, "loss": 0.7415, "step": 592500 }, { "epoch": 0.83, "learning_rate": 3.6188222092674844e-05, "loss": 0.7481, "step": 593000 }, { "epoch": 0.83, "learning_rate": 3.6176572148338277e-05, "loss": 0.7499, "step": 593500 }, { "epoch": 0.83, "learning_rate": 3.616492220400171e-05, "loss": 0.7443, "step": 594000 }, { "epoch": 0.83, "learning_rate": 3.615327225966514e-05, "loss": 0.7481, "step": 594500 }, { "epoch": 0.83, "learning_rate": 3.614164561521725e-05, "loss": 0.7456, "step": 595000 }, { "epoch": 0.83, "learning_rate": 3.612999567088068e-05, "loss": 0.7415, "step": 595500 }, { "epoch": 0.83, "learning_rate": 3.611834572654412e-05, "loss": 0.743, "step": 596000 }, { "epoch": 0.83, "learning_rate": 3.6106695782207555e-05, "loss": 0.7469, "step": 596500 }, { "epoch": 0.83, "learning_rate": 3.609504583787099e-05, "loss": 0.7473, "step": 597000 }, { "epoch": 0.84, "learning_rate": 3.608339589353442e-05, "loss": 0.741, "step": 597500 }, { "epoch": 0.84, "learning_rate": 3.607174594919785e-05, "loss": 0.75, "step": 598000 }, { "epoch": 0.84, "learning_rate": 3.606009600486129e-05, "loss": 0.7441, "step": 598500 }, { "epoch": 0.84, "learning_rate": 3.6048469360413394e-05, "loss": 0.7456, "step": 599000 }, { "epoch": 0.84, "learning_rate": 3.603684271596551e-05, "loss": 0.7458, "step": 599500 }, { "epoch": 0.84, "learning_rate": 3.602519277162894e-05, "loss": 0.7469, "step": 600000 }, { "epoch": 0.84, "learning_rate": 3.6013542827292375e-05, "loss": 0.7484, "step": 600500 }, { "epoch": 0.84, "learning_rate": 3.600189288295581e-05, "loss": 0.7463, "step": 601000 }, { "epoch": 0.84, "learning_rate": 3.599024293861924e-05, "loss": 0.7442, "step": 601500 }, { "epoch": 0.84, "learning_rate": 3.597859299428267e-05, "loss": 0.7382, "step": 602000 }, { "epoch": 0.84, "learning_rate": 3.596694304994611e-05, "loss": 0.7455, "step": 602500 }, { "epoch": 0.84, "learning_rate": 3.5955293105609544e-05, "loss": 0.7459, "step": 603000 }, { "epoch": 0.84, "learning_rate": 3.5943643161272976e-05, "loss": 0.7404, "step": 603500 }, { "epoch": 0.84, "learning_rate": 3.593199321693641e-05, "loss": 0.741, "step": 604000 }, { "epoch": 0.85, "learning_rate": 3.592034327259984e-05, "loss": 0.7449, "step": 604500 }, { "epoch": 0.85, "learning_rate": 3.590869332826328e-05, "loss": 0.7494, "step": 605000 }, { "epoch": 0.85, "learning_rate": 3.589704338392671e-05, "loss": 0.7427, "step": 605500 }, { "epoch": 0.85, "learning_rate": 3.5885393439590145e-05, "loss": 0.7445, "step": 606000 }, { "epoch": 0.85, "learning_rate": 3.5873743495253584e-05, "loss": 0.7414, "step": 606500 }, { "epoch": 0.85, "learning_rate": 3.5862093550917016e-05, "loss": 0.7412, "step": 607000 }, { "epoch": 0.85, "learning_rate": 3.585044360658045e-05, "loss": 0.7432, "step": 607500 }, { "epoch": 0.85, "learning_rate": 3.583881696213255e-05, "loss": 0.7586, "step": 608000 }, { "epoch": 0.85, "learning_rate": 3.582719031768466e-05, "loss": 0.7483, "step": 608500 }, { "epoch": 0.85, "learning_rate": 3.581556367323677e-05, "loss": 0.7401, "step": 609000 }, { "epoch": 0.85, "learning_rate": 3.580391372890021e-05, "loss": 0.7446, "step": 609500 }, { "epoch": 0.85, "learning_rate": 3.579226378456364e-05, "loss": 0.7418, "step": 610000 }, { "epoch": 0.85, "learning_rate": 3.5780613840227075e-05, "loss": 0.7411, "step": 610500 }, { "epoch": 0.85, "learning_rate": 3.576896389589051e-05, "loss": 0.7431, "step": 611000 }, { "epoch": 0.85, "learning_rate": 3.575731395155394e-05, "loss": 0.7452, "step": 611500 }, { "epoch": 0.86, "learning_rate": 3.574568730710605e-05, "loss": 0.7424, "step": 612000 }, { "epoch": 0.86, "learning_rate": 3.573403736276948e-05, "loss": 0.7438, "step": 612500 }, { "epoch": 0.86, "learning_rate": 3.572238741843292e-05, "loss": 0.7427, "step": 613000 }, { "epoch": 0.86, "learning_rate": 3.571073747409635e-05, "loss": 0.7442, "step": 613500 }, { "epoch": 0.86, "learning_rate": 3.5699087529759785e-05, "loss": 0.745, "step": 614000 }, { "epoch": 0.86, "learning_rate": 3.568743758542322e-05, "loss": 0.7485, "step": 614500 }, { "epoch": 0.86, "learning_rate": 3.567578764108665e-05, "loss": 0.7445, "step": 615000 }, { "epoch": 0.86, "learning_rate": 3.566413769675009e-05, "loss": 0.7467, "step": 615500 }, { "epoch": 0.86, "learning_rate": 3.56525110523022e-05, "loss": 0.7397, "step": 616000 }, { "epoch": 0.86, "learning_rate": 3.564086110796563e-05, "loss": 0.7442, "step": 616500 }, { "epoch": 0.86, "learning_rate": 3.5629211163629064e-05, "loss": 0.7415, "step": 617000 }, { "epoch": 0.86, "learning_rate": 3.5617584519181166e-05, "loss": 0.7451, "step": 617500 }, { "epoch": 0.86, "learning_rate": 3.56059345748446e-05, "loss": 0.7447, "step": 618000 }, { "epoch": 0.86, "learning_rate": 3.559428463050804e-05, "loss": 0.7402, "step": 618500 }, { "epoch": 0.87, "learning_rate": 3.558263468617147e-05, "loss": 0.7419, "step": 619000 }, { "epoch": 0.87, "learning_rate": 3.557098474183491e-05, "loss": 0.7419, "step": 619500 }, { "epoch": 0.87, "learning_rate": 3.555933479749834e-05, "loss": 0.7372, "step": 620000 }, { "epoch": 0.87, "learning_rate": 3.5547684853161774e-05, "loss": 0.7416, "step": 620500 }, { "epoch": 0.87, "learning_rate": 3.5536034908825207e-05, "loss": 0.7508, "step": 621000 }, { "epoch": 0.87, "learning_rate": 3.552440826437731e-05, "loss": 0.7427, "step": 621500 }, { "epoch": 0.87, "learning_rate": 3.551275832004075e-05, "loss": 0.7553, "step": 622000 }, { "epoch": 0.87, "learning_rate": 3.550110837570418e-05, "loss": 0.7431, "step": 622500 }, { "epoch": 0.87, "learning_rate": 3.548948173125629e-05, "loss": 0.7429, "step": 623000 }, { "epoch": 0.87, "learning_rate": 3.547783178691972e-05, "loss": 0.7445, "step": 623500 }, { "epoch": 0.87, "learning_rate": 3.546620514247183e-05, "loss": 0.7428, "step": 624000 }, { "epoch": 0.87, "learning_rate": 3.5454555198135265e-05, "loss": 0.7404, "step": 624500 }, { "epoch": 0.87, "learning_rate": 3.54429052537987e-05, "loss": 0.7428, "step": 625000 }, { "epoch": 0.87, "learning_rate": 3.5431255309462136e-05, "loss": 0.7454, "step": 625500 }, { "epoch": 0.88, "learning_rate": 3.541960536512557e-05, "loss": 0.7431, "step": 626000 }, { "epoch": 0.88, "learning_rate": 3.5407955420789e-05, "loss": 0.7449, "step": 626500 }, { "epoch": 0.88, "learning_rate": 3.5396305476452434e-05, "loss": 0.7501, "step": 627000 }, { "epoch": 0.88, "learning_rate": 3.5384655532115866e-05, "loss": 0.7417, "step": 627500 }, { "epoch": 0.88, "learning_rate": 3.53730055877793e-05, "loss": 0.7425, "step": 628000 }, { "epoch": 0.88, "learning_rate": 3.536135564344274e-05, "loss": 0.7457, "step": 628500 }, { "epoch": 0.88, "learning_rate": 3.534970569910617e-05, "loss": 0.7378, "step": 629000 }, { "epoch": 0.88, "learning_rate": 3.533805575476961e-05, "loss": 0.7445, "step": 629500 }, { "epoch": 0.88, "learning_rate": 3.532640581043304e-05, "loss": 0.7372, "step": 630000 }, { "epoch": 0.88, "learning_rate": 3.5314755866096474e-05, "loss": 0.7676, "step": 630500 }, { "epoch": 0.88, "learning_rate": 3.530310592175991e-05, "loss": 0.7583, "step": 631000 }, { "epoch": 0.88, "learning_rate": 3.5291455977423345e-05, "loss": 0.7405, "step": 631500 }, { "epoch": 0.88, "learning_rate": 3.527980603308678e-05, "loss": 0.7347, "step": 632000 }, { "epoch": 0.88, "learning_rate": 3.526817938863888e-05, "loss": 0.7391, "step": 632500 }, { "epoch": 0.88, "learning_rate": 3.525652944430232e-05, "loss": 0.7357, "step": 633000 }, { "epoch": 0.89, "learning_rate": 3.524487949996575e-05, "loss": 0.7388, "step": 633500 }, { "epoch": 0.89, "learning_rate": 3.5233229555629184e-05, "loss": 0.7448, "step": 634000 }, { "epoch": 0.89, "learning_rate": 3.5221579611292623e-05, "loss": 0.7372, "step": 634500 }, { "epoch": 0.89, "learning_rate": 3.5209929666956056e-05, "loss": 0.7379, "step": 635000 }, { "epoch": 0.89, "learning_rate": 3.519827972261949e-05, "loss": 0.7445, "step": 635500 }, { "epoch": 0.89, "learning_rate": 3.518662977828292e-05, "loss": 0.7409, "step": 636000 }, { "epoch": 0.89, "learning_rate": 3.517497983394635e-05, "loss": 0.7337, "step": 636500 }, { "epoch": 0.89, "learning_rate": 3.516332988960979e-05, "loss": 0.7405, "step": 637000 }, { "epoch": 0.89, "learning_rate": 3.5151679945273225e-05, "loss": 0.7388, "step": 637500 }, { "epoch": 0.89, "learning_rate": 3.514003000093666e-05, "loss": 0.7402, "step": 638000 }, { "epoch": 0.89, "learning_rate": 3.512838005660009e-05, "loss": 0.7424, "step": 638500 }, { "epoch": 0.89, "learning_rate": 3.51167534121522e-05, "loss": 0.7403, "step": 639000 }, { "epoch": 0.89, "learning_rate": 3.510510346781563e-05, "loss": 0.7342, "step": 639500 }, { "epoch": 0.89, "learning_rate": 3.509347682336774e-05, "loss": 0.7364, "step": 640000 }, { "epoch": 0.9, "learning_rate": 3.508182687903117e-05, "loss": 0.7413, "step": 640500 }, { "epoch": 0.9, "learning_rate": 3.507017693469461e-05, "loss": 0.7447, "step": 641000 }, { "epoch": 0.9, "learning_rate": 3.5058526990358045e-05, "loss": 0.7366, "step": 641500 }, { "epoch": 0.9, "learning_rate": 3.504687704602148e-05, "loss": 0.7382, "step": 642000 }, { "epoch": 0.9, "learning_rate": 3.503522710168491e-05, "loss": 0.7383, "step": 642500 }, { "epoch": 0.9, "learning_rate": 3.502360045723702e-05, "loss": 0.7369, "step": 643000 }, { "epoch": 0.9, "learning_rate": 3.501195051290045e-05, "loss": 0.7393, "step": 643500 }, { "epoch": 0.9, "learning_rate": 3.5000323868452554e-05, "loss": 0.7413, "step": 644000 }, { "epoch": 0.9, "learning_rate": 3.4988673924115994e-05, "loss": 0.7318, "step": 644500 }, { "epoch": 0.9, "learning_rate": 3.4977023979779426e-05, "loss": 0.7414, "step": 645000 }, { "epoch": 0.9, "learning_rate": 3.496537403544286e-05, "loss": 0.7384, "step": 645500 }, { "epoch": 0.9, "learning_rate": 3.495372409110629e-05, "loss": 0.7379, "step": 646000 }, { "epoch": 0.9, "learning_rate": 3.494207414676973e-05, "loss": 0.7382, "step": 646500 }, { "epoch": 0.9, "learning_rate": 3.493042420243316e-05, "loss": 0.7359, "step": 647000 }, { "epoch": 0.91, "learning_rate": 3.49187742580966e-05, "loss": 0.7391, "step": 647500 }, { "epoch": 0.91, "learning_rate": 3.4907124313760034e-05, "loss": 0.7401, "step": 648000 }, { "epoch": 0.91, "learning_rate": 3.4895474369423466e-05, "loss": 0.7341, "step": 648500 }, { "epoch": 0.91, "learning_rate": 3.48838244250869e-05, "loss": 0.7383, "step": 649000 }, { "epoch": 0.91, "learning_rate": 3.487217448075033e-05, "loss": 0.7351, "step": 649500 }, { "epoch": 0.91, "learning_rate": 3.486054783630244e-05, "loss": 0.7383, "step": 650000 }, { "epoch": 0.91, "learning_rate": 3.484889789196587e-05, "loss": 0.7362, "step": 650500 }, { "epoch": 0.91, "learning_rate": 3.483727124751798e-05, "loss": 0.7369, "step": 651000 }, { "epoch": 0.91, "learning_rate": 3.4825621303181415e-05, "loss": 0.7356, "step": 651500 }, { "epoch": 0.91, "learning_rate": 3.481397135884485e-05, "loss": 0.7379, "step": 652000 }, { "epoch": 0.91, "learning_rate": 3.480232141450828e-05, "loss": 0.7419, "step": 652500 }, { "epoch": 0.91, "learning_rate": 3.479067147017171e-05, "loss": 0.7343, "step": 653000 }, { "epoch": 0.91, "learning_rate": 3.477904482572382e-05, "loss": 0.7387, "step": 653500 }, { "epoch": 0.91, "learning_rate": 3.476741818127593e-05, "loss": 0.7362, "step": 654000 }, { "epoch": 0.91, "learning_rate": 3.475576823693937e-05, "loss": 0.7391, "step": 654500 }, { "epoch": 0.92, "learning_rate": 3.47441182926028e-05, "loss": 0.7476, "step": 655000 }, { "epoch": 0.92, "learning_rate": 3.4732468348266235e-05, "loss": 0.7382, "step": 655500 }, { "epoch": 0.92, "learning_rate": 3.472081840392967e-05, "loss": 0.7402, "step": 656000 }, { "epoch": 0.92, "learning_rate": 3.47091684595931e-05, "loss": 0.7347, "step": 656500 }, { "epoch": 0.92, "learning_rate": 3.469751851525654e-05, "loss": 0.7413, "step": 657000 }, { "epoch": 0.92, "learning_rate": 3.468586857091997e-05, "loss": 0.7415, "step": 657500 }, { "epoch": 0.92, "learning_rate": 3.4674218626583404e-05, "loss": 0.7383, "step": 658000 }, { "epoch": 0.92, "learning_rate": 3.466259198213551e-05, "loss": 0.737, "step": 658500 }, { "epoch": 0.92, "learning_rate": 3.4650942037798946e-05, "loss": 0.7321, "step": 659000 }, { "epoch": 0.92, "learning_rate": 3.463929209346238e-05, "loss": 0.7359, "step": 659500 }, { "epoch": 0.92, "learning_rate": 3.462764214912581e-05, "loss": 0.7408, "step": 660000 }, { "epoch": 0.92, "learning_rate": 3.461599220478925e-05, "loss": 0.7387, "step": 660500 }, { "epoch": 0.92, "learning_rate": 3.460434226045268e-05, "loss": 0.7339, "step": 661000 }, { "epoch": 0.92, "learning_rate": 3.4592692316116114e-05, "loss": 0.7356, "step": 661500 }, { "epoch": 0.93, "learning_rate": 3.458104237177955e-05, "loss": 0.7336, "step": 662000 }, { "epoch": 0.93, "learning_rate": 3.456939242744298e-05, "loss": 0.7345, "step": 662500 }, { "epoch": 0.93, "learning_rate": 3.455774248310642e-05, "loss": 0.7389, "step": 663000 }, { "epoch": 0.93, "learning_rate": 3.454611583865853e-05, "loss": 0.7298, "step": 663500 }, { "epoch": 0.93, "learning_rate": 3.453446589432196e-05, "loss": 0.736, "step": 664000 }, { "epoch": 0.93, "learning_rate": 3.452286254976274e-05, "loss": 0.7367, "step": 664500 }, { "epoch": 0.93, "learning_rate": 3.451121260542617e-05, "loss": 0.7413, "step": 665000 }, { "epoch": 0.93, "learning_rate": 3.4499562661089605e-05, "loss": 0.739, "step": 665500 }, { "epoch": 0.93, "learning_rate": 3.448791271675304e-05, "loss": 0.7437, "step": 666000 }, { "epoch": 0.93, "learning_rate": 3.4476262772416477e-05, "loss": 0.7421, "step": 666500 }, { "epoch": 0.93, "learning_rate": 3.446463612796858e-05, "loss": 0.7378, "step": 667000 }, { "epoch": 0.93, "learning_rate": 3.445298618363202e-05, "loss": 0.7381, "step": 667500 }, { "epoch": 0.93, "learning_rate": 3.444133623929545e-05, "loss": 0.7368, "step": 668000 }, { "epoch": 0.93, "learning_rate": 3.442968629495888e-05, "loss": 0.7347, "step": 668500 }, { "epoch": 0.94, "learning_rate": 3.4418036350622316e-05, "loss": 0.7359, "step": 669000 }, { "epoch": 0.94, "learning_rate": 3.440638640628575e-05, "loss": 0.7371, "step": 669500 }, { "epoch": 0.94, "learning_rate": 3.439473646194919e-05, "loss": 0.734, "step": 670000 }, { "epoch": 0.94, "learning_rate": 3.438308651761262e-05, "loss": 0.7397, "step": 670500 }, { "epoch": 0.94, "learning_rate": 3.437143657327606e-05, "loss": 0.733, "step": 671000 }, { "epoch": 0.94, "learning_rate": 3.435978662893949e-05, "loss": 0.7378, "step": 671500 }, { "epoch": 0.94, "learning_rate": 3.4348136684602923e-05, "loss": 0.7305, "step": 672000 }, { "epoch": 0.94, "learning_rate": 3.4336486740266356e-05, "loss": 0.736, "step": 672500 }, { "epoch": 0.94, "learning_rate": 3.4324836795929795e-05, "loss": 0.7397, "step": 673000 }, { "epoch": 0.94, "learning_rate": 3.4313256751259246e-05, "loss": 0.7345, "step": 673500 }, { "epoch": 0.94, "learning_rate": 3.430160680692268e-05, "loss": 0.7336, "step": 674000 }, { "epoch": 0.94, "learning_rate": 3.428995686258612e-05, "loss": 0.7354, "step": 674500 }, { "epoch": 0.94, "learning_rate": 3.427833021813822e-05, "loss": 0.7357, "step": 675000 }, { "epoch": 0.94, "learning_rate": 3.426668027380165e-05, "loss": 0.7326, "step": 675500 }, { "epoch": 0.95, "learning_rate": 3.4255030329465085e-05, "loss": 0.7368, "step": 676000 }, { "epoch": 0.95, "learning_rate": 3.4243380385128524e-05, "loss": 0.7383, "step": 676500 }, { "epoch": 0.95, "learning_rate": 3.4231730440791956e-05, "loss": 0.7324, "step": 677000 }, { "epoch": 0.95, "learning_rate": 3.4220080496455395e-05, "loss": 0.7343, "step": 677500 }, { "epoch": 0.95, "learning_rate": 3.420843055211883e-05, "loss": 0.733, "step": 678000 }, { "epoch": 0.95, "learning_rate": 3.419678060778226e-05, "loss": 0.7314, "step": 678500 }, { "epoch": 0.95, "learning_rate": 3.418513066344569e-05, "loss": 0.732, "step": 679000 }, { "epoch": 0.95, "learning_rate": 3.4173480719109125e-05, "loss": 0.7353, "step": 679500 }, { "epoch": 0.95, "learning_rate": 3.4161854074661234e-05, "loss": 0.7342, "step": 680000 }, { "epoch": 0.95, "learning_rate": 3.415020413032467e-05, "loss": 0.7301, "step": 680500 }, { "epoch": 0.95, "learning_rate": 3.4138554185988106e-05, "loss": 0.7311, "step": 681000 }, { "epoch": 0.95, "learning_rate": 3.412692754154021e-05, "loss": 0.7326, "step": 681500 }, { "epoch": 0.95, "learning_rate": 3.411527759720364e-05, "loss": 0.731, "step": 682000 }, { "epoch": 0.95, "learning_rate": 3.4103627652867074e-05, "loss": 0.734, "step": 682500 }, { "epoch": 0.95, "learning_rate": 3.4091977708530506e-05, "loss": 0.7337, "step": 683000 }, { "epoch": 0.96, "learning_rate": 3.4080327764193945e-05, "loss": 0.7311, "step": 683500 }, { "epoch": 0.96, "learning_rate": 3.406867781985738e-05, "loss": 0.7373, "step": 684000 }, { "epoch": 0.96, "learning_rate": 3.405705117540949e-05, "loss": 0.7345, "step": 684500 }, { "epoch": 0.96, "learning_rate": 3.404540123107292e-05, "loss": 0.7408, "step": 685000 }, { "epoch": 0.96, "learning_rate": 3.403375128673635e-05, "loss": 0.7346, "step": 685500 }, { "epoch": 0.96, "learning_rate": 3.402212464228846e-05, "loss": 0.7347, "step": 686000 }, { "epoch": 0.96, "learning_rate": 3.4010474697951894e-05, "loss": 0.7323, "step": 686500 }, { "epoch": 0.96, "learning_rate": 3.3998824753615326e-05, "loss": 0.7337, "step": 687000 }, { "epoch": 0.96, "learning_rate": 3.3987174809278765e-05, "loss": 0.7327, "step": 687500 }, { "epoch": 0.96, "learning_rate": 3.39755248649422e-05, "loss": 0.7318, "step": 688000 }, { "epoch": 0.96, "learning_rate": 3.396387492060563e-05, "loss": 0.7357, "step": 688500 }, { "epoch": 0.96, "learning_rate": 3.395224827615774e-05, "loss": 0.7335, "step": 689000 }, { "epoch": 0.96, "learning_rate": 3.394059833182117e-05, "loss": 0.732, "step": 689500 }, { "epoch": 0.96, "learning_rate": 3.3928948387484604e-05, "loss": 0.7314, "step": 690000 } ], "max_steps": 2145933, "num_train_epochs": 3, "total_flos": 1.543433599991808e+19, "trial_name": null, "trial_params": null }