{ "best_metric": null, "best_model_checkpoint": null, "epoch": 199.88412514484358, "global_step": 690000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 4.996378910776362e-05, "loss": 8.4547, "step": 500 }, { "epoch": 0.29, "learning_rate": 4.9927578215527235e-05, "loss": 7.8499, "step": 1000 }, { "epoch": 0.43, "learning_rate": 4.989136732329085e-05, "loss": 7.554, "step": 1500 }, { "epoch": 0.58, "learning_rate": 4.985515643105446e-05, "loss": 7.4955, "step": 2000 }, { "epoch": 0.72, "learning_rate": 4.9818945538818076e-05, "loss": 7.2532, "step": 2500 }, { "epoch": 0.87, "learning_rate": 4.97827346465817e-05, "loss": 7.2584, "step": 3000 }, { "epoch": 1.01, "learning_rate": 4.974652375434531e-05, "loss": 7.175, "step": 3500 }, { "epoch": 1.16, "learning_rate": 4.9710312862108924e-05, "loss": 6.9372, "step": 4000 }, { "epoch": 1.3, "learning_rate": 4.967410196987254e-05, "loss": 6.9367, "step": 4500 }, { "epoch": 1.45, "learning_rate": 4.9637891077636157e-05, "loss": 6.7982, "step": 5000 }, { "epoch": 1.59, "learning_rate": 4.9601680185399766e-05, "loss": 6.9367, "step": 5500 }, { "epoch": 1.74, "learning_rate": 4.956546929316339e-05, "loss": 6.7646, "step": 6000 }, { "epoch": 1.88, "learning_rate": 4.9529258400927005e-05, "loss": 6.8283, "step": 6500 }, { "epoch": 2.03, "learning_rate": 4.9493047508690614e-05, "loss": 6.6735, "step": 7000 }, { "epoch": 2.17, "learning_rate": 4.945683661645423e-05, "loss": 6.5376, "step": 7500 }, { "epoch": 2.32, "learning_rate": 4.9420625724217846e-05, "loss": 6.4529, "step": 8000 }, { "epoch": 2.46, "learning_rate": 4.938441483198146e-05, "loss": 6.5368, "step": 8500 }, { "epoch": 2.61, "learning_rate": 4.934820393974508e-05, "loss": 6.4591, "step": 9000 }, { "epoch": 2.75, "learning_rate": 4.9311993047508695e-05, "loss": 6.313, "step": 9500 }, { "epoch": 2.9, "learning_rate": 4.927578215527231e-05, "loss": 6.4253, "step": 10000 }, { "epoch": 3.04, "learning_rate": 4.923957126303592e-05, "loss": 6.2994, "step": 10500 }, { "epoch": 3.19, "learning_rate": 4.9203360370799536e-05, "loss": 6.1315, "step": 11000 }, { "epoch": 3.33, "learning_rate": 4.916714947856315e-05, "loss": 6.2794, "step": 11500 }, { "epoch": 3.48, "learning_rate": 4.913093858632677e-05, "loss": 6.2569, "step": 12000 }, { "epoch": 3.62, "learning_rate": 4.9094727694090384e-05, "loss": 6.1676, "step": 12500 }, { "epoch": 3.77, "learning_rate": 4.9058516801854e-05, "loss": 6.0854, "step": 13000 }, { "epoch": 3.91, "learning_rate": 4.902230590961762e-05, "loss": 6.1253, "step": 13500 }, { "epoch": 4.06, "learning_rate": 4.8986095017381226e-05, "loss": 6.1414, "step": 14000 }, { "epoch": 4.2, "learning_rate": 4.894988412514485e-05, "loss": 6.0679, "step": 14500 }, { "epoch": 4.35, "learning_rate": 4.8913673232908465e-05, "loss": 6.0058, "step": 15000 }, { "epoch": 4.49, "learning_rate": 4.8877462340672074e-05, "loss": 6.0498, "step": 15500 }, { "epoch": 4.63, "learning_rate": 4.884125144843569e-05, "loss": 5.9609, "step": 16000 }, { "epoch": 4.78, "learning_rate": 4.8805040556199306e-05, "loss": 5.9032, "step": 16500 }, { "epoch": 4.92, "learning_rate": 4.876882966396292e-05, "loss": 5.7145, "step": 17000 }, { "epoch": 5.07, "learning_rate": 4.873261877172654e-05, "loss": 5.8655, "step": 17500 }, { "epoch": 5.21, "learning_rate": 4.8696407879490155e-05, "loss": 5.7881, "step": 18000 }, { "epoch": 5.36, "learning_rate": 4.866019698725377e-05, "loss": 5.7224, "step": 18500 }, { "epoch": 5.5, "learning_rate": 4.862398609501738e-05, "loss": 5.8037, "step": 19000 }, { "epoch": 5.65, "learning_rate": 4.8587775202780996e-05, "loss": 5.7771, "step": 19500 }, { "epoch": 5.79, "learning_rate": 4.855156431054461e-05, "loss": 5.6896, "step": 20000 }, { "epoch": 5.94, "learning_rate": 4.851535341830823e-05, "loss": 5.6381, "step": 20500 }, { "epoch": 6.08, "learning_rate": 4.8479142526071845e-05, "loss": 5.7065, "step": 21000 }, { "epoch": 6.23, "learning_rate": 4.844293163383546e-05, "loss": 5.6466, "step": 21500 }, { "epoch": 6.37, "learning_rate": 4.840672074159908e-05, "loss": 5.5202, "step": 22000 }, { "epoch": 6.52, "learning_rate": 4.8370509849362686e-05, "loss": 5.553, "step": 22500 }, { "epoch": 6.66, "learning_rate": 4.833429895712631e-05, "loss": 5.523, "step": 23000 }, { "epoch": 6.81, "learning_rate": 4.8298088064889925e-05, "loss": 5.4201, "step": 23500 }, { "epoch": 6.95, "learning_rate": 4.8261877172653534e-05, "loss": 5.5552, "step": 24000 }, { "epoch": 7.1, "learning_rate": 4.822566628041715e-05, "loss": 5.361, "step": 24500 }, { "epoch": 7.24, "learning_rate": 4.8189455388180767e-05, "loss": 5.4092, "step": 25000 }, { "epoch": 7.39, "learning_rate": 4.815324449594438e-05, "loss": 5.3235, "step": 25500 }, { "epoch": 7.53, "learning_rate": 4.811703360370799e-05, "loss": 5.4796, "step": 26000 }, { "epoch": 7.68, "learning_rate": 4.8080822711471615e-05, "loss": 5.3789, "step": 26500 }, { "epoch": 7.82, "learning_rate": 4.804461181923523e-05, "loss": 5.2428, "step": 27000 }, { "epoch": 7.97, "learning_rate": 4.800840092699884e-05, "loss": 5.4215, "step": 27500 }, { "epoch": 8.11, "learning_rate": 4.797219003476246e-05, "loss": 5.2112, "step": 28000 }, { "epoch": 8.26, "learning_rate": 4.793597914252607e-05, "loss": 5.4211, "step": 28500 }, { "epoch": 8.4, "learning_rate": 4.789976825028969e-05, "loss": 5.2253, "step": 29000 }, { "epoch": 8.55, "learning_rate": 4.7863557358053305e-05, "loss": 5.1749, "step": 29500 }, { "epoch": 8.69, "learning_rate": 4.782734646581692e-05, "loss": 5.121, "step": 30000 }, { "epoch": 8.84, "learning_rate": 4.779113557358054e-05, "loss": 5.1387, "step": 30500 }, { "epoch": 8.98, "learning_rate": 4.7754924681344146e-05, "loss": 5.198, "step": 31000 }, { "epoch": 9.13, "learning_rate": 4.771871378910777e-05, "loss": 4.9978, "step": 31500 }, { "epoch": 9.27, "learning_rate": 4.7682502896871385e-05, "loss": 4.9791, "step": 32000 }, { "epoch": 9.41, "learning_rate": 4.7646292004634994e-05, "loss": 4.9845, "step": 32500 }, { "epoch": 9.56, "learning_rate": 4.761008111239861e-05, "loss": 4.8999, "step": 33000 }, { "epoch": 9.7, "learning_rate": 4.7573870220162227e-05, "loss": 4.9872, "step": 33500 }, { "epoch": 9.85, "learning_rate": 4.753765932792584e-05, "loss": 5.0741, "step": 34000 }, { "epoch": 9.99, "learning_rate": 4.750144843568945e-05, "loss": 5.0205, "step": 34500 }, { "epoch": 10.14, "learning_rate": 4.7465237543453075e-05, "loss": 4.7512, "step": 35000 }, { "epoch": 10.28, "learning_rate": 4.742902665121669e-05, "loss": 4.9417, "step": 35500 }, { "epoch": 10.43, "learning_rate": 4.73928157589803e-05, "loss": 4.9746, "step": 36000 }, { "epoch": 10.57, "learning_rate": 4.735660486674392e-05, "loss": 4.747, "step": 36500 }, { "epoch": 10.72, "learning_rate": 4.732039397450753e-05, "loss": 4.6858, "step": 37000 }, { "epoch": 10.86, "learning_rate": 4.728418308227115e-05, "loss": 4.8208, "step": 37500 }, { "epoch": 11.01, "learning_rate": 4.7247972190034765e-05, "loss": 4.7271, "step": 38000 }, { "epoch": 11.15, "learning_rate": 4.721176129779838e-05, "loss": 4.6115, "step": 38500 }, { "epoch": 11.3, "learning_rate": 4.7175550405562e-05, "loss": 4.7304, "step": 39000 }, { "epoch": 11.44, "learning_rate": 4.7139339513325606e-05, "loss": 4.4773, "step": 39500 }, { "epoch": 11.59, "learning_rate": 4.710312862108923e-05, "loss": 4.6107, "step": 40000 }, { "epoch": 11.73, "learning_rate": 4.706691772885284e-05, "loss": 4.5442, "step": 40500 }, { "epoch": 11.88, "learning_rate": 4.7030706836616454e-05, "loss": 4.744, "step": 41000 }, { "epoch": 12.02, "learning_rate": 4.699449594438007e-05, "loss": 4.6054, "step": 41500 }, { "epoch": 12.17, "learning_rate": 4.695828505214369e-05, "loss": 4.6134, "step": 42000 }, { "epoch": 12.31, "learning_rate": 4.69220741599073e-05, "loss": 4.578, "step": 42500 }, { "epoch": 12.46, "learning_rate": 4.688586326767091e-05, "loss": 4.4292, "step": 43000 }, { "epoch": 12.6, "learning_rate": 4.6849652375434535e-05, "loss": 4.364, "step": 43500 }, { "epoch": 12.75, "learning_rate": 4.681344148319815e-05, "loss": 4.3788, "step": 44000 }, { "epoch": 12.89, "learning_rate": 4.677723059096176e-05, "loss": 4.4367, "step": 44500 }, { "epoch": 13.04, "learning_rate": 4.674101969872538e-05, "loss": 4.3541, "step": 45000 }, { "epoch": 13.18, "learning_rate": 4.670480880648899e-05, "loss": 4.2888, "step": 45500 }, { "epoch": 13.33, "learning_rate": 4.666859791425261e-05, "loss": 4.387, "step": 46000 }, { "epoch": 13.47, "learning_rate": 4.6632387022016225e-05, "loss": 4.3504, "step": 46500 }, { "epoch": 13.62, "learning_rate": 4.659617612977984e-05, "loss": 4.2987, "step": 47000 }, { "epoch": 13.76, "learning_rate": 4.655996523754346e-05, "loss": 4.3652, "step": 47500 }, { "epoch": 13.9, "learning_rate": 4.6523754345307066e-05, "loss": 4.4629, "step": 48000 }, { "epoch": 14.05, "learning_rate": 4.648754345307069e-05, "loss": 4.2083, "step": 48500 }, { "epoch": 14.19, "learning_rate": 4.64513325608343e-05, "loss": 4.2904, "step": 49000 }, { "epoch": 14.34, "learning_rate": 4.6415121668597915e-05, "loss": 4.2137, "step": 49500 }, { "epoch": 14.48, "learning_rate": 4.637891077636153e-05, "loss": 4.2921, "step": 50000 }, { "epoch": 14.63, "learning_rate": 4.634269988412515e-05, "loss": 4.2011, "step": 50500 }, { "epoch": 14.77, "learning_rate": 4.630648899188876e-05, "loss": 4.1971, "step": 51000 }, { "epoch": 14.92, "learning_rate": 4.627027809965237e-05, "loss": 4.2372, "step": 51500 }, { "epoch": 15.06, "learning_rate": 4.6234067207415995e-05, "loss": 4.1604, "step": 52000 }, { "epoch": 15.21, "learning_rate": 4.619785631517961e-05, "loss": 4.0747, "step": 52500 }, { "epoch": 15.35, "learning_rate": 4.616164542294322e-05, "loss": 4.1311, "step": 53000 }, { "epoch": 15.5, "learning_rate": 4.612543453070684e-05, "loss": 4.1403, "step": 53500 }, { "epoch": 15.64, "learning_rate": 4.608922363847045e-05, "loss": 4.2004, "step": 54000 }, { "epoch": 15.79, "learning_rate": 4.605301274623407e-05, "loss": 4.1078, "step": 54500 }, { "epoch": 15.93, "learning_rate": 4.6016801853997685e-05, "loss": 4.1072, "step": 55000 }, { "epoch": 16.08, "learning_rate": 4.59805909617613e-05, "loss": 4.1097, "step": 55500 }, { "epoch": 16.22, "learning_rate": 4.594438006952492e-05, "loss": 3.901, "step": 56000 }, { "epoch": 16.37, "learning_rate": 4.5908169177288526e-05, "loss": 4.0599, "step": 56500 }, { "epoch": 16.51, "learning_rate": 4.587195828505215e-05, "loss": 4.0458, "step": 57000 }, { "epoch": 16.66, "learning_rate": 4.583574739281576e-05, "loss": 4.0344, "step": 57500 }, { "epoch": 16.8, "learning_rate": 4.5799536500579375e-05, "loss": 3.9669, "step": 58000 }, { "epoch": 16.95, "learning_rate": 4.576332560834299e-05, "loss": 4.0911, "step": 58500 }, { "epoch": 17.09, "learning_rate": 4.572711471610661e-05, "loss": 4.0642, "step": 59000 }, { "epoch": 17.24, "learning_rate": 4.569090382387022e-05, "loss": 3.9691, "step": 59500 }, { "epoch": 17.38, "learning_rate": 4.565469293163384e-05, "loss": 3.8211, "step": 60000 }, { "epoch": 17.53, "learning_rate": 4.5618482039397455e-05, "loss": 3.8802, "step": 60500 }, { "epoch": 17.67, "learning_rate": 4.558227114716107e-05, "loss": 3.9179, "step": 61000 }, { "epoch": 17.82, "learning_rate": 4.554606025492468e-05, "loss": 3.9943, "step": 61500 }, { "epoch": 17.96, "learning_rate": 4.55098493626883e-05, "loss": 3.9248, "step": 62000 }, { "epoch": 18.11, "learning_rate": 4.547363847045191e-05, "loss": 3.8573, "step": 62500 }, { "epoch": 18.25, "learning_rate": 4.543742757821553e-05, "loss": 3.8063, "step": 63000 }, { "epoch": 18.4, "learning_rate": 4.5401216685979145e-05, "loss": 3.7963, "step": 63500 }, { "epoch": 18.54, "learning_rate": 4.536500579374276e-05, "loss": 3.923, "step": 64000 }, { "epoch": 18.68, "learning_rate": 4.532879490150638e-05, "loss": 3.817, "step": 64500 }, { "epoch": 18.83, "learning_rate": 4.5292584009269986e-05, "loss": 3.8602, "step": 65000 }, { "epoch": 18.97, "learning_rate": 4.525637311703361e-05, "loss": 3.6989, "step": 65500 }, { "epoch": 19.12, "learning_rate": 4.522016222479722e-05, "loss": 3.7462, "step": 66000 }, { "epoch": 19.26, "learning_rate": 4.5183951332560835e-05, "loss": 3.7723, "step": 66500 }, { "epoch": 19.41, "learning_rate": 4.514774044032445e-05, "loss": 3.8287, "step": 67000 }, { "epoch": 19.55, "learning_rate": 4.511152954808807e-05, "loss": 3.7813, "step": 67500 }, { "epoch": 19.7, "learning_rate": 4.507531865585168e-05, "loss": 3.893, "step": 68000 }, { "epoch": 19.84, "learning_rate": 4.50391077636153e-05, "loss": 3.8048, "step": 68500 }, { "epoch": 19.99, "learning_rate": 4.5002896871378915e-05, "loss": 3.7366, "step": 69000 }, { "epoch": 20.13, "learning_rate": 4.4966685979142524e-05, "loss": 3.6066, "step": 69500 }, { "epoch": 20.28, "learning_rate": 4.493047508690614e-05, "loss": 3.6848, "step": 70000 }, { "epoch": 20.42, "learning_rate": 4.4894264194669763e-05, "loss": 3.7469, "step": 70500 }, { "epoch": 20.57, "learning_rate": 4.485805330243337e-05, "loss": 3.7087, "step": 71000 }, { "epoch": 20.71, "learning_rate": 4.482184241019699e-05, "loss": 3.7463, "step": 71500 }, { "epoch": 20.86, "learning_rate": 4.4785631517960605e-05, "loss": 3.6861, "step": 72000 }, { "epoch": 21.0, "learning_rate": 4.474942062572422e-05, "loss": 3.7919, "step": 72500 }, { "epoch": 21.15, "learning_rate": 4.471320973348784e-05, "loss": 3.6995, "step": 73000 }, { "epoch": 21.29, "learning_rate": 4.4676998841251446e-05, "loss": 3.5774, "step": 73500 }, { "epoch": 21.44, "learning_rate": 4.464078794901507e-05, "loss": 3.6384, "step": 74000 }, { "epoch": 21.58, "learning_rate": 4.460457705677868e-05, "loss": 3.7398, "step": 74500 }, { "epoch": 21.73, "learning_rate": 4.4568366164542295e-05, "loss": 3.6163, "step": 75000 }, { "epoch": 21.87, "learning_rate": 4.453215527230592e-05, "loss": 3.6542, "step": 75500 }, { "epoch": 22.02, "learning_rate": 4.449594438006953e-05, "loss": 3.5554, "step": 76000 }, { "epoch": 22.16, "learning_rate": 4.445973348783314e-05, "loss": 3.4238, "step": 76500 }, { "epoch": 22.31, "learning_rate": 4.442352259559676e-05, "loss": 3.5048, "step": 77000 }, { "epoch": 22.45, "learning_rate": 4.4387311703360375e-05, "loss": 3.3922, "step": 77500 }, { "epoch": 22.6, "learning_rate": 4.4351100811123985e-05, "loss": 3.5757, "step": 78000 }, { "epoch": 22.74, "learning_rate": 4.43148899188876e-05, "loss": 3.6146, "step": 78500 }, { "epoch": 22.89, "learning_rate": 4.4278679026651224e-05, "loss": 3.4844, "step": 79000 }, { "epoch": 23.03, "learning_rate": 4.424246813441483e-05, "loss": 3.5872, "step": 79500 }, { "epoch": 23.17, "learning_rate": 4.420625724217845e-05, "loss": 3.336, "step": 80000 }, { "epoch": 23.32, "learning_rate": 4.4170046349942065e-05, "loss": 3.4509, "step": 80500 }, { "epoch": 23.46, "learning_rate": 4.413383545770568e-05, "loss": 3.5564, "step": 81000 }, { "epoch": 23.61, "learning_rate": 4.40976245654693e-05, "loss": 3.4449, "step": 81500 }, { "epoch": 23.75, "learning_rate": 4.4061413673232907e-05, "loss": 3.4913, "step": 82000 }, { "epoch": 23.9, "learning_rate": 4.402520278099653e-05, "loss": 3.6125, "step": 82500 }, { "epoch": 24.04, "learning_rate": 4.398899188876014e-05, "loss": 3.3634, "step": 83000 }, { "epoch": 24.19, "learning_rate": 4.3952780996523755e-05, "loss": 3.5246, "step": 83500 }, { "epoch": 24.33, "learning_rate": 4.391657010428738e-05, "loss": 3.3986, "step": 84000 }, { "epoch": 24.48, "learning_rate": 4.388035921205099e-05, "loss": 3.4707, "step": 84500 }, { "epoch": 24.62, "learning_rate": 4.38441483198146e-05, "loss": 3.4469, "step": 85000 }, { "epoch": 24.77, "learning_rate": 4.380793742757822e-05, "loss": 3.3893, "step": 85500 }, { "epoch": 24.91, "learning_rate": 4.3771726535341835e-05, "loss": 3.3876, "step": 86000 }, { "epoch": 25.06, "learning_rate": 4.3735515643105445e-05, "loss": 3.3913, "step": 86500 }, { "epoch": 25.2, "learning_rate": 4.369930475086906e-05, "loss": 3.472, "step": 87000 }, { "epoch": 25.35, "learning_rate": 4.3663093858632684e-05, "loss": 3.2559, "step": 87500 }, { "epoch": 25.49, "learning_rate": 4.362688296639629e-05, "loss": 3.461, "step": 88000 }, { "epoch": 25.64, "learning_rate": 4.359067207415991e-05, "loss": 3.3586, "step": 88500 }, { "epoch": 25.78, "learning_rate": 4.3554461181923525e-05, "loss": 3.2862, "step": 89000 }, { "epoch": 25.93, "learning_rate": 4.351825028968714e-05, "loss": 3.2737, "step": 89500 }, { "epoch": 26.07, "learning_rate": 4.348203939745076e-05, "loss": 3.2978, "step": 90000 }, { "epoch": 26.22, "learning_rate": 4.344582850521437e-05, "loss": 3.2151, "step": 90500 }, { "epoch": 26.36, "learning_rate": 4.340961761297799e-05, "loss": 3.339, "step": 91000 }, { "epoch": 26.51, "learning_rate": 4.33734067207416e-05, "loss": 3.2553, "step": 91500 }, { "epoch": 26.65, "learning_rate": 4.3337195828505215e-05, "loss": 3.1708, "step": 92000 }, { "epoch": 26.8, "learning_rate": 4.330098493626883e-05, "loss": 3.2419, "step": 92500 }, { "epoch": 26.94, "learning_rate": 4.326477404403245e-05, "loss": 3.3119, "step": 93000 }, { "epoch": 27.09, "learning_rate": 4.322856315179606e-05, "loss": 3.1761, "step": 93500 }, { "epoch": 27.23, "learning_rate": 4.319235225955968e-05, "loss": 3.0938, "step": 94000 }, { "epoch": 27.38, "learning_rate": 4.3156141367323295e-05, "loss": 3.2, "step": 94500 }, { "epoch": 27.52, "learning_rate": 4.3119930475086905e-05, "loss": 3.2952, "step": 95000 }, { "epoch": 27.67, "learning_rate": 4.308371958285052e-05, "loss": 3.2587, "step": 95500 }, { "epoch": 27.81, "learning_rate": 4.3047508690614144e-05, "loss": 3.1839, "step": 96000 }, { "epoch": 27.95, "learning_rate": 4.301129779837775e-05, "loss": 3.2991, "step": 96500 }, { "epoch": 28.1, "learning_rate": 4.297508690614137e-05, "loss": 3.2873, "step": 97000 }, { "epoch": 28.24, "learning_rate": 4.2938876013904985e-05, "loss": 3.2506, "step": 97500 }, { "epoch": 28.39, "learning_rate": 4.29026651216686e-05, "loss": 3.0849, "step": 98000 }, { "epoch": 28.53, "learning_rate": 4.286645422943222e-05, "loss": 3.1321, "step": 98500 }, { "epoch": 28.68, "learning_rate": 4.283024333719583e-05, "loss": 3.0935, "step": 99000 }, { "epoch": 28.82, "learning_rate": 4.279403244495945e-05, "loss": 3.1311, "step": 99500 }, { "epoch": 28.97, "learning_rate": 4.275782155272306e-05, "loss": 3.0079, "step": 100000 }, { "epoch": 29.11, "learning_rate": 4.2721610660486675e-05, "loss": 3.0359, "step": 100500 }, { "epoch": 29.26, "learning_rate": 4.268539976825029e-05, "loss": 3.1054, "step": 101000 }, { "epoch": 29.4, "learning_rate": 4.264918887601391e-05, "loss": 2.9851, "step": 101500 }, { "epoch": 29.55, "learning_rate": 4.261297798377752e-05, "loss": 3.0301, "step": 102000 }, { "epoch": 29.69, "learning_rate": 4.257676709154114e-05, "loss": 3.0815, "step": 102500 }, { "epoch": 29.84, "learning_rate": 4.2540556199304755e-05, "loss": 3.0711, "step": 103000 }, { "epoch": 29.98, "learning_rate": 4.2504345307068365e-05, "loss": 3.2656, "step": 103500 }, { "epoch": 30.13, "learning_rate": 4.246813441483198e-05, "loss": 2.9655, "step": 104000 }, { "epoch": 30.27, "learning_rate": 4.2431923522595604e-05, "loss": 2.9877, "step": 104500 }, { "epoch": 30.42, "learning_rate": 4.239571263035921e-05, "loss": 3.1679, "step": 105000 }, { "epoch": 30.56, "learning_rate": 4.235950173812283e-05, "loss": 2.9898, "step": 105500 }, { "epoch": 30.71, "learning_rate": 4.2323290845886445e-05, "loss": 3.0315, "step": 106000 }, { "epoch": 30.85, "learning_rate": 4.228707995365006e-05, "loss": 2.9994, "step": 106500 }, { "epoch": 31.0, "learning_rate": 4.225086906141367e-05, "loss": 3.0117, "step": 107000 }, { "epoch": 31.14, "learning_rate": 4.2214658169177294e-05, "loss": 2.9706, "step": 107500 }, { "epoch": 31.29, "learning_rate": 4.217844727694091e-05, "loss": 2.8996, "step": 108000 }, { "epoch": 31.43, "learning_rate": 4.214223638470452e-05, "loss": 2.8631, "step": 108500 }, { "epoch": 31.58, "learning_rate": 4.2106025492468135e-05, "loss": 3.0409, "step": 109000 }, { "epoch": 31.72, "learning_rate": 4.206981460023175e-05, "loss": 2.9536, "step": 109500 }, { "epoch": 31.87, "learning_rate": 4.203360370799537e-05, "loss": 2.9889, "step": 110000 }, { "epoch": 32.01, "learning_rate": 4.199739281575898e-05, "loss": 2.9418, "step": 110500 }, { "epoch": 32.16, "learning_rate": 4.19611819235226e-05, "loss": 2.9762, "step": 111000 }, { "epoch": 32.3, "learning_rate": 4.1924971031286216e-05, "loss": 2.8923, "step": 111500 }, { "epoch": 32.44, "learning_rate": 4.1888760139049825e-05, "loss": 2.8679, "step": 112000 }, { "epoch": 32.59, "learning_rate": 4.185254924681344e-05, "loss": 3.0042, "step": 112500 }, { "epoch": 32.73, "learning_rate": 4.1816338354577064e-05, "loss": 2.7931, "step": 113000 }, { "epoch": 32.88, "learning_rate": 4.178012746234067e-05, "loss": 3.0075, "step": 113500 }, { "epoch": 33.02, "learning_rate": 4.174391657010429e-05, "loss": 2.9121, "step": 114000 }, { "epoch": 33.17, "learning_rate": 4.1707705677867905e-05, "loss": 2.8189, "step": 114500 }, { "epoch": 33.31, "learning_rate": 4.167149478563152e-05, "loss": 2.7948, "step": 115000 }, { "epoch": 33.46, "learning_rate": 4.163528389339513e-05, "loss": 2.8265, "step": 115500 }, { "epoch": 33.6, "learning_rate": 4.1599073001158754e-05, "loss": 2.934, "step": 116000 }, { "epoch": 33.75, "learning_rate": 4.156286210892237e-05, "loss": 2.8438, "step": 116500 }, { "epoch": 33.89, "learning_rate": 4.152665121668598e-05, "loss": 2.9726, "step": 117000 }, { "epoch": 34.04, "learning_rate": 4.1490440324449595e-05, "loss": 2.7706, "step": 117500 }, { "epoch": 34.18, "learning_rate": 4.145422943221321e-05, "loss": 2.7422, "step": 118000 }, { "epoch": 34.33, "learning_rate": 4.141801853997683e-05, "loss": 2.7464, "step": 118500 }, { "epoch": 34.47, "learning_rate": 4.1381807647740443e-05, "loss": 2.842, "step": 119000 }, { "epoch": 34.62, "learning_rate": 4.134559675550406e-05, "loss": 2.7679, "step": 119500 }, { "epoch": 34.76, "learning_rate": 4.1309385863267676e-05, "loss": 2.8504, "step": 120000 }, { "epoch": 34.91, "learning_rate": 4.1273174971031285e-05, "loss": 2.7849, "step": 120500 }, { "epoch": 35.05, "learning_rate": 4.12369640787949e-05, "loss": 2.7947, "step": 121000 }, { "epoch": 35.2, "learning_rate": 4.120075318655852e-05, "loss": 2.6207, "step": 121500 }, { "epoch": 35.34, "learning_rate": 4.116454229432213e-05, "loss": 2.7135, "step": 122000 }, { "epoch": 35.49, "learning_rate": 4.112833140208575e-05, "loss": 2.796, "step": 122500 }, { "epoch": 35.63, "learning_rate": 4.1092120509849365e-05, "loss": 2.7093, "step": 123000 }, { "epoch": 35.78, "learning_rate": 4.105590961761298e-05, "loss": 2.722, "step": 123500 }, { "epoch": 35.92, "learning_rate": 4.101969872537659e-05, "loss": 2.6685, "step": 124000 }, { "epoch": 36.07, "learning_rate": 4.0983487833140214e-05, "loss": 2.8914, "step": 124500 }, { "epoch": 36.21, "learning_rate": 4.094727694090383e-05, "loss": 2.7502, "step": 125000 }, { "epoch": 36.36, "learning_rate": 4.091106604866744e-05, "loss": 2.6001, "step": 125500 }, { "epoch": 36.5, "learning_rate": 4.0874855156431055e-05, "loss": 2.6482, "step": 126000 }, { "epoch": 36.65, "learning_rate": 4.083864426419467e-05, "loss": 2.5456, "step": 126500 }, { "epoch": 36.79, "learning_rate": 4.080243337195829e-05, "loss": 2.7484, "step": 127000 }, { "epoch": 36.94, "learning_rate": 4.0766222479721903e-05, "loss": 2.706, "step": 127500 }, { "epoch": 37.08, "learning_rate": 4.073001158748552e-05, "loss": 2.6221, "step": 128000 }, { "epoch": 37.22, "learning_rate": 4.0693800695249136e-05, "loss": 2.5343, "step": 128500 }, { "epoch": 37.37, "learning_rate": 4.0657589803012745e-05, "loss": 2.5271, "step": 129000 }, { "epoch": 37.51, "learning_rate": 4.062137891077636e-05, "loss": 2.6493, "step": 129500 }, { "epoch": 37.66, "learning_rate": 4.058516801853998e-05, "loss": 2.6699, "step": 130000 }, { "epoch": 37.8, "learning_rate": 4.054895712630359e-05, "loss": 2.6375, "step": 130500 }, { "epoch": 37.95, "learning_rate": 4.051274623406721e-05, "loss": 2.6659, "step": 131000 }, { "epoch": 38.09, "learning_rate": 4.0476535341830825e-05, "loss": 2.6404, "step": 131500 }, { "epoch": 38.24, "learning_rate": 4.044032444959444e-05, "loss": 2.4527, "step": 132000 }, { "epoch": 38.38, "learning_rate": 4.040411355735805e-05, "loss": 2.4812, "step": 132500 }, { "epoch": 38.53, "learning_rate": 4.0367902665121674e-05, "loss": 2.675, "step": 133000 }, { "epoch": 38.67, "learning_rate": 4.033169177288529e-05, "loss": 2.6116, "step": 133500 }, { "epoch": 38.82, "learning_rate": 4.02954808806489e-05, "loss": 2.5089, "step": 134000 }, { "epoch": 38.96, "learning_rate": 4.0259269988412515e-05, "loss": 2.6363, "step": 134500 }, { "epoch": 39.11, "learning_rate": 4.022305909617613e-05, "loss": 2.4868, "step": 135000 }, { "epoch": 39.25, "learning_rate": 4.018684820393975e-05, "loss": 2.4665, "step": 135500 }, { "epoch": 39.4, "learning_rate": 4.015063731170336e-05, "loss": 2.5217, "step": 136000 }, { "epoch": 39.54, "learning_rate": 4.011442641946698e-05, "loss": 2.5457, "step": 136500 }, { "epoch": 39.69, "learning_rate": 4.0078215527230596e-05, "loss": 2.5308, "step": 137000 }, { "epoch": 39.83, "learning_rate": 4.0042004634994205e-05, "loss": 2.5148, "step": 137500 }, { "epoch": 39.98, "learning_rate": 4.000579374275782e-05, "loss": 2.552, "step": 138000 }, { "epoch": 40.12, "learning_rate": 3.996958285052144e-05, "loss": 2.5229, "step": 138500 }, { "epoch": 40.27, "learning_rate": 3.993337195828505e-05, "loss": 2.3691, "step": 139000 }, { "epoch": 40.41, "learning_rate": 3.989716106604867e-05, "loss": 2.4165, "step": 139500 }, { "epoch": 40.56, "learning_rate": 3.9860950173812286e-05, "loss": 2.5134, "step": 140000 }, { "epoch": 40.7, "learning_rate": 3.98247392815759e-05, "loss": 2.4526, "step": 140500 }, { "epoch": 40.85, "learning_rate": 3.978852838933951e-05, "loss": 2.4467, "step": 141000 }, { "epoch": 40.99, "learning_rate": 3.9752317497103134e-05, "loss": 2.4586, "step": 141500 }, { "epoch": 41.14, "learning_rate": 3.971610660486675e-05, "loss": 2.2715, "step": 142000 }, { "epoch": 41.28, "learning_rate": 3.967989571263036e-05, "loss": 2.4122, "step": 142500 }, { "epoch": 41.43, "learning_rate": 3.9643684820393975e-05, "loss": 2.5038, "step": 143000 }, { "epoch": 41.57, "learning_rate": 3.960747392815759e-05, "loss": 2.4213, "step": 143500 }, { "epoch": 41.71, "learning_rate": 3.957126303592121e-05, "loss": 2.3776, "step": 144000 }, { "epoch": 41.86, "learning_rate": 3.953505214368482e-05, "loss": 2.4879, "step": 144500 }, { "epoch": 42.0, "learning_rate": 3.949884125144844e-05, "loss": 2.3793, "step": 145000 }, { "epoch": 42.15, "learning_rate": 3.9462630359212056e-05, "loss": 2.3577, "step": 145500 }, { "epoch": 42.29, "learning_rate": 3.9426419466975665e-05, "loss": 2.2899, "step": 146000 }, { "epoch": 42.44, "learning_rate": 3.939020857473928e-05, "loss": 2.2713, "step": 146500 }, { "epoch": 42.58, "learning_rate": 3.93539976825029e-05, "loss": 2.4175, "step": 147000 }, { "epoch": 42.73, "learning_rate": 3.9317786790266513e-05, "loss": 2.3225, "step": 147500 }, { "epoch": 42.87, "learning_rate": 3.928157589803013e-05, "loss": 2.4637, "step": 148000 }, { "epoch": 43.02, "learning_rate": 3.9245365005793746e-05, "loss": 2.4782, "step": 148500 }, { "epoch": 43.16, "learning_rate": 3.920915411355736e-05, "loss": 2.2441, "step": 149000 }, { "epoch": 43.31, "learning_rate": 3.917294322132097e-05, "loss": 2.3654, "step": 149500 }, { "epoch": 43.45, "learning_rate": 3.9136732329084594e-05, "loss": 2.3439, "step": 150000 }, { "epoch": 43.6, "learning_rate": 3.91005214368482e-05, "loss": 2.1964, "step": 150500 }, { "epoch": 43.74, "learning_rate": 3.906431054461182e-05, "loss": 2.3778, "step": 151000 }, { "epoch": 43.89, "learning_rate": 3.9028099652375435e-05, "loss": 2.3038, "step": 151500 }, { "epoch": 44.03, "learning_rate": 3.899188876013905e-05, "loss": 2.3877, "step": 152000 }, { "epoch": 44.18, "learning_rate": 3.895567786790267e-05, "loss": 2.2923, "step": 152500 }, { "epoch": 44.32, "learning_rate": 3.891946697566628e-05, "loss": 2.1665, "step": 153000 }, { "epoch": 44.47, "learning_rate": 3.88832560834299e-05, "loss": 2.2541, "step": 153500 }, { "epoch": 44.61, "learning_rate": 3.8847045191193516e-05, "loss": 2.3349, "step": 154000 }, { "epoch": 44.76, "learning_rate": 3.8810834298957125e-05, "loss": 2.1701, "step": 154500 }, { "epoch": 44.9, "learning_rate": 3.877462340672075e-05, "loss": 2.3005, "step": 155000 }, { "epoch": 45.05, "learning_rate": 3.873841251448436e-05, "loss": 2.2592, "step": 155500 }, { "epoch": 45.19, "learning_rate": 3.8702201622247974e-05, "loss": 2.1503, "step": 156000 }, { "epoch": 45.34, "learning_rate": 3.866599073001159e-05, "loss": 2.2068, "step": 156500 }, { "epoch": 45.48, "learning_rate": 3.8629779837775206e-05, "loss": 2.2581, "step": 157000 }, { "epoch": 45.63, "learning_rate": 3.859356894553882e-05, "loss": 2.207, "step": 157500 }, { "epoch": 45.77, "learning_rate": 3.855735805330243e-05, "loss": 2.2473, "step": 158000 }, { "epoch": 45.92, "learning_rate": 3.8521147161066054e-05, "loss": 2.3088, "step": 158500 }, { "epoch": 46.06, "learning_rate": 3.848493626882966e-05, "loss": 2.199, "step": 159000 }, { "epoch": 46.21, "learning_rate": 3.844872537659328e-05, "loss": 2.1469, "step": 159500 }, { "epoch": 46.35, "learning_rate": 3.8412514484356895e-05, "loss": 2.2277, "step": 160000 }, { "epoch": 46.49, "learning_rate": 3.837630359212051e-05, "loss": 2.0807, "step": 160500 }, { "epoch": 46.64, "learning_rate": 3.834009269988413e-05, "loss": 2.1421, "step": 161000 }, { "epoch": 46.78, "learning_rate": 3.830388180764774e-05, "loss": 2.0924, "step": 161500 }, { "epoch": 46.93, "learning_rate": 3.826767091541136e-05, "loss": 2.2633, "step": 162000 }, { "epoch": 47.07, "learning_rate": 3.8231460023174976e-05, "loss": 2.1603, "step": 162500 }, { "epoch": 47.22, "learning_rate": 3.8195249130938585e-05, "loss": 2.1038, "step": 163000 }, { "epoch": 47.36, "learning_rate": 3.815903823870221e-05, "loss": 2.081, "step": 163500 }, { "epoch": 47.51, "learning_rate": 3.812282734646582e-05, "loss": 2.0259, "step": 164000 }, { "epoch": 47.65, "learning_rate": 3.8086616454229434e-05, "loss": 2.0775, "step": 164500 }, { "epoch": 47.8, "learning_rate": 3.805040556199305e-05, "loss": 2.119, "step": 165000 }, { "epoch": 47.94, "learning_rate": 3.8014194669756666e-05, "loss": 2.0324, "step": 165500 }, { "epoch": 48.09, "learning_rate": 3.797798377752028e-05, "loss": 2.0463, "step": 166000 }, { "epoch": 48.23, "learning_rate": 3.794177288528389e-05, "loss": 1.9982, "step": 166500 }, { "epoch": 48.38, "learning_rate": 3.7905561993047514e-05, "loss": 2.1069, "step": 167000 }, { "epoch": 48.52, "learning_rate": 3.786935110081112e-05, "loss": 2.0855, "step": 167500 }, { "epoch": 48.67, "learning_rate": 3.783314020857474e-05, "loss": 2.0103, "step": 168000 }, { "epoch": 48.81, "learning_rate": 3.7796929316338356e-05, "loss": 2.1315, "step": 168500 }, { "epoch": 48.96, "learning_rate": 3.776071842410197e-05, "loss": 2.2163, "step": 169000 }, { "epoch": 49.1, "learning_rate": 3.772450753186559e-05, "loss": 2.0169, "step": 169500 }, { "epoch": 49.25, "learning_rate": 3.76882966396292e-05, "loss": 1.9853, "step": 170000 }, { "epoch": 49.39, "learning_rate": 3.765208574739282e-05, "loss": 2.0007, "step": 170500 }, { "epoch": 49.54, "learning_rate": 3.7615874855156436e-05, "loss": 1.9034, "step": 171000 }, { "epoch": 49.68, "learning_rate": 3.7579663962920045e-05, "loss": 2.0278, "step": 171500 }, { "epoch": 49.83, "learning_rate": 3.754345307068367e-05, "loss": 2.0554, "step": 172000 }, { "epoch": 49.97, "learning_rate": 3.750724217844728e-05, "loss": 2.0944, "step": 172500 }, { "epoch": 50.12, "learning_rate": 3.7471031286210894e-05, "loss": 1.9666, "step": 173000 }, { "epoch": 50.26, "learning_rate": 3.743482039397451e-05, "loss": 1.9237, "step": 173500 }, { "epoch": 50.41, "learning_rate": 3.7398609501738126e-05, "loss": 1.9957, "step": 174000 }, { "epoch": 50.55, "learning_rate": 3.736239860950174e-05, "loss": 1.9975, "step": 174500 }, { "epoch": 50.7, "learning_rate": 3.732618771726535e-05, "loss": 1.923, "step": 175000 }, { "epoch": 50.84, "learning_rate": 3.7289976825028974e-05, "loss": 2.0131, "step": 175500 }, { "epoch": 50.98, "learning_rate": 3.7253765932792583e-05, "loss": 2.0053, "step": 176000 }, { "epoch": 51.13, "learning_rate": 3.72175550405562e-05, "loss": 1.9155, "step": 176500 }, { "epoch": 51.27, "learning_rate": 3.7181344148319816e-05, "loss": 1.8302, "step": 177000 }, { "epoch": 51.42, "learning_rate": 3.714513325608343e-05, "loss": 1.8982, "step": 177500 }, { "epoch": 51.56, "learning_rate": 3.710892236384705e-05, "loss": 1.9291, "step": 178000 }, { "epoch": 51.71, "learning_rate": 3.707271147161066e-05, "loss": 1.9618, "step": 178500 }, { "epoch": 51.85, "learning_rate": 3.703650057937428e-05, "loss": 1.906, "step": 179000 }, { "epoch": 52.0, "learning_rate": 3.700028968713789e-05, "loss": 1.9577, "step": 179500 }, { "epoch": 52.14, "learning_rate": 3.6964078794901505e-05, "loss": 1.9181, "step": 180000 }, { "epoch": 52.29, "learning_rate": 3.692786790266513e-05, "loss": 1.8794, "step": 180500 }, { "epoch": 52.43, "learning_rate": 3.689165701042874e-05, "loss": 1.8926, "step": 181000 }, { "epoch": 52.58, "learning_rate": 3.6855446118192354e-05, "loss": 1.9498, "step": 181500 }, { "epoch": 52.72, "learning_rate": 3.681923522595597e-05, "loss": 1.8301, "step": 182000 }, { "epoch": 52.87, "learning_rate": 3.6783024333719586e-05, "loss": 1.8718, "step": 182500 }, { "epoch": 53.01, "learning_rate": 3.67468134414832e-05, "loss": 1.9002, "step": 183000 }, { "epoch": 53.16, "learning_rate": 3.671060254924681e-05, "loss": 1.7962, "step": 183500 }, { "epoch": 53.3, "learning_rate": 3.6674391657010434e-05, "loss": 1.7957, "step": 184000 }, { "epoch": 53.45, "learning_rate": 3.6638180764774044e-05, "loss": 1.8693, "step": 184500 }, { "epoch": 53.59, "learning_rate": 3.660196987253766e-05, "loss": 1.8591, "step": 185000 }, { "epoch": 53.74, "learning_rate": 3.6565758980301276e-05, "loss": 1.8654, "step": 185500 }, { "epoch": 53.88, "learning_rate": 3.652954808806489e-05, "loss": 1.8288, "step": 186000 }, { "epoch": 54.03, "learning_rate": 3.649333719582851e-05, "loss": 1.9124, "step": 186500 }, { "epoch": 54.17, "learning_rate": 3.6457126303592124e-05, "loss": 1.7429, "step": 187000 }, { "epoch": 54.32, "learning_rate": 3.642091541135574e-05, "loss": 1.8316, "step": 187500 }, { "epoch": 54.46, "learning_rate": 3.638470451911935e-05, "loss": 1.7558, "step": 188000 }, { "epoch": 54.61, "learning_rate": 3.6348493626882966e-05, "loss": 1.8358, "step": 188500 }, { "epoch": 54.75, "learning_rate": 3.631228273464659e-05, "loss": 1.7743, "step": 189000 }, { "epoch": 54.9, "learning_rate": 3.62760718424102e-05, "loss": 1.8545, "step": 189500 }, { "epoch": 55.04, "learning_rate": 3.6239860950173814e-05, "loss": 1.7785, "step": 190000 }, { "epoch": 55.19, "learning_rate": 3.620365005793743e-05, "loss": 1.764, "step": 190500 }, { "epoch": 55.33, "learning_rate": 3.6167439165701046e-05, "loss": 1.8189, "step": 191000 }, { "epoch": 55.48, "learning_rate": 3.613122827346466e-05, "loss": 1.6939, "step": 191500 }, { "epoch": 55.62, "learning_rate": 3.609501738122827e-05, "loss": 1.7771, "step": 192000 }, { "epoch": 55.76, "learning_rate": 3.6058806488991894e-05, "loss": 1.7576, "step": 192500 }, { "epoch": 55.91, "learning_rate": 3.6022595596755504e-05, "loss": 1.8118, "step": 193000 }, { "epoch": 56.05, "learning_rate": 3.598638470451912e-05, "loss": 1.7491, "step": 193500 }, { "epoch": 56.2, "learning_rate": 3.5950173812282736e-05, "loss": 1.7124, "step": 194000 }, { "epoch": 56.34, "learning_rate": 3.591396292004635e-05, "loss": 1.7616, "step": 194500 }, { "epoch": 56.49, "learning_rate": 3.587775202780997e-05, "loss": 1.7315, "step": 195000 }, { "epoch": 56.63, "learning_rate": 3.5841541135573584e-05, "loss": 1.6236, "step": 195500 }, { "epoch": 56.78, "learning_rate": 3.58053302433372e-05, "loss": 1.7692, "step": 196000 }, { "epoch": 56.92, "learning_rate": 3.576911935110081e-05, "loss": 1.6878, "step": 196500 }, { "epoch": 57.07, "learning_rate": 3.5732908458864426e-05, "loss": 1.6287, "step": 197000 }, { "epoch": 57.21, "learning_rate": 3.569669756662805e-05, "loss": 1.6295, "step": 197500 }, { "epoch": 57.36, "learning_rate": 3.566048667439166e-05, "loss": 1.712, "step": 198000 }, { "epoch": 57.5, "learning_rate": 3.5624275782155274e-05, "loss": 1.6799, "step": 198500 }, { "epoch": 57.65, "learning_rate": 3.558806488991889e-05, "loss": 1.7199, "step": 199000 }, { "epoch": 57.79, "learning_rate": 3.5551853997682506e-05, "loss": 1.6429, "step": 199500 }, { "epoch": 57.94, "learning_rate": 3.551564310544612e-05, "loss": 1.7739, "step": 200000 }, { "epoch": 58.08, "learning_rate": 3.547943221320973e-05, "loss": 1.5893, "step": 200500 }, { "epoch": 58.23, "learning_rate": 3.5443221320973354e-05, "loss": 1.7382, "step": 201000 }, { "epoch": 58.37, "learning_rate": 3.5407010428736964e-05, "loss": 1.587, "step": 201500 }, { "epoch": 58.52, "learning_rate": 3.537079953650058e-05, "loss": 1.6118, "step": 202000 }, { "epoch": 58.66, "learning_rate": 3.5334588644264196e-05, "loss": 1.5871, "step": 202500 }, { "epoch": 58.81, "learning_rate": 3.529837775202781e-05, "loss": 1.5877, "step": 203000 }, { "epoch": 58.95, "learning_rate": 3.526216685979143e-05, "loss": 1.6959, "step": 203500 }, { "epoch": 59.1, "learning_rate": 3.5225955967555044e-05, "loss": 1.6962, "step": 204000 }, { "epoch": 59.24, "learning_rate": 3.518974507531866e-05, "loss": 1.6245, "step": 204500 }, { "epoch": 59.39, "learning_rate": 3.515353418308227e-05, "loss": 1.6294, "step": 205000 }, { "epoch": 59.53, "learning_rate": 3.5117323290845886e-05, "loss": 1.6275, "step": 205500 }, { "epoch": 59.68, "learning_rate": 3.508111239860951e-05, "loss": 1.6681, "step": 206000 }, { "epoch": 59.82, "learning_rate": 3.504490150637312e-05, "loss": 1.5596, "step": 206500 }, { "epoch": 59.97, "learning_rate": 3.5008690614136734e-05, "loss": 1.629, "step": 207000 }, { "epoch": 60.11, "learning_rate": 3.497247972190035e-05, "loss": 1.6084, "step": 207500 }, { "epoch": 60.25, "learning_rate": 3.4936268829663966e-05, "loss": 1.5486, "step": 208000 }, { "epoch": 60.4, "learning_rate": 3.4900057937427575e-05, "loss": 1.5647, "step": 208500 }, { "epoch": 60.54, "learning_rate": 3.486384704519119e-05, "loss": 1.5691, "step": 209000 }, { "epoch": 60.69, "learning_rate": 3.4827636152954814e-05, "loss": 1.5658, "step": 209500 }, { "epoch": 60.83, "learning_rate": 3.4791425260718424e-05, "loss": 1.5257, "step": 210000 }, { "epoch": 60.98, "learning_rate": 3.475521436848204e-05, "loss": 1.5903, "step": 210500 }, { "epoch": 61.12, "learning_rate": 3.4719003476245656e-05, "loss": 1.514, "step": 211000 }, { "epoch": 61.27, "learning_rate": 3.468279258400927e-05, "loss": 1.4983, "step": 211500 }, { "epoch": 61.41, "learning_rate": 3.464658169177289e-05, "loss": 1.5336, "step": 212000 }, { "epoch": 61.56, "learning_rate": 3.4610370799536504e-05, "loss": 1.5524, "step": 212500 }, { "epoch": 61.7, "learning_rate": 3.457415990730012e-05, "loss": 1.5885, "step": 213000 }, { "epoch": 61.85, "learning_rate": 3.453794901506373e-05, "loss": 1.5389, "step": 213500 }, { "epoch": 61.99, "learning_rate": 3.4501738122827346e-05, "loss": 1.5126, "step": 214000 }, { "epoch": 62.14, "learning_rate": 3.446552723059097e-05, "loss": 1.4495, "step": 214500 }, { "epoch": 62.28, "learning_rate": 3.442931633835458e-05, "loss": 1.4506, "step": 215000 }, { "epoch": 62.43, "learning_rate": 3.4393105446118194e-05, "loss": 1.4459, "step": 215500 }, { "epoch": 62.57, "learning_rate": 3.435689455388181e-05, "loss": 1.5571, "step": 216000 }, { "epoch": 62.72, "learning_rate": 3.4320683661645426e-05, "loss": 1.5158, "step": 216500 }, { "epoch": 62.86, "learning_rate": 3.4284472769409036e-05, "loss": 1.5493, "step": 217000 }, { "epoch": 63.01, "learning_rate": 3.424826187717265e-05, "loss": 1.5083, "step": 217500 }, { "epoch": 63.15, "learning_rate": 3.4212050984936275e-05, "loss": 1.4297, "step": 218000 }, { "epoch": 63.3, "learning_rate": 3.4175840092699884e-05, "loss": 1.4847, "step": 218500 }, { "epoch": 63.44, "learning_rate": 3.41396292004635e-05, "loss": 1.446, "step": 219000 }, { "epoch": 63.59, "learning_rate": 3.4103418308227116e-05, "loss": 1.4292, "step": 219500 }, { "epoch": 63.73, "learning_rate": 3.406720741599073e-05, "loss": 1.5258, "step": 220000 }, { "epoch": 63.88, "learning_rate": 3.403099652375435e-05, "loss": 1.4968, "step": 220500 }, { "epoch": 64.02, "learning_rate": 3.3994785631517964e-05, "loss": 1.4877, "step": 221000 }, { "epoch": 64.17, "learning_rate": 3.395857473928158e-05, "loss": 1.4667, "step": 221500 }, { "epoch": 64.31, "learning_rate": 3.392236384704519e-05, "loss": 1.4413, "step": 222000 }, { "epoch": 64.46, "learning_rate": 3.3886152954808806e-05, "loss": 1.4228, "step": 222500 }, { "epoch": 64.6, "learning_rate": 3.384994206257243e-05, "loss": 1.4158, "step": 223000 }, { "epoch": 64.75, "learning_rate": 3.381373117033604e-05, "loss": 1.376, "step": 223500 }, { "epoch": 64.89, "learning_rate": 3.3777520278099654e-05, "loss": 1.4359, "step": 224000 }, { "epoch": 65.03, "learning_rate": 3.374130938586327e-05, "loss": 1.4994, "step": 224500 }, { "epoch": 65.18, "learning_rate": 3.3705098493626886e-05, "loss": 1.3646, "step": 225000 }, { "epoch": 65.32, "learning_rate": 3.3668887601390496e-05, "loss": 1.5025, "step": 225500 }, { "epoch": 65.47, "learning_rate": 3.363267670915411e-05, "loss": 1.4077, "step": 226000 }, { "epoch": 65.61, "learning_rate": 3.3596465816917735e-05, "loss": 1.3323, "step": 226500 }, { "epoch": 65.76, "learning_rate": 3.3560254924681344e-05, "loss": 1.4387, "step": 227000 }, { "epoch": 65.9, "learning_rate": 3.352404403244496e-05, "loss": 1.3418, "step": 227500 }, { "epoch": 66.05, "learning_rate": 3.3487833140208576e-05, "loss": 1.2793, "step": 228000 }, { "epoch": 66.19, "learning_rate": 3.345162224797219e-05, "loss": 1.3484, "step": 228500 }, { "epoch": 66.34, "learning_rate": 3.341541135573581e-05, "loss": 1.3419, "step": 229000 }, { "epoch": 66.48, "learning_rate": 3.3379200463499424e-05, "loss": 1.308, "step": 229500 }, { "epoch": 66.63, "learning_rate": 3.334298957126304e-05, "loss": 1.3193, "step": 230000 }, { "epoch": 66.77, "learning_rate": 3.330677867902665e-05, "loss": 1.4064, "step": 230500 }, { "epoch": 66.92, "learning_rate": 3.3270567786790266e-05, "loss": 1.3766, "step": 231000 }, { "epoch": 67.06, "learning_rate": 3.323435689455388e-05, "loss": 1.3482, "step": 231500 }, { "epoch": 67.21, "learning_rate": 3.31981460023175e-05, "loss": 1.348, "step": 232000 }, { "epoch": 67.35, "learning_rate": 3.3161935110081114e-05, "loss": 1.2824, "step": 232500 }, { "epoch": 67.5, "learning_rate": 3.312572421784473e-05, "loss": 1.2908, "step": 233000 }, { "epoch": 67.64, "learning_rate": 3.3089513325608346e-05, "loss": 1.4077, "step": 233500 }, { "epoch": 67.79, "learning_rate": 3.3053302433371956e-05, "loss": 1.3023, "step": 234000 }, { "epoch": 67.93, "learning_rate": 3.301709154113558e-05, "loss": 1.422, "step": 234500 }, { "epoch": 68.08, "learning_rate": 3.2980880648899195e-05, "loss": 1.3004, "step": 235000 }, { "epoch": 68.22, "learning_rate": 3.2944669756662804e-05, "loss": 1.2346, "step": 235500 }, { "epoch": 68.37, "learning_rate": 3.290845886442642e-05, "loss": 1.2779, "step": 236000 }, { "epoch": 68.51, "learning_rate": 3.2872247972190036e-05, "loss": 1.2945, "step": 236500 }, { "epoch": 68.66, "learning_rate": 3.283603707995365e-05, "loss": 1.2413, "step": 237000 }, { "epoch": 68.8, "learning_rate": 3.279982618771727e-05, "loss": 1.3038, "step": 237500 }, { "epoch": 68.95, "learning_rate": 3.2763615295480884e-05, "loss": 1.3092, "step": 238000 }, { "epoch": 69.09, "learning_rate": 3.27274044032445e-05, "loss": 1.2456, "step": 238500 }, { "epoch": 69.24, "learning_rate": 3.269119351100811e-05, "loss": 1.2915, "step": 239000 }, { "epoch": 69.38, "learning_rate": 3.2654982618771726e-05, "loss": 1.2733, "step": 239500 }, { "epoch": 69.52, "learning_rate": 3.261877172653534e-05, "loss": 1.2595, "step": 240000 }, { "epoch": 69.67, "learning_rate": 3.258256083429896e-05, "loss": 1.2344, "step": 240500 }, { "epoch": 69.81, "learning_rate": 3.2546349942062574e-05, "loss": 1.2483, "step": 241000 }, { "epoch": 69.96, "learning_rate": 3.251013904982619e-05, "loss": 1.3016, "step": 241500 }, { "epoch": 70.1, "learning_rate": 3.2473928157589806e-05, "loss": 1.1828, "step": 242000 }, { "epoch": 70.25, "learning_rate": 3.2437717265353416e-05, "loss": 1.2399, "step": 242500 }, { "epoch": 70.39, "learning_rate": 3.240150637311704e-05, "loss": 1.2375, "step": 243000 }, { "epoch": 70.54, "learning_rate": 3.2365295480880655e-05, "loss": 1.2156, "step": 243500 }, { "epoch": 70.68, "learning_rate": 3.2329084588644264e-05, "loss": 1.2149, "step": 244000 }, { "epoch": 70.83, "learning_rate": 3.229287369640788e-05, "loss": 1.2616, "step": 244500 }, { "epoch": 70.97, "learning_rate": 3.2256662804171496e-05, "loss": 1.2711, "step": 245000 }, { "epoch": 71.12, "learning_rate": 3.222045191193511e-05, "loss": 1.1306, "step": 245500 }, { "epoch": 71.26, "learning_rate": 3.218424101969872e-05, "loss": 1.1036, "step": 246000 }, { "epoch": 71.41, "learning_rate": 3.2148030127462345e-05, "loss": 1.1606, "step": 246500 }, { "epoch": 71.55, "learning_rate": 3.211181923522596e-05, "loss": 1.2608, "step": 247000 }, { "epoch": 71.7, "learning_rate": 3.207560834298957e-05, "loss": 1.2569, "step": 247500 }, { "epoch": 71.84, "learning_rate": 3.2039397450753186e-05, "loss": 1.2382, "step": 248000 }, { "epoch": 71.99, "learning_rate": 3.20031865585168e-05, "loss": 1.1877, "step": 248500 }, { "epoch": 72.13, "learning_rate": 3.196697566628042e-05, "loss": 1.2093, "step": 249000 }, { "epoch": 72.28, "learning_rate": 3.1930764774044034e-05, "loss": 1.133, "step": 249500 }, { "epoch": 72.42, "learning_rate": 3.189455388180765e-05, "loss": 1.1242, "step": 250000 }, { "epoch": 72.57, "learning_rate": 3.1858342989571267e-05, "loss": 1.1529, "step": 250500 }, { "epoch": 72.71, "learning_rate": 3.1822132097334876e-05, "loss": 1.1561, "step": 251000 }, { "epoch": 72.86, "learning_rate": 3.17859212050985e-05, "loss": 1.1941, "step": 251500 }, { "epoch": 73.0, "learning_rate": 3.1749710312862115e-05, "loss": 1.2591, "step": 252000 }, { "epoch": 73.15, "learning_rate": 3.1713499420625724e-05, "loss": 1.0791, "step": 252500 }, { "epoch": 73.29, "learning_rate": 3.167728852838934e-05, "loss": 1.1471, "step": 253000 }, { "epoch": 73.44, "learning_rate": 3.1641077636152956e-05, "loss": 1.0509, "step": 253500 }, { "epoch": 73.58, "learning_rate": 3.160486674391657e-05, "loss": 1.1248, "step": 254000 }, { "epoch": 73.73, "learning_rate": 3.156865585168018e-05, "loss": 1.1184, "step": 254500 }, { "epoch": 73.87, "learning_rate": 3.1532444959443805e-05, "loss": 1.2078, "step": 255000 }, { "epoch": 74.02, "learning_rate": 3.149623406720742e-05, "loss": 1.1616, "step": 255500 }, { "epoch": 74.16, "learning_rate": 3.146002317497103e-05, "loss": 1.0971, "step": 256000 }, { "epoch": 74.3, "learning_rate": 3.1423812282734646e-05, "loss": 1.1006, "step": 256500 }, { "epoch": 74.45, "learning_rate": 3.138760139049826e-05, "loss": 1.1215, "step": 257000 }, { "epoch": 74.59, "learning_rate": 3.135139049826188e-05, "loss": 1.1527, "step": 257500 }, { "epoch": 74.74, "learning_rate": 3.1315179606025494e-05, "loss": 1.1213, "step": 258000 }, { "epoch": 74.88, "learning_rate": 3.127896871378911e-05, "loss": 1.1626, "step": 258500 }, { "epoch": 75.03, "learning_rate": 3.1242757821552727e-05, "loss": 1.0862, "step": 259000 }, { "epoch": 75.17, "learning_rate": 3.1206546929316336e-05, "loss": 1.0992, "step": 259500 }, { "epoch": 75.32, "learning_rate": 3.117033603707996e-05, "loss": 1.0891, "step": 260000 }, { "epoch": 75.46, "learning_rate": 3.113412514484357e-05, "loss": 1.0892, "step": 260500 }, { "epoch": 75.61, "learning_rate": 3.1097914252607184e-05, "loss": 1.0949, "step": 261000 }, { "epoch": 75.75, "learning_rate": 3.10617033603708e-05, "loss": 1.0752, "step": 261500 }, { "epoch": 75.9, "learning_rate": 3.1025492468134416e-05, "loss": 1.1233, "step": 262000 }, { "epoch": 76.04, "learning_rate": 3.098928157589803e-05, "loss": 1.1272, "step": 262500 }, { "epoch": 76.19, "learning_rate": 3.095307068366164e-05, "loss": 1.064, "step": 263000 }, { "epoch": 76.33, "learning_rate": 3.0916859791425265e-05, "loss": 1.0854, "step": 263500 }, { "epoch": 76.48, "learning_rate": 3.088064889918888e-05, "loss": 1.0471, "step": 264000 }, { "epoch": 76.62, "learning_rate": 3.084443800695249e-05, "loss": 1.0993, "step": 264500 }, { "epoch": 76.77, "learning_rate": 3.0808227114716106e-05, "loss": 1.1075, "step": 265000 }, { "epoch": 76.91, "learning_rate": 3.077201622247972e-05, "loss": 1.0724, "step": 265500 }, { "epoch": 77.06, "learning_rate": 3.073580533024334e-05, "loss": 1.0793, "step": 266000 }, { "epoch": 77.2, "learning_rate": 3.0699594438006954e-05, "loss": 1.0535, "step": 266500 }, { "epoch": 77.35, "learning_rate": 3.066338354577057e-05, "loss": 0.9771, "step": 267000 }, { "epoch": 77.49, "learning_rate": 3.062717265353419e-05, "loss": 1.0115, "step": 267500 }, { "epoch": 77.64, "learning_rate": 3.0590961761297796e-05, "loss": 1.0827, "step": 268000 }, { "epoch": 77.78, "learning_rate": 3.055475086906142e-05, "loss": 1.0376, "step": 268500 }, { "epoch": 77.93, "learning_rate": 3.051853997682503e-05, "loss": 1.0339, "step": 269000 }, { "epoch": 78.07, "learning_rate": 3.0482329084588644e-05, "loss": 1.0429, "step": 269500 }, { "epoch": 78.22, "learning_rate": 3.0446118192352264e-05, "loss": 1.0557, "step": 270000 }, { "epoch": 78.36, "learning_rate": 3.0409907300115876e-05, "loss": 0.9607, "step": 270500 }, { "epoch": 78.51, "learning_rate": 3.0373696407879493e-05, "loss": 1.0016, "step": 271000 }, { "epoch": 78.65, "learning_rate": 3.0337485515643105e-05, "loss": 1.0083, "step": 271500 }, { "epoch": 78.79, "learning_rate": 3.030127462340672e-05, "loss": 1.0785, "step": 272000 }, { "epoch": 78.94, "learning_rate": 3.026506373117034e-05, "loss": 1.0867, "step": 272500 }, { "epoch": 79.08, "learning_rate": 3.0228852838933954e-05, "loss": 0.9776, "step": 273000 }, { "epoch": 79.23, "learning_rate": 3.019264194669757e-05, "loss": 0.9559, "step": 273500 }, { "epoch": 79.37, "learning_rate": 3.0156431054461182e-05, "loss": 0.9912, "step": 274000 }, { "epoch": 79.52, "learning_rate": 3.01202201622248e-05, "loss": 1.0311, "step": 274500 }, { "epoch": 79.66, "learning_rate": 3.008400926998841e-05, "loss": 1.0281, "step": 275000 }, { "epoch": 79.81, "learning_rate": 3.0047798377752027e-05, "loss": 0.9119, "step": 275500 }, { "epoch": 79.95, "learning_rate": 3.0011587485515647e-05, "loss": 1.0614, "step": 276000 }, { "epoch": 80.1, "learning_rate": 2.997537659327926e-05, "loss": 1.0004, "step": 276500 }, { "epoch": 80.24, "learning_rate": 2.9939165701042876e-05, "loss": 0.9568, "step": 277000 }, { "epoch": 80.39, "learning_rate": 2.9902954808806488e-05, "loss": 0.9485, "step": 277500 }, { "epoch": 80.53, "learning_rate": 2.9866743916570104e-05, "loss": 0.9932, "step": 278000 }, { "epoch": 80.68, "learning_rate": 2.9830533024333724e-05, "loss": 0.9095, "step": 278500 }, { "epoch": 80.82, "learning_rate": 2.9794322132097337e-05, "loss": 0.929, "step": 279000 }, { "epoch": 80.97, "learning_rate": 2.9758111239860953e-05, "loss": 0.9774, "step": 279500 }, { "epoch": 81.11, "learning_rate": 2.9721900347624565e-05, "loss": 0.9507, "step": 280000 }, { "epoch": 81.26, "learning_rate": 2.968568945538818e-05, "loss": 0.9107, "step": 280500 }, { "epoch": 81.4, "learning_rate": 2.96494785631518e-05, "loss": 0.9239, "step": 281000 }, { "epoch": 81.55, "learning_rate": 2.9613267670915414e-05, "loss": 0.9795, "step": 281500 }, { "epoch": 81.69, "learning_rate": 2.957705677867903e-05, "loss": 0.9762, "step": 282000 }, { "epoch": 81.84, "learning_rate": 2.9540845886442642e-05, "loss": 0.9214, "step": 282500 }, { "epoch": 81.98, "learning_rate": 2.950463499420626e-05, "loss": 1.0133, "step": 283000 }, { "epoch": 82.13, "learning_rate": 2.946842410196987e-05, "loss": 0.9045, "step": 283500 }, { "epoch": 82.27, "learning_rate": 2.943221320973349e-05, "loss": 0.91, "step": 284000 }, { "epoch": 82.42, "learning_rate": 2.9396002317497107e-05, "loss": 0.9366, "step": 284500 }, { "epoch": 82.56, "learning_rate": 2.935979142526072e-05, "loss": 0.8921, "step": 285000 }, { "epoch": 82.71, "learning_rate": 2.9323580533024336e-05, "loss": 0.9532, "step": 285500 }, { "epoch": 82.85, "learning_rate": 2.928736964078795e-05, "loss": 0.9885, "step": 286000 }, { "epoch": 83.0, "learning_rate": 2.9251158748551564e-05, "loss": 0.953, "step": 286500 }, { "epoch": 83.14, "learning_rate": 2.9214947856315184e-05, "loss": 0.8898, "step": 287000 }, { "epoch": 83.29, "learning_rate": 2.9178736964078797e-05, "loss": 0.8683, "step": 287500 }, { "epoch": 83.43, "learning_rate": 2.9142526071842413e-05, "loss": 0.8697, "step": 288000 }, { "epoch": 83.57, "learning_rate": 2.9106315179606025e-05, "loss": 0.9246, "step": 288500 }, { "epoch": 83.72, "learning_rate": 2.907010428736964e-05, "loss": 0.904, "step": 289000 }, { "epoch": 83.86, "learning_rate": 2.9033893395133254e-05, "loss": 0.8879, "step": 289500 }, { "epoch": 84.01, "learning_rate": 2.8997682502896874e-05, "loss": 0.9338, "step": 290000 }, { "epoch": 84.15, "learning_rate": 2.896147161066049e-05, "loss": 0.8704, "step": 290500 }, { "epoch": 84.3, "learning_rate": 2.8925260718424102e-05, "loss": 0.8463, "step": 291000 }, { "epoch": 84.44, "learning_rate": 2.888904982618772e-05, "loss": 0.8428, "step": 291500 }, { "epoch": 84.59, "learning_rate": 2.885283893395133e-05, "loss": 0.9357, "step": 292000 }, { "epoch": 84.73, "learning_rate": 2.881662804171495e-05, "loss": 0.8905, "step": 292500 }, { "epoch": 84.88, "learning_rate": 2.8780417149478567e-05, "loss": 0.9016, "step": 293000 }, { "epoch": 85.02, "learning_rate": 2.874420625724218e-05, "loss": 0.9352, "step": 293500 }, { "epoch": 85.17, "learning_rate": 2.8707995365005796e-05, "loss": 0.8342, "step": 294000 }, { "epoch": 85.31, "learning_rate": 2.867178447276941e-05, "loss": 0.8075, "step": 294500 }, { "epoch": 85.46, "learning_rate": 2.8635573580533024e-05, "loss": 0.8817, "step": 295000 }, { "epoch": 85.6, "learning_rate": 2.8599362688296644e-05, "loss": 0.8684, "step": 295500 }, { "epoch": 85.75, "learning_rate": 2.8563151796060257e-05, "loss": 0.78, "step": 296000 }, { "epoch": 85.89, "learning_rate": 2.8526940903823873e-05, "loss": 0.9032, "step": 296500 }, { "epoch": 86.04, "learning_rate": 2.8490730011587485e-05, "loss": 0.8435, "step": 297000 }, { "epoch": 86.18, "learning_rate": 2.84545191193511e-05, "loss": 0.8357, "step": 297500 }, { "epoch": 86.33, "learning_rate": 2.8418308227114714e-05, "loss": 0.8398, "step": 298000 }, { "epoch": 86.47, "learning_rate": 2.8382097334878334e-05, "loss": 0.8226, "step": 298500 }, { "epoch": 86.62, "learning_rate": 2.834588644264195e-05, "loss": 0.8855, "step": 299000 }, { "epoch": 86.76, "learning_rate": 2.8309675550405563e-05, "loss": 0.8002, "step": 299500 }, { "epoch": 86.91, "learning_rate": 2.827346465816918e-05, "loss": 0.8452, "step": 300000 }, { "epoch": 87.05, "learning_rate": 2.823725376593279e-05, "loss": 0.917, "step": 300500 }, { "epoch": 87.2, "learning_rate": 2.820104287369641e-05, "loss": 0.8106, "step": 301000 }, { "epoch": 87.34, "learning_rate": 2.8164831981460027e-05, "loss": 0.8121, "step": 301500 }, { "epoch": 87.49, "learning_rate": 2.812862108922364e-05, "loss": 0.8031, "step": 302000 }, { "epoch": 87.63, "learning_rate": 2.8092410196987256e-05, "loss": 0.7967, "step": 302500 }, { "epoch": 87.78, "learning_rate": 2.805619930475087e-05, "loss": 0.8531, "step": 303000 }, { "epoch": 87.92, "learning_rate": 2.8019988412514488e-05, "loss": 0.8355, "step": 303500 }, { "epoch": 88.06, "learning_rate": 2.7983777520278097e-05, "loss": 0.8451, "step": 304000 }, { "epoch": 88.21, "learning_rate": 2.7947566628041717e-05, "loss": 0.787, "step": 304500 }, { "epoch": 88.35, "learning_rate": 2.7911355735805333e-05, "loss": 0.8058, "step": 305000 }, { "epoch": 88.5, "learning_rate": 2.7875144843568946e-05, "loss": 0.802, "step": 305500 }, { "epoch": 88.64, "learning_rate": 2.783893395133256e-05, "loss": 0.8519, "step": 306000 }, { "epoch": 88.79, "learning_rate": 2.7802723059096174e-05, "loss": 0.7892, "step": 306500 }, { "epoch": 88.93, "learning_rate": 2.7766512166859794e-05, "loss": 0.7878, "step": 307000 }, { "epoch": 89.08, "learning_rate": 2.773030127462341e-05, "loss": 0.7799, "step": 307500 }, { "epoch": 89.22, "learning_rate": 2.7694090382387023e-05, "loss": 0.7811, "step": 308000 }, { "epoch": 89.37, "learning_rate": 2.765787949015064e-05, "loss": 0.7494, "step": 308500 }, { "epoch": 89.51, "learning_rate": 2.762166859791425e-05, "loss": 0.7852, "step": 309000 }, { "epoch": 89.66, "learning_rate": 2.758545770567787e-05, "loss": 0.8175, "step": 309500 }, { "epoch": 89.8, "learning_rate": 2.7549246813441487e-05, "loss": 0.7376, "step": 310000 }, { "epoch": 89.95, "learning_rate": 2.75130359212051e-05, "loss": 0.7756, "step": 310500 }, { "epoch": 90.09, "learning_rate": 2.7476825028968716e-05, "loss": 0.7502, "step": 311000 }, { "epoch": 90.24, "learning_rate": 2.744061413673233e-05, "loss": 0.715, "step": 311500 }, { "epoch": 90.38, "learning_rate": 2.7404403244495948e-05, "loss": 0.7592, "step": 312000 }, { "epoch": 90.53, "learning_rate": 2.7368192352259557e-05, "loss": 0.756, "step": 312500 }, { "epoch": 90.67, "learning_rate": 2.7331981460023177e-05, "loss": 0.8058, "step": 313000 }, { "epoch": 90.82, "learning_rate": 2.7295770567786793e-05, "loss": 0.7458, "step": 313500 }, { "epoch": 90.96, "learning_rate": 2.7259559675550406e-05, "loss": 0.7984, "step": 314000 }, { "epoch": 91.11, "learning_rate": 2.7223348783314022e-05, "loss": 0.7359, "step": 314500 }, { "epoch": 91.25, "learning_rate": 2.7187137891077634e-05, "loss": 0.7224, "step": 315000 }, { "epoch": 91.4, "learning_rate": 2.7150926998841254e-05, "loss": 0.7386, "step": 315500 }, { "epoch": 91.54, "learning_rate": 2.711471610660487e-05, "loss": 0.7468, "step": 316000 }, { "epoch": 91.69, "learning_rate": 2.7078505214368483e-05, "loss": 0.7356, "step": 316500 }, { "epoch": 91.83, "learning_rate": 2.70422943221321e-05, "loss": 0.7545, "step": 317000 }, { "epoch": 91.98, "learning_rate": 2.700608342989571e-05, "loss": 0.7898, "step": 317500 }, { "epoch": 92.12, "learning_rate": 2.696987253765933e-05, "loss": 0.7097, "step": 318000 }, { "epoch": 92.27, "learning_rate": 2.693366164542294e-05, "loss": 0.7192, "step": 318500 }, { "epoch": 92.41, "learning_rate": 2.689745075318656e-05, "loss": 0.7318, "step": 319000 }, { "epoch": 92.56, "learning_rate": 2.6861239860950176e-05, "loss": 0.6905, "step": 319500 }, { "epoch": 92.7, "learning_rate": 2.682502896871379e-05, "loss": 0.7404, "step": 320000 }, { "epoch": 92.84, "learning_rate": 2.6788818076477408e-05, "loss": 0.7495, "step": 320500 }, { "epoch": 92.99, "learning_rate": 2.6752607184241017e-05, "loss": 0.7459, "step": 321000 }, { "epoch": 93.13, "learning_rate": 2.6716396292004637e-05, "loss": 0.7149, "step": 321500 }, { "epoch": 93.28, "learning_rate": 2.6680185399768253e-05, "loss": 0.6852, "step": 322000 }, { "epoch": 93.42, "learning_rate": 2.6643974507531866e-05, "loss": 0.6797, "step": 322500 }, { "epoch": 93.57, "learning_rate": 2.6607763615295482e-05, "loss": 0.6807, "step": 323000 }, { "epoch": 93.71, "learning_rate": 2.6571552723059094e-05, "loss": 0.7131, "step": 323500 }, { "epoch": 93.86, "learning_rate": 2.6535341830822714e-05, "loss": 0.7032, "step": 324000 }, { "epoch": 94.0, "learning_rate": 2.649913093858633e-05, "loss": 0.7271, "step": 324500 }, { "epoch": 94.15, "learning_rate": 2.6462920046349943e-05, "loss": 0.6575, "step": 325000 }, { "epoch": 94.29, "learning_rate": 2.642670915411356e-05, "loss": 0.7185, "step": 325500 }, { "epoch": 94.44, "learning_rate": 2.639049826187717e-05, "loss": 0.7487, "step": 326000 }, { "epoch": 94.58, "learning_rate": 2.635428736964079e-05, "loss": 0.7181, "step": 326500 }, { "epoch": 94.73, "learning_rate": 2.63180764774044e-05, "loss": 0.6834, "step": 327000 }, { "epoch": 94.87, "learning_rate": 2.628186558516802e-05, "loss": 0.7236, "step": 327500 }, { "epoch": 95.02, "learning_rate": 2.6245654692931636e-05, "loss": 0.6998, "step": 328000 }, { "epoch": 95.16, "learning_rate": 2.620944380069525e-05, "loss": 0.6912, "step": 328500 }, { "epoch": 95.31, "learning_rate": 2.6173232908458868e-05, "loss": 0.6545, "step": 329000 }, { "epoch": 95.45, "learning_rate": 2.6137022016222477e-05, "loss": 0.6852, "step": 329500 }, { "epoch": 95.6, "learning_rate": 2.6100811123986097e-05, "loss": 0.6328, "step": 330000 }, { "epoch": 95.74, "learning_rate": 2.6064600231749713e-05, "loss": 0.6718, "step": 330500 }, { "epoch": 95.89, "learning_rate": 2.6028389339513326e-05, "loss": 0.6903, "step": 331000 }, { "epoch": 96.03, "learning_rate": 2.5992178447276945e-05, "loss": 0.663, "step": 331500 }, { "epoch": 96.18, "learning_rate": 2.5955967555040555e-05, "loss": 0.6634, "step": 332000 }, { "epoch": 96.32, "learning_rate": 2.5919756662804174e-05, "loss": 0.6536, "step": 332500 }, { "epoch": 96.47, "learning_rate": 2.5883545770567787e-05, "loss": 0.6705, "step": 333000 }, { "epoch": 96.61, "learning_rate": 2.5847334878331403e-05, "loss": 0.6472, "step": 333500 }, { "epoch": 96.76, "learning_rate": 2.581112398609502e-05, "loss": 0.6946, "step": 334000 }, { "epoch": 96.9, "learning_rate": 2.577491309385863e-05, "loss": 0.6203, "step": 334500 }, { "epoch": 97.05, "learning_rate": 2.573870220162225e-05, "loss": 0.6359, "step": 335000 }, { "epoch": 97.19, "learning_rate": 2.570249130938586e-05, "loss": 0.6363, "step": 335500 }, { "epoch": 97.33, "learning_rate": 2.566628041714948e-05, "loss": 0.6343, "step": 336000 }, { "epoch": 97.48, "learning_rate": 2.5630069524913096e-05, "loss": 0.6251, "step": 336500 }, { "epoch": 97.62, "learning_rate": 2.559385863267671e-05, "loss": 0.6324, "step": 337000 }, { "epoch": 97.77, "learning_rate": 2.5557647740440328e-05, "loss": 0.6567, "step": 337500 }, { "epoch": 97.91, "learning_rate": 2.5521436848203938e-05, "loss": 0.6568, "step": 338000 }, { "epoch": 98.06, "learning_rate": 2.5485225955967557e-05, "loss": 0.6158, "step": 338500 }, { "epoch": 98.2, "learning_rate": 2.5449015063731173e-05, "loss": 0.5807, "step": 339000 }, { "epoch": 98.35, "learning_rate": 2.5412804171494786e-05, "loss": 0.6511, "step": 339500 }, { "epoch": 98.49, "learning_rate": 2.5376593279258405e-05, "loss": 0.6278, "step": 340000 }, { "epoch": 98.64, "learning_rate": 2.5340382387022015e-05, "loss": 0.6598, "step": 340500 }, { "epoch": 98.78, "learning_rate": 2.5304171494785634e-05, "loss": 0.6021, "step": 341000 }, { "epoch": 98.93, "learning_rate": 2.5267960602549247e-05, "loss": 0.6365, "step": 341500 }, { "epoch": 99.07, "learning_rate": 2.5231749710312863e-05, "loss": 0.6539, "step": 342000 }, { "epoch": 99.22, "learning_rate": 2.519553881807648e-05, "loss": 0.5917, "step": 342500 }, { "epoch": 99.36, "learning_rate": 2.5159327925840092e-05, "loss": 0.6234, "step": 343000 }, { "epoch": 99.51, "learning_rate": 2.512311703360371e-05, "loss": 0.6112, "step": 343500 }, { "epoch": 99.65, "learning_rate": 2.5086906141367324e-05, "loss": 0.6155, "step": 344000 }, { "epoch": 99.8, "learning_rate": 2.505069524913094e-05, "loss": 0.5829, "step": 344500 }, { "epoch": 99.94, "learning_rate": 2.5014484356894556e-05, "loss": 0.6073, "step": 345000 }, { "epoch": 100.09, "learning_rate": 2.497827346465817e-05, "loss": 0.5886, "step": 345500 }, { "epoch": 100.23, "learning_rate": 2.4942062572421785e-05, "loss": 0.5803, "step": 346000 }, { "epoch": 100.38, "learning_rate": 2.49058516801854e-05, "loss": 0.5704, "step": 346500 }, { "epoch": 100.52, "learning_rate": 2.4869640787949017e-05, "loss": 0.5902, "step": 347000 }, { "epoch": 100.67, "learning_rate": 2.4833429895712633e-05, "loss": 0.5799, "step": 347500 }, { "epoch": 100.81, "learning_rate": 2.4797219003476246e-05, "loss": 0.5898, "step": 348000 }, { "epoch": 100.96, "learning_rate": 2.4761008111239862e-05, "loss": 0.6129, "step": 348500 }, { "epoch": 101.1, "learning_rate": 2.4724797219003478e-05, "loss": 0.6226, "step": 349000 }, { "epoch": 101.25, "learning_rate": 2.4688586326767094e-05, "loss": 0.6064, "step": 349500 }, { "epoch": 101.39, "learning_rate": 2.4652375434530707e-05, "loss": 0.5727, "step": 350000 }, { "epoch": 101.54, "learning_rate": 2.4616164542294323e-05, "loss": 0.5478, "step": 350500 }, { "epoch": 101.68, "learning_rate": 2.457995365005794e-05, "loss": 0.5586, "step": 351000 }, { "epoch": 101.83, "learning_rate": 2.4543742757821552e-05, "loss": 0.5872, "step": 351500 }, { "epoch": 101.97, "learning_rate": 2.450753186558517e-05, "loss": 0.6057, "step": 352000 }, { "epoch": 102.11, "learning_rate": 2.4471320973348784e-05, "loss": 0.5403, "step": 352500 }, { "epoch": 102.26, "learning_rate": 2.44351100811124e-05, "loss": 0.5902, "step": 353000 }, { "epoch": 102.4, "learning_rate": 2.4398899188876016e-05, "loss": 0.5792, "step": 353500 }, { "epoch": 102.55, "learning_rate": 2.436268829663963e-05, "loss": 0.6074, "step": 354000 }, { "epoch": 102.69, "learning_rate": 2.4326477404403245e-05, "loss": 0.5505, "step": 354500 }, { "epoch": 102.84, "learning_rate": 2.429026651216686e-05, "loss": 0.6083, "step": 355000 }, { "epoch": 102.98, "learning_rate": 2.4254055619930477e-05, "loss": 0.5578, "step": 355500 }, { "epoch": 103.13, "learning_rate": 2.4217844727694093e-05, "loss": 0.5082, "step": 356000 }, { "epoch": 103.27, "learning_rate": 2.4181633835457706e-05, "loss": 0.5695, "step": 356500 }, { "epoch": 103.42, "learning_rate": 2.4145422943221322e-05, "loss": 0.5668, "step": 357000 }, { "epoch": 103.56, "learning_rate": 2.4109212050984935e-05, "loss": 0.5363, "step": 357500 }, { "epoch": 103.71, "learning_rate": 2.4073001158748554e-05, "loss": 0.5375, "step": 358000 }, { "epoch": 103.85, "learning_rate": 2.4036790266512167e-05, "loss": 0.5513, "step": 358500 }, { "epoch": 104.0, "learning_rate": 2.4000579374275783e-05, "loss": 0.5869, "step": 359000 }, { "epoch": 104.14, "learning_rate": 2.39643684820394e-05, "loss": 0.55, "step": 359500 }, { "epoch": 104.29, "learning_rate": 2.3928157589803012e-05, "loss": 0.5484, "step": 360000 }, { "epoch": 104.43, "learning_rate": 2.3891946697566628e-05, "loss": 0.5442, "step": 360500 }, { "epoch": 104.58, "learning_rate": 2.3855735805330244e-05, "loss": 0.5365, "step": 361000 }, { "epoch": 104.72, "learning_rate": 2.381952491309386e-05, "loss": 0.5659, "step": 361500 }, { "epoch": 104.87, "learning_rate": 2.3783314020857476e-05, "loss": 0.5471, "step": 362000 }, { "epoch": 105.01, "learning_rate": 2.374710312862109e-05, "loss": 0.5269, "step": 362500 }, { "epoch": 105.16, "learning_rate": 2.3710892236384705e-05, "loss": 0.5456, "step": 363000 }, { "epoch": 105.3, "learning_rate": 2.367468134414832e-05, "loss": 0.526, "step": 363500 }, { "epoch": 105.45, "learning_rate": 2.3638470451911937e-05, "loss": 0.5215, "step": 364000 }, { "epoch": 105.59, "learning_rate": 2.3602259559675553e-05, "loss": 0.5163, "step": 364500 }, { "epoch": 105.74, "learning_rate": 2.3566048667439166e-05, "loss": 0.5441, "step": 365000 }, { "epoch": 105.88, "learning_rate": 2.3529837775202782e-05, "loss": 0.5098, "step": 365500 }, { "epoch": 106.03, "learning_rate": 2.3493626882966395e-05, "loss": 0.5359, "step": 366000 }, { "epoch": 106.17, "learning_rate": 2.3457415990730014e-05, "loss": 0.5015, "step": 366500 }, { "epoch": 106.32, "learning_rate": 2.342120509849363e-05, "loss": 0.4858, "step": 367000 }, { "epoch": 106.46, "learning_rate": 2.3384994206257243e-05, "loss": 0.5311, "step": 367500 }, { "epoch": 106.6, "learning_rate": 2.334878331402086e-05, "loss": 0.5061, "step": 368000 }, { "epoch": 106.75, "learning_rate": 2.3312572421784472e-05, "loss": 0.5404, "step": 368500 }, { "epoch": 106.89, "learning_rate": 2.3276361529548088e-05, "loss": 0.5108, "step": 369000 }, { "epoch": 107.04, "learning_rate": 2.3240150637311704e-05, "loss": 0.4969, "step": 369500 }, { "epoch": 107.18, "learning_rate": 2.320393974507532e-05, "loss": 0.4969, "step": 370000 }, { "epoch": 107.33, "learning_rate": 2.3167728852838936e-05, "loss": 0.5172, "step": 370500 }, { "epoch": 107.47, "learning_rate": 2.313151796060255e-05, "loss": 0.5653, "step": 371000 }, { "epoch": 107.62, "learning_rate": 2.3095307068366165e-05, "loss": 0.517, "step": 371500 }, { "epoch": 107.76, "learning_rate": 2.305909617612978e-05, "loss": 0.496, "step": 372000 }, { "epoch": 107.91, "learning_rate": 2.3022885283893397e-05, "loss": 0.5357, "step": 372500 }, { "epoch": 108.05, "learning_rate": 2.2986674391657013e-05, "loss": 0.5115, "step": 373000 }, { "epoch": 108.2, "learning_rate": 2.2950463499420626e-05, "loss": 0.4683, "step": 373500 }, { "epoch": 108.34, "learning_rate": 2.2914252607184242e-05, "loss": 0.5017, "step": 374000 }, { "epoch": 108.49, "learning_rate": 2.2878041714947855e-05, "loss": 0.479, "step": 374500 }, { "epoch": 108.63, "learning_rate": 2.284183082271147e-05, "loss": 0.4886, "step": 375000 }, { "epoch": 108.78, "learning_rate": 2.280561993047509e-05, "loss": 0.4825, "step": 375500 }, { "epoch": 108.92, "learning_rate": 2.2769409038238703e-05, "loss": 0.4878, "step": 376000 }, { "epoch": 109.07, "learning_rate": 2.273319814600232e-05, "loss": 0.4942, "step": 376500 }, { "epoch": 109.21, "learning_rate": 2.2696987253765932e-05, "loss": 0.5016, "step": 377000 }, { "epoch": 109.36, "learning_rate": 2.2660776361529548e-05, "loss": 0.4747, "step": 377500 }, { "epoch": 109.5, "learning_rate": 2.2624565469293164e-05, "loss": 0.4661, "step": 378000 }, { "epoch": 109.65, "learning_rate": 2.258835457705678e-05, "loss": 0.4792, "step": 378500 }, { "epoch": 109.79, "learning_rate": 2.2552143684820396e-05, "loss": 0.5083, "step": 379000 }, { "epoch": 109.94, "learning_rate": 2.251593279258401e-05, "loss": 0.5082, "step": 379500 }, { "epoch": 110.08, "learning_rate": 2.2479721900347625e-05, "loss": 0.4781, "step": 380000 }, { "epoch": 110.23, "learning_rate": 2.244351100811124e-05, "loss": 0.4532, "step": 380500 }, { "epoch": 110.37, "learning_rate": 2.2407300115874857e-05, "loss": 0.4799, "step": 381000 }, { "epoch": 110.52, "learning_rate": 2.2371089223638473e-05, "loss": 0.47, "step": 381500 }, { "epoch": 110.66, "learning_rate": 2.2334878331402086e-05, "loss": 0.4906, "step": 382000 }, { "epoch": 110.81, "learning_rate": 2.2298667439165702e-05, "loss": 0.5021, "step": 382500 }, { "epoch": 110.95, "learning_rate": 2.226245654692932e-05, "loss": 0.505, "step": 383000 }, { "epoch": 111.1, "learning_rate": 2.222624565469293e-05, "loss": 0.4447, "step": 383500 }, { "epoch": 111.24, "learning_rate": 2.219003476245655e-05, "loss": 0.4363, "step": 384000 }, { "epoch": 111.38, "learning_rate": 2.2153823870220163e-05, "loss": 0.4352, "step": 384500 }, { "epoch": 111.53, "learning_rate": 2.211761297798378e-05, "loss": 0.4598, "step": 385000 }, { "epoch": 111.67, "learning_rate": 2.2081402085747392e-05, "loss": 0.4937, "step": 385500 }, { "epoch": 111.82, "learning_rate": 2.2045191193511008e-05, "loss": 0.5125, "step": 386000 }, { "epoch": 111.96, "learning_rate": 2.2008980301274624e-05, "loss": 0.446, "step": 386500 }, { "epoch": 112.11, "learning_rate": 2.197276940903824e-05, "loss": 0.4852, "step": 387000 }, { "epoch": 112.25, "learning_rate": 2.1936558516801856e-05, "loss": 0.4505, "step": 387500 }, { "epoch": 112.4, "learning_rate": 2.190034762456547e-05, "loss": 0.4709, "step": 388000 }, { "epoch": 112.54, "learning_rate": 2.1864136732329085e-05, "loss": 0.4521, "step": 388500 }, { "epoch": 112.69, "learning_rate": 2.18279258400927e-05, "loss": 0.4275, "step": 389000 }, { "epoch": 112.83, "learning_rate": 2.1791714947856314e-05, "loss": 0.4625, "step": 389500 }, { "epoch": 112.98, "learning_rate": 2.1755504055619934e-05, "loss": 0.4842, "step": 390000 }, { "epoch": 113.12, "learning_rate": 2.1719293163383546e-05, "loss": 0.4591, "step": 390500 }, { "epoch": 113.27, "learning_rate": 2.1683082271147162e-05, "loss": 0.4878, "step": 391000 }, { "epoch": 113.41, "learning_rate": 2.164687137891078e-05, "loss": 0.4352, "step": 391500 }, { "epoch": 113.56, "learning_rate": 2.161066048667439e-05, "loss": 0.4387, "step": 392000 }, { "epoch": 113.7, "learning_rate": 2.157444959443801e-05, "loss": 0.4172, "step": 392500 }, { "epoch": 113.85, "learning_rate": 2.1538238702201623e-05, "loss": 0.4304, "step": 393000 }, { "epoch": 113.99, "learning_rate": 2.150202780996524e-05, "loss": 0.4296, "step": 393500 }, { "epoch": 114.14, "learning_rate": 2.1465816917728852e-05, "loss": 0.4355, "step": 394000 }, { "epoch": 114.28, "learning_rate": 2.1429606025492468e-05, "loss": 0.4185, "step": 394500 }, { "epoch": 114.43, "learning_rate": 2.1393395133256084e-05, "loss": 0.4318, "step": 395000 }, { "epoch": 114.57, "learning_rate": 2.13571842410197e-05, "loss": 0.4081, "step": 395500 }, { "epoch": 114.72, "learning_rate": 2.1320973348783317e-05, "loss": 0.4273, "step": 396000 }, { "epoch": 114.86, "learning_rate": 2.128476245654693e-05, "loss": 0.4367, "step": 396500 }, { "epoch": 115.01, "learning_rate": 2.1248551564310545e-05, "loss": 0.4666, "step": 397000 }, { "epoch": 115.15, "learning_rate": 2.121234067207416e-05, "loss": 0.4519, "step": 397500 }, { "epoch": 115.3, "learning_rate": 2.1176129779837774e-05, "loss": 0.4253, "step": 398000 }, { "epoch": 115.44, "learning_rate": 2.1139918887601394e-05, "loss": 0.4376, "step": 398500 }, { "epoch": 115.59, "learning_rate": 2.1103707995365006e-05, "loss": 0.4602, "step": 399000 }, { "epoch": 115.73, "learning_rate": 2.1067497103128622e-05, "loss": 0.4096, "step": 399500 }, { "epoch": 115.87, "learning_rate": 2.103128621089224e-05, "loss": 0.4173, "step": 400000 }, { "epoch": 116.02, "learning_rate": 2.099507531865585e-05, "loss": 0.4236, "step": 400500 }, { "epoch": 116.16, "learning_rate": 2.0958864426419467e-05, "loss": 0.3931, "step": 401000 }, { "epoch": 116.31, "learning_rate": 2.0922653534183083e-05, "loss": 0.4301, "step": 401500 }, { "epoch": 116.45, "learning_rate": 2.08864426419467e-05, "loss": 0.4355, "step": 402000 }, { "epoch": 116.6, "learning_rate": 2.0850231749710312e-05, "loss": 0.4015, "step": 402500 }, { "epoch": 116.74, "learning_rate": 2.081402085747393e-05, "loss": 0.3858, "step": 403000 }, { "epoch": 116.89, "learning_rate": 2.0777809965237544e-05, "loss": 0.4162, "step": 403500 }, { "epoch": 117.03, "learning_rate": 2.074159907300116e-05, "loss": 0.4381, "step": 404000 }, { "epoch": 117.18, "learning_rate": 2.0705388180764777e-05, "loss": 0.4383, "step": 404500 }, { "epoch": 117.32, "learning_rate": 2.066917728852839e-05, "loss": 0.3823, "step": 405000 }, { "epoch": 117.47, "learning_rate": 2.0632966396292005e-05, "loss": 0.4065, "step": 405500 }, { "epoch": 117.61, "learning_rate": 2.059675550405562e-05, "loss": 0.4262, "step": 406000 }, { "epoch": 117.76, "learning_rate": 2.0560544611819234e-05, "loss": 0.4056, "step": 406500 }, { "epoch": 117.9, "learning_rate": 2.0524333719582854e-05, "loss": 0.4442, "step": 407000 }, { "epoch": 118.05, "learning_rate": 2.0488122827346466e-05, "loss": 0.453, "step": 407500 }, { "epoch": 118.19, "learning_rate": 2.0451911935110083e-05, "loss": 0.4199, "step": 408000 }, { "epoch": 118.34, "learning_rate": 2.04157010428737e-05, "loss": 0.3881, "step": 408500 }, { "epoch": 118.48, "learning_rate": 2.037949015063731e-05, "loss": 0.4093, "step": 409000 }, { "epoch": 118.63, "learning_rate": 2.0343279258400927e-05, "loss": 0.3842, "step": 409500 }, { "epoch": 118.77, "learning_rate": 2.0307068366164544e-05, "loss": 0.3937, "step": 410000 }, { "epoch": 118.92, "learning_rate": 2.027085747392816e-05, "loss": 0.4245, "step": 410500 }, { "epoch": 119.06, "learning_rate": 2.0234646581691776e-05, "loss": 0.3956, "step": 411000 }, { "epoch": 119.21, "learning_rate": 2.019843568945539e-05, "loss": 0.3782, "step": 411500 }, { "epoch": 119.35, "learning_rate": 2.0162224797219005e-05, "loss": 0.3795, "step": 412000 }, { "epoch": 119.5, "learning_rate": 2.0126013904982617e-05, "loss": 0.3825, "step": 412500 }, { "epoch": 119.64, "learning_rate": 2.0089803012746237e-05, "loss": 0.3702, "step": 413000 }, { "epoch": 119.79, "learning_rate": 2.005359212050985e-05, "loss": 0.3629, "step": 413500 }, { "epoch": 119.93, "learning_rate": 2.0017381228273466e-05, "loss": 0.4095, "step": 414000 }, { "epoch": 120.08, "learning_rate": 1.998117033603708e-05, "loss": 0.3685, "step": 414500 }, { "epoch": 120.22, "learning_rate": 1.9944959443800694e-05, "loss": 0.4241, "step": 415000 }, { "epoch": 120.37, "learning_rate": 1.990874855156431e-05, "loss": 0.3681, "step": 415500 }, { "epoch": 120.51, "learning_rate": 1.9872537659327926e-05, "loss": 0.3791, "step": 416000 }, { "epoch": 120.65, "learning_rate": 1.9836326767091543e-05, "loss": 0.3634, "step": 416500 }, { "epoch": 120.8, "learning_rate": 1.980011587485516e-05, "loss": 0.4004, "step": 417000 }, { "epoch": 120.94, "learning_rate": 1.976390498261877e-05, "loss": 0.3745, "step": 417500 }, { "epoch": 121.09, "learning_rate": 1.9727694090382387e-05, "loss": 0.3886, "step": 418000 }, { "epoch": 121.23, "learning_rate": 1.9691483198146004e-05, "loss": 0.3633, "step": 418500 }, { "epoch": 121.38, "learning_rate": 1.965527230590962e-05, "loss": 0.3649, "step": 419000 }, { "epoch": 121.52, "learning_rate": 1.9619061413673236e-05, "loss": 0.3719, "step": 419500 }, { "epoch": 121.67, "learning_rate": 1.958285052143685e-05, "loss": 0.4032, "step": 420000 }, { "epoch": 121.81, "learning_rate": 1.9546639629200465e-05, "loss": 0.378, "step": 420500 }, { "epoch": 121.96, "learning_rate": 1.9510428736964077e-05, "loss": 0.3737, "step": 421000 }, { "epoch": 122.1, "learning_rate": 1.9474217844727697e-05, "loss": 0.3599, "step": 421500 }, { "epoch": 122.25, "learning_rate": 1.943800695249131e-05, "loss": 0.3806, "step": 422000 }, { "epoch": 122.39, "learning_rate": 1.9401796060254926e-05, "loss": 0.3456, "step": 422500 }, { "epoch": 122.54, "learning_rate": 1.936558516801854e-05, "loss": 0.3443, "step": 423000 }, { "epoch": 122.68, "learning_rate": 1.9329374275782154e-05, "loss": 0.3855, "step": 423500 }, { "epoch": 122.83, "learning_rate": 1.929316338354577e-05, "loss": 0.3958, "step": 424000 }, { "epoch": 122.97, "learning_rate": 1.9256952491309387e-05, "loss": 0.3696, "step": 424500 }, { "epoch": 123.12, "learning_rate": 1.9220741599073003e-05, "loss": 0.3616, "step": 425000 }, { "epoch": 123.26, "learning_rate": 1.918453070683662e-05, "loss": 0.356, "step": 425500 }, { "epoch": 123.41, "learning_rate": 1.914831981460023e-05, "loss": 0.4097, "step": 426000 }, { "epoch": 123.55, "learning_rate": 1.9112108922363848e-05, "loss": 0.3496, "step": 426500 }, { "epoch": 123.7, "learning_rate": 1.9075898030127464e-05, "loss": 0.3326, "step": 427000 }, { "epoch": 123.84, "learning_rate": 1.903968713789108e-05, "loss": 0.3807, "step": 427500 }, { "epoch": 123.99, "learning_rate": 1.9003476245654696e-05, "loss": 0.4034, "step": 428000 }, { "epoch": 124.13, "learning_rate": 1.896726535341831e-05, "loss": 0.3497, "step": 428500 }, { "epoch": 124.28, "learning_rate": 1.8931054461181925e-05, "loss": 0.3475, "step": 429000 }, { "epoch": 124.42, "learning_rate": 1.8894843568945537e-05, "loss": 0.3313, "step": 429500 }, { "epoch": 124.57, "learning_rate": 1.8858632676709153e-05, "loss": 0.369, "step": 430000 }, { "epoch": 124.71, "learning_rate": 1.8822421784472773e-05, "loss": 0.3487, "step": 430500 }, { "epoch": 124.86, "learning_rate": 1.8786210892236386e-05, "loss": 0.329, "step": 431000 }, { "epoch": 125.0, "learning_rate": 1.8750000000000002e-05, "loss": 0.3298, "step": 431500 }, { "epoch": 125.14, "learning_rate": 1.8713789107763614e-05, "loss": 0.3246, "step": 432000 }, { "epoch": 125.29, "learning_rate": 1.867757821552723e-05, "loss": 0.3272, "step": 432500 }, { "epoch": 125.43, "learning_rate": 1.8641367323290847e-05, "loss": 0.3454, "step": 433000 }, { "epoch": 125.58, "learning_rate": 1.8605156431054463e-05, "loss": 0.3547, "step": 433500 }, { "epoch": 125.72, "learning_rate": 1.856894553881808e-05, "loss": 0.3334, "step": 434000 }, { "epoch": 125.87, "learning_rate": 1.853273464658169e-05, "loss": 0.3776, "step": 434500 }, { "epoch": 126.01, "learning_rate": 1.8496523754345308e-05, "loss": 0.3607, "step": 435000 }, { "epoch": 126.16, "learning_rate": 1.8460312862108924e-05, "loss": 0.329, "step": 435500 }, { "epoch": 126.3, "learning_rate": 1.842410196987254e-05, "loss": 0.33, "step": 436000 }, { "epoch": 126.45, "learning_rate": 1.8387891077636156e-05, "loss": 0.3466, "step": 436500 }, { "epoch": 126.59, "learning_rate": 1.835168018539977e-05, "loss": 0.3597, "step": 437000 }, { "epoch": 126.74, "learning_rate": 1.8315469293163385e-05, "loss": 0.3248, "step": 437500 }, { "epoch": 126.88, "learning_rate": 1.8279258400926997e-05, "loss": 0.3396, "step": 438000 }, { "epoch": 127.03, "learning_rate": 1.8243047508690614e-05, "loss": 0.3413, "step": 438500 }, { "epoch": 127.17, "learning_rate": 1.8206836616454233e-05, "loss": 0.3249, "step": 439000 }, { "epoch": 127.32, "learning_rate": 1.8170625724217846e-05, "loss": 0.3022, "step": 439500 }, { "epoch": 127.46, "learning_rate": 1.8134414831981462e-05, "loss": 0.308, "step": 440000 }, { "epoch": 127.61, "learning_rate": 1.8098203939745075e-05, "loss": 0.3446, "step": 440500 }, { "epoch": 127.75, "learning_rate": 1.806199304750869e-05, "loss": 0.3335, "step": 441000 }, { "epoch": 127.9, "learning_rate": 1.8025782155272307e-05, "loss": 0.335, "step": 441500 }, { "epoch": 128.04, "learning_rate": 1.7989571263035923e-05, "loss": 0.3059, "step": 442000 }, { "epoch": 128.19, "learning_rate": 1.795336037079954e-05, "loss": 0.3314, "step": 442500 }, { "epoch": 128.33, "learning_rate": 1.791714947856315e-05, "loss": 0.3004, "step": 443000 }, { "epoch": 128.48, "learning_rate": 1.7880938586326768e-05, "loss": 0.3249, "step": 443500 }, { "epoch": 128.62, "learning_rate": 1.7844727694090384e-05, "loss": 0.3198, "step": 444000 }, { "epoch": 128.77, "learning_rate": 1.7808516801853997e-05, "loss": 0.3394, "step": 444500 }, { "epoch": 128.91, "learning_rate": 1.7772305909617616e-05, "loss": 0.3075, "step": 445000 }, { "epoch": 129.06, "learning_rate": 1.773609501738123e-05, "loss": 0.334, "step": 445500 }, { "epoch": 129.2, "learning_rate": 1.7699884125144845e-05, "loss": 0.306, "step": 446000 }, { "epoch": 129.35, "learning_rate": 1.766367323290846e-05, "loss": 0.3146, "step": 446500 }, { "epoch": 129.49, "learning_rate": 1.7627462340672074e-05, "loss": 0.3068, "step": 447000 }, { "epoch": 129.63, "learning_rate": 1.7591251448435693e-05, "loss": 0.3124, "step": 447500 }, { "epoch": 129.78, "learning_rate": 1.7555040556199306e-05, "loss": 0.3082, "step": 448000 }, { "epoch": 129.92, "learning_rate": 1.7518829663962922e-05, "loss": 0.3215, "step": 448500 }, { "epoch": 130.07, "learning_rate": 1.7482618771726535e-05, "loss": 0.3114, "step": 449000 }, { "epoch": 130.21, "learning_rate": 1.744640787949015e-05, "loss": 0.3228, "step": 449500 }, { "epoch": 130.36, "learning_rate": 1.7410196987253767e-05, "loss": 0.3281, "step": 450000 }, { "epoch": 130.5, "learning_rate": 1.7373986095017383e-05, "loss": 0.2865, "step": 450500 }, { "epoch": 130.65, "learning_rate": 1.7337775202781e-05, "loss": 0.3214, "step": 451000 }, { "epoch": 130.79, "learning_rate": 1.730156431054461e-05, "loss": 0.3105, "step": 451500 }, { "epoch": 130.94, "learning_rate": 1.7265353418308228e-05, "loss": 0.3095, "step": 452000 }, { "epoch": 131.08, "learning_rate": 1.7229142526071844e-05, "loss": 0.3034, "step": 452500 }, { "epoch": 131.23, "learning_rate": 1.7192931633835457e-05, "loss": 0.3057, "step": 453000 }, { "epoch": 131.37, "learning_rate": 1.7156720741599076e-05, "loss": 0.2957, "step": 453500 }, { "epoch": 131.52, "learning_rate": 1.712050984936269e-05, "loss": 0.312, "step": 454000 }, { "epoch": 131.66, "learning_rate": 1.7084298957126305e-05, "loss": 0.29, "step": 454500 }, { "epoch": 131.81, "learning_rate": 1.704808806488992e-05, "loss": 0.2816, "step": 455000 }, { "epoch": 131.95, "learning_rate": 1.7011877172653534e-05, "loss": 0.3185, "step": 455500 }, { "epoch": 132.1, "learning_rate": 1.697566628041715e-05, "loss": 0.298, "step": 456000 }, { "epoch": 132.24, "learning_rate": 1.6939455388180766e-05, "loss": 0.2794, "step": 456500 }, { "epoch": 132.39, "learning_rate": 1.6903244495944382e-05, "loss": 0.29, "step": 457000 }, { "epoch": 132.53, "learning_rate": 1.6867033603707995e-05, "loss": 0.3004, "step": 457500 }, { "epoch": 132.68, "learning_rate": 1.683082271147161e-05, "loss": 0.3047, "step": 458000 }, { "epoch": 132.82, "learning_rate": 1.6794611819235227e-05, "loss": 0.3057, "step": 458500 }, { "epoch": 132.97, "learning_rate": 1.675840092699884e-05, "loss": 0.2959, "step": 459000 }, { "epoch": 133.11, "learning_rate": 1.672219003476246e-05, "loss": 0.317, "step": 459500 }, { "epoch": 133.26, "learning_rate": 1.6685979142526072e-05, "loss": 0.3087, "step": 460000 }, { "epoch": 133.4, "learning_rate": 1.6649768250289688e-05, "loss": 0.2817, "step": 460500 }, { "epoch": 133.55, "learning_rate": 1.6613557358053304e-05, "loss": 0.2774, "step": 461000 }, { "epoch": 133.69, "learning_rate": 1.6577346465816917e-05, "loss": 0.2932, "step": 461500 }, { "epoch": 133.84, "learning_rate": 1.6541135573580536e-05, "loss": 0.2784, "step": 462000 }, { "epoch": 133.98, "learning_rate": 1.650492468134415e-05, "loss": 0.3047, "step": 462500 }, { "epoch": 134.13, "learning_rate": 1.6468713789107765e-05, "loss": 0.2939, "step": 463000 }, { "epoch": 134.27, "learning_rate": 1.643250289687138e-05, "loss": 0.2729, "step": 463500 }, { "epoch": 134.41, "learning_rate": 1.6396292004634994e-05, "loss": 0.2736, "step": 464000 }, { "epoch": 134.56, "learning_rate": 1.636008111239861e-05, "loss": 0.2993, "step": 464500 }, { "epoch": 134.7, "learning_rate": 1.6323870220162226e-05, "loss": 0.285, "step": 465000 }, { "epoch": 134.85, "learning_rate": 1.6287659327925842e-05, "loss": 0.2693, "step": 465500 }, { "epoch": 134.99, "learning_rate": 1.6251448435689455e-05, "loss": 0.2976, "step": 466000 }, { "epoch": 135.14, "learning_rate": 1.621523754345307e-05, "loss": 0.2804, "step": 466500 }, { "epoch": 135.28, "learning_rate": 1.6179026651216687e-05, "loss": 0.2843, "step": 467000 }, { "epoch": 135.43, "learning_rate": 1.61428157589803e-05, "loss": 0.2744, "step": 467500 }, { "epoch": 135.57, "learning_rate": 1.610660486674392e-05, "loss": 0.2793, "step": 468000 }, { "epoch": 135.72, "learning_rate": 1.6070393974507532e-05, "loss": 0.285, "step": 468500 }, { "epoch": 135.86, "learning_rate": 1.6034183082271148e-05, "loss": 0.2775, "step": 469000 }, { "epoch": 136.01, "learning_rate": 1.5997972190034764e-05, "loss": 0.2836, "step": 469500 }, { "epoch": 136.15, "learning_rate": 1.5961761297798377e-05, "loss": 0.2635, "step": 470000 }, { "epoch": 136.3, "learning_rate": 1.5925550405561993e-05, "loss": 0.2745, "step": 470500 }, { "epoch": 136.44, "learning_rate": 1.588933951332561e-05, "loss": 0.264, "step": 471000 }, { "epoch": 136.59, "learning_rate": 1.5853128621089225e-05, "loss": 0.2761, "step": 471500 }, { "epoch": 136.73, "learning_rate": 1.581691772885284e-05, "loss": 0.2966, "step": 472000 }, { "epoch": 136.88, "learning_rate": 1.5780706836616454e-05, "loss": 0.2563, "step": 472500 }, { "epoch": 137.02, "learning_rate": 1.574449594438007e-05, "loss": 0.3046, "step": 473000 }, { "epoch": 137.17, "learning_rate": 1.5708285052143686e-05, "loss": 0.2558, "step": 473500 }, { "epoch": 137.31, "learning_rate": 1.5672074159907302e-05, "loss": 0.2572, "step": 474000 }, { "epoch": 137.46, "learning_rate": 1.5635863267670918e-05, "loss": 0.2545, "step": 474500 }, { "epoch": 137.6, "learning_rate": 1.559965237543453e-05, "loss": 0.2851, "step": 475000 }, { "epoch": 137.75, "learning_rate": 1.5563441483198147e-05, "loss": 0.2573, "step": 475500 }, { "epoch": 137.89, "learning_rate": 1.552723059096176e-05, "loss": 0.3079, "step": 476000 }, { "epoch": 138.04, "learning_rate": 1.549101969872538e-05, "loss": 0.2508, "step": 476500 }, { "epoch": 138.18, "learning_rate": 1.5454808806488992e-05, "loss": 0.2452, "step": 477000 }, { "epoch": 138.33, "learning_rate": 1.5418597914252608e-05, "loss": 0.2633, "step": 477500 }, { "epoch": 138.47, "learning_rate": 1.5382387022016224e-05, "loss": 0.2569, "step": 478000 }, { "epoch": 138.62, "learning_rate": 1.5346176129779837e-05, "loss": 0.2591, "step": 478500 }, { "epoch": 138.76, "learning_rate": 1.5309965237543453e-05, "loss": 0.272, "step": 479000 }, { "epoch": 138.9, "learning_rate": 1.527375434530707e-05, "loss": 0.2495, "step": 479500 }, { "epoch": 139.05, "learning_rate": 1.5237543453070685e-05, "loss": 0.2988, "step": 480000 }, { "epoch": 139.19, "learning_rate": 1.52013325608343e-05, "loss": 0.255, "step": 480500 }, { "epoch": 139.34, "learning_rate": 1.5165121668597914e-05, "loss": 0.2734, "step": 481000 }, { "epoch": 139.48, "learning_rate": 1.512891077636153e-05, "loss": 0.2574, "step": 481500 }, { "epoch": 139.63, "learning_rate": 1.5092699884125144e-05, "loss": 0.2649, "step": 482000 }, { "epoch": 139.77, "learning_rate": 1.5056488991888762e-05, "loss": 0.2445, "step": 482500 }, { "epoch": 139.92, "learning_rate": 1.5020278099652377e-05, "loss": 0.2909, "step": 483000 }, { "epoch": 140.06, "learning_rate": 1.4984067207415991e-05, "loss": 0.2669, "step": 483500 }, { "epoch": 140.21, "learning_rate": 1.4947856315179607e-05, "loss": 0.2534, "step": 484000 }, { "epoch": 140.35, "learning_rate": 1.4911645422943221e-05, "loss": 0.2586, "step": 484500 }, { "epoch": 140.5, "learning_rate": 1.4875434530706836e-05, "loss": 0.2407, "step": 485000 }, { "epoch": 140.64, "learning_rate": 1.4839223638470454e-05, "loss": 0.2306, "step": 485500 }, { "epoch": 140.79, "learning_rate": 1.4803012746234068e-05, "loss": 0.2588, "step": 486000 }, { "epoch": 140.93, "learning_rate": 1.4766801853997682e-05, "loss": 0.249, "step": 486500 }, { "epoch": 141.08, "learning_rate": 1.4730590961761299e-05, "loss": 0.2863, "step": 487000 }, { "epoch": 141.22, "learning_rate": 1.4694380069524913e-05, "loss": 0.2298, "step": 487500 }, { "epoch": 141.37, "learning_rate": 1.465816917728853e-05, "loss": 0.226, "step": 488000 }, { "epoch": 141.51, "learning_rate": 1.4621958285052145e-05, "loss": 0.2447, "step": 488500 }, { "epoch": 141.66, "learning_rate": 1.458574739281576e-05, "loss": 0.2686, "step": 489000 }, { "epoch": 141.8, "learning_rate": 1.4549536500579376e-05, "loss": 0.2503, "step": 489500 }, { "epoch": 141.95, "learning_rate": 1.451332560834299e-05, "loss": 0.2746, "step": 490000 }, { "epoch": 142.09, "learning_rate": 1.4477114716106604e-05, "loss": 0.2151, "step": 490500 }, { "epoch": 142.24, "learning_rate": 1.4440903823870222e-05, "loss": 0.2437, "step": 491000 }, { "epoch": 142.38, "learning_rate": 1.4404692931633837e-05, "loss": 0.2303, "step": 491500 }, { "epoch": 142.53, "learning_rate": 1.4368482039397451e-05, "loss": 0.2443, "step": 492000 }, { "epoch": 142.67, "learning_rate": 1.4332271147161067e-05, "loss": 0.2637, "step": 492500 }, { "epoch": 142.82, "learning_rate": 1.4296060254924682e-05, "loss": 0.2514, "step": 493000 }, { "epoch": 142.96, "learning_rate": 1.4259849362688296e-05, "loss": 0.2576, "step": 493500 }, { "epoch": 143.11, "learning_rate": 1.4223638470451914e-05, "loss": 0.2524, "step": 494000 }, { "epoch": 143.25, "learning_rate": 1.4187427578215528e-05, "loss": 0.2366, "step": 494500 }, { "epoch": 143.4, "learning_rate": 1.4151216685979144e-05, "loss": 0.2462, "step": 495000 }, { "epoch": 143.54, "learning_rate": 1.4115005793742759e-05, "loss": 0.2367, "step": 495500 }, { "epoch": 143.68, "learning_rate": 1.4078794901506373e-05, "loss": 0.2321, "step": 496000 }, { "epoch": 143.83, "learning_rate": 1.4042584009269987e-05, "loss": 0.2467, "step": 496500 }, { "epoch": 143.97, "learning_rate": 1.4006373117033605e-05, "loss": 0.229, "step": 497000 }, { "epoch": 144.12, "learning_rate": 1.397016222479722e-05, "loss": 0.237, "step": 497500 }, { "epoch": 144.26, "learning_rate": 1.3933951332560836e-05, "loss": 0.2555, "step": 498000 }, { "epoch": 144.41, "learning_rate": 1.389774044032445e-05, "loss": 0.2545, "step": 498500 }, { "epoch": 144.55, "learning_rate": 1.3861529548088065e-05, "loss": 0.2443, "step": 499000 }, { "epoch": 144.7, "learning_rate": 1.3825318655851679e-05, "loss": 0.2723, "step": 499500 }, { "epoch": 144.84, "learning_rate": 1.3789107763615297e-05, "loss": 0.2444, "step": 500000 }, { "epoch": 144.99, "learning_rate": 1.3752896871378911e-05, "loss": 0.2326, "step": 500500 }, { "epoch": 145.13, "learning_rate": 1.3716685979142527e-05, "loss": 0.2243, "step": 501000 }, { "epoch": 145.28, "learning_rate": 1.3680475086906142e-05, "loss": 0.2313, "step": 501500 }, { "epoch": 145.42, "learning_rate": 1.3644264194669756e-05, "loss": 0.2395, "step": 502000 }, { "epoch": 145.57, "learning_rate": 1.3608053302433374e-05, "loss": 0.2327, "step": 502500 }, { "epoch": 145.71, "learning_rate": 1.3571842410196988e-05, "loss": 0.2246, "step": 503000 }, { "epoch": 145.86, "learning_rate": 1.3535631517960604e-05, "loss": 0.2169, "step": 503500 }, { "epoch": 146.0, "learning_rate": 1.3499420625724219e-05, "loss": 0.2386, "step": 504000 }, { "epoch": 146.15, "learning_rate": 1.3463209733487833e-05, "loss": 0.238, "step": 504500 }, { "epoch": 146.29, "learning_rate": 1.3426998841251448e-05, "loss": 0.2387, "step": 505000 }, { "epoch": 146.44, "learning_rate": 1.3390787949015065e-05, "loss": 0.2223, "step": 505500 }, { "epoch": 146.58, "learning_rate": 1.335457705677868e-05, "loss": 0.2103, "step": 506000 }, { "epoch": 146.73, "learning_rate": 1.3318366164542296e-05, "loss": 0.2209, "step": 506500 }, { "epoch": 146.87, "learning_rate": 1.328215527230591e-05, "loss": 0.2444, "step": 507000 }, { "epoch": 147.02, "learning_rate": 1.3245944380069525e-05, "loss": 0.2337, "step": 507500 }, { "epoch": 147.16, "learning_rate": 1.3209733487833139e-05, "loss": 0.2242, "step": 508000 }, { "epoch": 147.31, "learning_rate": 1.3173522595596757e-05, "loss": 0.2305, "step": 508500 }, { "epoch": 147.45, "learning_rate": 1.3137311703360373e-05, "loss": 0.216, "step": 509000 }, { "epoch": 147.6, "learning_rate": 1.3101100811123987e-05, "loss": 0.2132, "step": 509500 }, { "epoch": 147.74, "learning_rate": 1.3064889918887602e-05, "loss": 0.2571, "step": 510000 }, { "epoch": 147.89, "learning_rate": 1.3028679026651216e-05, "loss": 0.2287, "step": 510500 }, { "epoch": 148.03, "learning_rate": 1.299246813441483e-05, "loss": 0.2344, "step": 511000 }, { "epoch": 148.17, "learning_rate": 1.2956257242178448e-05, "loss": 0.2202, "step": 511500 }, { "epoch": 148.32, "learning_rate": 1.2920046349942064e-05, "loss": 0.1859, "step": 512000 }, { "epoch": 148.46, "learning_rate": 1.2883835457705679e-05, "loss": 0.1972, "step": 512500 }, { "epoch": 148.61, "learning_rate": 1.2847624565469293e-05, "loss": 0.2232, "step": 513000 }, { "epoch": 148.75, "learning_rate": 1.2811413673232908e-05, "loss": 0.2051, "step": 513500 }, { "epoch": 148.9, "learning_rate": 1.2775202780996524e-05, "loss": 0.2272, "step": 514000 }, { "epoch": 149.04, "learning_rate": 1.273899188876014e-05, "loss": 0.1866, "step": 514500 }, { "epoch": 149.19, "learning_rate": 1.2702780996523756e-05, "loss": 0.1998, "step": 515000 }, { "epoch": 149.33, "learning_rate": 1.266657010428737e-05, "loss": 0.1948, "step": 515500 }, { "epoch": 149.48, "learning_rate": 1.2630359212050985e-05, "loss": 0.2025, "step": 516000 }, { "epoch": 149.62, "learning_rate": 1.2594148319814599e-05, "loss": 0.2197, "step": 516500 }, { "epoch": 149.77, "learning_rate": 1.2557937427578217e-05, "loss": 0.2142, "step": 517000 }, { "epoch": 149.91, "learning_rate": 1.2521726535341833e-05, "loss": 0.2137, "step": 517500 }, { "epoch": 150.06, "learning_rate": 1.2485515643105447e-05, "loss": 0.2164, "step": 518000 }, { "epoch": 150.2, "learning_rate": 1.2449304750869062e-05, "loss": 0.1961, "step": 518500 }, { "epoch": 150.35, "learning_rate": 1.2413093858632676e-05, "loss": 0.2187, "step": 519000 }, { "epoch": 150.49, "learning_rate": 1.2376882966396292e-05, "loss": 0.1937, "step": 519500 }, { "epoch": 150.64, "learning_rate": 1.2340672074159908e-05, "loss": 0.2133, "step": 520000 }, { "epoch": 150.78, "learning_rate": 1.2304461181923523e-05, "loss": 0.2468, "step": 520500 }, { "epoch": 150.93, "learning_rate": 1.2268250289687139e-05, "loss": 0.2023, "step": 521000 }, { "epoch": 151.07, "learning_rate": 1.2232039397450753e-05, "loss": 0.2029, "step": 521500 }, { "epoch": 151.22, "learning_rate": 1.219582850521437e-05, "loss": 0.2073, "step": 522000 }, { "epoch": 151.36, "learning_rate": 1.2159617612977984e-05, "loss": 0.2003, "step": 522500 }, { "epoch": 151.51, "learning_rate": 1.21234067207416e-05, "loss": 0.2107, "step": 523000 }, { "epoch": 151.65, "learning_rate": 1.2087195828505216e-05, "loss": 0.2126, "step": 523500 }, { "epoch": 151.8, "learning_rate": 1.205098493626883e-05, "loss": 0.2001, "step": 524000 }, { "epoch": 151.94, "learning_rate": 1.2014774044032445e-05, "loss": 0.228, "step": 524500 }, { "epoch": 152.09, "learning_rate": 1.1978563151796061e-05, "loss": 0.2013, "step": 525000 }, { "epoch": 152.23, "learning_rate": 1.1942352259559677e-05, "loss": 0.2065, "step": 525500 }, { "epoch": 152.38, "learning_rate": 1.1906141367323291e-05, "loss": 0.1917, "step": 526000 }, { "epoch": 152.52, "learning_rate": 1.1869930475086907e-05, "loss": 0.2089, "step": 526500 }, { "epoch": 152.67, "learning_rate": 1.1833719582850522e-05, "loss": 0.1965, "step": 527000 }, { "epoch": 152.81, "learning_rate": 1.1797508690614136e-05, "loss": 0.2153, "step": 527500 }, { "epoch": 152.95, "learning_rate": 1.1761297798377752e-05, "loss": 0.1984, "step": 528000 }, { "epoch": 153.1, "learning_rate": 1.1725086906141368e-05, "loss": 0.1944, "step": 528500 }, { "epoch": 153.24, "learning_rate": 1.1688876013904983e-05, "loss": 0.2069, "step": 529000 }, { "epoch": 153.39, "learning_rate": 1.1652665121668599e-05, "loss": 0.1943, "step": 529500 }, { "epoch": 153.53, "learning_rate": 1.1616454229432213e-05, "loss": 0.1839, "step": 530000 }, { "epoch": 153.68, "learning_rate": 1.1580243337195828e-05, "loss": 0.2062, "step": 530500 }, { "epoch": 153.82, "learning_rate": 1.1544032444959446e-05, "loss": 0.2067, "step": 531000 }, { "epoch": 153.97, "learning_rate": 1.150782155272306e-05, "loss": 0.2113, "step": 531500 }, { "epoch": 154.11, "learning_rate": 1.1471610660486674e-05, "loss": 0.2214, "step": 532000 }, { "epoch": 154.26, "learning_rate": 1.143539976825029e-05, "loss": 0.203, "step": 532500 }, { "epoch": 154.4, "learning_rate": 1.1399188876013905e-05, "loss": 0.1725, "step": 533000 }, { "epoch": 154.55, "learning_rate": 1.1362977983777521e-05, "loss": 0.1898, "step": 533500 }, { "epoch": 154.69, "learning_rate": 1.1326767091541137e-05, "loss": 0.2152, "step": 534000 }, { "epoch": 154.84, "learning_rate": 1.1290556199304751e-05, "loss": 0.1984, "step": 534500 }, { "epoch": 154.98, "learning_rate": 1.1254345307068366e-05, "loss": 0.2074, "step": 535000 }, { "epoch": 155.13, "learning_rate": 1.1218134414831982e-05, "loss": 0.203, "step": 535500 }, { "epoch": 155.27, "learning_rate": 1.1181923522595596e-05, "loss": 0.2135, "step": 536000 }, { "epoch": 155.42, "learning_rate": 1.1145712630359212e-05, "loss": 0.191, "step": 536500 }, { "epoch": 155.56, "learning_rate": 1.1109501738122829e-05, "loss": 0.1882, "step": 537000 }, { "epoch": 155.71, "learning_rate": 1.1073290845886443e-05, "loss": 0.1999, "step": 537500 }, { "epoch": 155.85, "learning_rate": 1.1037079953650059e-05, "loss": 0.1769, "step": 538000 }, { "epoch": 156.0, "learning_rate": 1.1000869061413673e-05, "loss": 0.1962, "step": 538500 }, { "epoch": 156.14, "learning_rate": 1.096465816917729e-05, "loss": 0.1978, "step": 539000 }, { "epoch": 156.29, "learning_rate": 1.0928447276940906e-05, "loss": 0.1693, "step": 539500 }, { "epoch": 156.43, "learning_rate": 1.089223638470452e-05, "loss": 0.1777, "step": 540000 }, { "epoch": 156.58, "learning_rate": 1.0856025492468134e-05, "loss": 0.1803, "step": 540500 }, { "epoch": 156.72, "learning_rate": 1.081981460023175e-05, "loss": 0.1683, "step": 541000 }, { "epoch": 156.87, "learning_rate": 1.0783603707995365e-05, "loss": 0.1918, "step": 541500 }, { "epoch": 157.01, "learning_rate": 1.0747392815758981e-05, "loss": 0.1938, "step": 542000 }, { "epoch": 157.16, "learning_rate": 1.0711181923522597e-05, "loss": 0.1879, "step": 542500 }, { "epoch": 157.3, "learning_rate": 1.0674971031286211e-05, "loss": 0.1927, "step": 543000 }, { "epoch": 157.44, "learning_rate": 1.0638760139049826e-05, "loss": 0.1724, "step": 543500 }, { "epoch": 157.59, "learning_rate": 1.0602549246813442e-05, "loss": 0.1943, "step": 544000 }, { "epoch": 157.73, "learning_rate": 1.0566338354577056e-05, "loss": 0.172, "step": 544500 }, { "epoch": 157.88, "learning_rate": 1.0530127462340672e-05, "loss": 0.1979, "step": 545000 }, { "epoch": 158.02, "learning_rate": 1.0493916570104289e-05, "loss": 0.2234, "step": 545500 }, { "epoch": 158.17, "learning_rate": 1.0457705677867903e-05, "loss": 0.1712, "step": 546000 }, { "epoch": 158.31, "learning_rate": 1.0421494785631517e-05, "loss": 0.2199, "step": 546500 }, { "epoch": 158.46, "learning_rate": 1.0385283893395133e-05, "loss": 0.1846, "step": 547000 }, { "epoch": 158.6, "learning_rate": 1.034907300115875e-05, "loss": 0.1832, "step": 547500 }, { "epoch": 158.75, "learning_rate": 1.0312862108922364e-05, "loss": 0.1684, "step": 548000 }, { "epoch": 158.89, "learning_rate": 1.027665121668598e-05, "loss": 0.1741, "step": 548500 }, { "epoch": 159.04, "learning_rate": 1.0240440324449594e-05, "loss": 0.1623, "step": 549000 }, { "epoch": 159.18, "learning_rate": 1.0204229432213209e-05, "loss": 0.1776, "step": 549500 }, { "epoch": 159.33, "learning_rate": 1.0168018539976825e-05, "loss": 0.1863, "step": 550000 }, { "epoch": 159.47, "learning_rate": 1.0131807647740441e-05, "loss": 0.1685, "step": 550500 }, { "epoch": 159.62, "learning_rate": 1.0095596755504057e-05, "loss": 0.1973, "step": 551000 }, { "epoch": 159.76, "learning_rate": 1.0059385863267672e-05, "loss": 0.1711, "step": 551500 }, { "epoch": 159.91, "learning_rate": 1.0023174971031286e-05, "loss": 0.1814, "step": 552000 }, { "epoch": 160.05, "learning_rate": 9.986964078794902e-06, "loss": 0.167, "step": 552500 }, { "epoch": 160.2, "learning_rate": 9.950753186558518e-06, "loss": 0.1938, "step": 553000 }, { "epoch": 160.34, "learning_rate": 9.914542294322133e-06, "loss": 0.167, "step": 553500 }, { "epoch": 160.49, "learning_rate": 9.878331402085749e-06, "loss": 0.1617, "step": 554000 }, { "epoch": 160.63, "learning_rate": 9.842120509849363e-06, "loss": 0.172, "step": 554500 }, { "epoch": 160.78, "learning_rate": 9.805909617612977e-06, "loss": 0.1737, "step": 555000 }, { "epoch": 160.92, "learning_rate": 9.769698725376594e-06, "loss": 0.1816, "step": 555500 }, { "epoch": 161.07, "learning_rate": 9.73348783314021e-06, "loss": 0.1628, "step": 556000 }, { "epoch": 161.21, "learning_rate": 9.697276940903824e-06, "loss": 0.1696, "step": 556500 }, { "epoch": 161.36, "learning_rate": 9.66106604866744e-06, "loss": 0.1656, "step": 557000 }, { "epoch": 161.5, "learning_rate": 9.624855156431055e-06, "loss": 0.1559, "step": 557500 }, { "epoch": 161.65, "learning_rate": 9.588644264194669e-06, "loss": 0.172, "step": 558000 }, { "epoch": 161.79, "learning_rate": 9.552433371958287e-06, "loss": 0.1878, "step": 558500 }, { "epoch": 161.94, "learning_rate": 9.516222479721901e-06, "loss": 0.1682, "step": 559000 }, { "epoch": 162.08, "learning_rate": 9.480011587485516e-06, "loss": 0.1755, "step": 559500 }, { "epoch": 162.22, "learning_rate": 9.443800695249132e-06, "loss": 0.2092, "step": 560000 }, { "epoch": 162.37, "learning_rate": 9.407589803012746e-06, "loss": 0.1784, "step": 560500 }, { "epoch": 162.51, "learning_rate": 9.371378910776362e-06, "loss": 0.154, "step": 561000 }, { "epoch": 162.66, "learning_rate": 9.335168018539978e-06, "loss": 0.1892, "step": 561500 }, { "epoch": 162.8, "learning_rate": 9.298957126303593e-06, "loss": 0.1679, "step": 562000 }, { "epoch": 162.95, "learning_rate": 9.262746234067207e-06, "loss": 0.1699, "step": 562500 }, { "epoch": 163.09, "learning_rate": 9.226535341830823e-06, "loss": 0.1553, "step": 563000 }, { "epoch": 163.24, "learning_rate": 9.190324449594438e-06, "loss": 0.1792, "step": 563500 }, { "epoch": 163.38, "learning_rate": 9.154113557358054e-06, "loss": 0.1611, "step": 564000 }, { "epoch": 163.53, "learning_rate": 9.11790266512167e-06, "loss": 0.1682, "step": 564500 }, { "epoch": 163.67, "learning_rate": 9.081691772885284e-06, "loss": 0.1765, "step": 565000 }, { "epoch": 163.82, "learning_rate": 9.0454808806489e-06, "loss": 0.1928, "step": 565500 }, { "epoch": 163.96, "learning_rate": 9.009269988412515e-06, "loss": 0.1758, "step": 566000 }, { "epoch": 164.11, "learning_rate": 8.97305909617613e-06, "loss": 0.1599, "step": 566500 }, { "epoch": 164.25, "learning_rate": 8.936848203939747e-06, "loss": 0.1793, "step": 567000 }, { "epoch": 164.4, "learning_rate": 8.900637311703361e-06, "loss": 0.151, "step": 567500 }, { "epoch": 164.54, "learning_rate": 8.864426419466976e-06, "loss": 0.1545, "step": 568000 }, { "epoch": 164.69, "learning_rate": 8.828215527230592e-06, "loss": 0.1745, "step": 568500 }, { "epoch": 164.83, "learning_rate": 8.792004634994206e-06, "loss": 0.175, "step": 569000 }, { "epoch": 164.98, "learning_rate": 8.755793742757822e-06, "loss": 0.1685, "step": 569500 }, { "epoch": 165.12, "learning_rate": 8.719582850521438e-06, "loss": 0.1714, "step": 570000 }, { "epoch": 165.27, "learning_rate": 8.683371958285053e-06, "loss": 0.1596, "step": 570500 }, { "epoch": 165.41, "learning_rate": 8.647161066048667e-06, "loss": 0.1514, "step": 571000 }, { "epoch": 165.56, "learning_rate": 8.610950173812283e-06, "loss": 0.1698, "step": 571500 }, { "epoch": 165.7, "learning_rate": 8.574739281575898e-06, "loss": 0.1652, "step": 572000 }, { "epoch": 165.85, "learning_rate": 8.538528389339514e-06, "loss": 0.1615, "step": 572500 }, { "epoch": 165.99, "learning_rate": 8.50231749710313e-06, "loss": 0.1452, "step": 573000 }, { "epoch": 166.14, "learning_rate": 8.466106604866744e-06, "loss": 0.1636, "step": 573500 }, { "epoch": 166.28, "learning_rate": 8.429895712630359e-06, "loss": 0.165, "step": 574000 }, { "epoch": 166.43, "learning_rate": 8.393684820393975e-06, "loss": 0.1494, "step": 574500 }, { "epoch": 166.57, "learning_rate": 8.35747392815759e-06, "loss": 0.1576, "step": 575000 }, { "epoch": 166.71, "learning_rate": 8.321263035921205e-06, "loss": 0.1632, "step": 575500 }, { "epoch": 166.86, "learning_rate": 8.285052143684821e-06, "loss": 0.16, "step": 576000 }, { "epoch": 167.0, "learning_rate": 8.248841251448436e-06, "loss": 0.1682, "step": 576500 }, { "epoch": 167.15, "learning_rate": 8.21263035921205e-06, "loss": 0.1602, "step": 577000 }, { "epoch": 167.29, "learning_rate": 8.176419466975666e-06, "loss": 0.1463, "step": 577500 }, { "epoch": 167.44, "learning_rate": 8.140208574739282e-06, "loss": 0.1599, "step": 578000 }, { "epoch": 167.58, "learning_rate": 8.103997682502898e-06, "loss": 0.1446, "step": 578500 }, { "epoch": 167.73, "learning_rate": 8.067786790266513e-06, "loss": 0.1541, "step": 579000 }, { "epoch": 167.87, "learning_rate": 8.031575898030127e-06, "loss": 0.1572, "step": 579500 }, { "epoch": 168.02, "learning_rate": 7.995365005793743e-06, "loss": 0.1458, "step": 580000 }, { "epoch": 168.16, "learning_rate": 7.95915411355736e-06, "loss": 0.1426, "step": 580500 }, { "epoch": 168.31, "learning_rate": 7.922943221320974e-06, "loss": 0.1396, "step": 581000 }, { "epoch": 168.45, "learning_rate": 7.88673232908459e-06, "loss": 0.1583, "step": 581500 }, { "epoch": 168.6, "learning_rate": 7.850521436848204e-06, "loss": 0.1841, "step": 582000 }, { "epoch": 168.74, "learning_rate": 7.814310544611819e-06, "loss": 0.1544, "step": 582500 }, { "epoch": 168.89, "learning_rate": 7.778099652375435e-06, "loss": 0.167, "step": 583000 }, { "epoch": 169.03, "learning_rate": 7.741888760139051e-06, "loss": 0.1597, "step": 583500 }, { "epoch": 169.18, "learning_rate": 7.705677867902665e-06, "loss": 0.1523, "step": 584000 }, { "epoch": 169.32, "learning_rate": 7.669466975666281e-06, "loss": 0.1497, "step": 584500 }, { "epoch": 169.47, "learning_rate": 7.633256083429896e-06, "loss": 0.1541, "step": 585000 }, { "epoch": 169.61, "learning_rate": 7.597045191193511e-06, "loss": 0.1471, "step": 585500 }, { "epoch": 169.76, "learning_rate": 7.560834298957127e-06, "loss": 0.1812, "step": 586000 }, { "epoch": 169.9, "learning_rate": 7.5246234067207415e-06, "loss": 0.1556, "step": 586500 }, { "epoch": 170.05, "learning_rate": 7.488412514484357e-06, "loss": 0.1353, "step": 587000 }, { "epoch": 170.19, "learning_rate": 7.452201622247973e-06, "loss": 0.1334, "step": 587500 }, { "epoch": 170.34, "learning_rate": 7.415990730011588e-06, "loss": 0.1455, "step": 588000 }, { "epoch": 170.48, "learning_rate": 7.3797798377752025e-06, "loss": 0.1629, "step": 588500 }, { "epoch": 170.63, "learning_rate": 7.343568945538819e-06, "loss": 0.1388, "step": 589000 }, { "epoch": 170.77, "learning_rate": 7.307358053302434e-06, "loss": 0.159, "step": 589500 }, { "epoch": 170.92, "learning_rate": 7.271147161066048e-06, "loss": 0.157, "step": 590000 }, { "epoch": 171.06, "learning_rate": 7.234936268829664e-06, "loss": 0.1471, "step": 590500 }, { "epoch": 171.21, "learning_rate": 7.19872537659328e-06, "loss": 0.1879, "step": 591000 }, { "epoch": 171.35, "learning_rate": 7.162514484356894e-06, "loss": 0.1651, "step": 591500 }, { "epoch": 171.49, "learning_rate": 7.12630359212051e-06, "loss": 0.1657, "step": 592000 }, { "epoch": 171.64, "learning_rate": 7.090092699884125e-06, "loss": 0.1521, "step": 592500 }, { "epoch": 171.78, "learning_rate": 7.0538818076477414e-06, "loss": 0.1404, "step": 593000 }, { "epoch": 171.93, "learning_rate": 7.017670915411356e-06, "loss": 0.138, "step": 593500 }, { "epoch": 172.07, "learning_rate": 6.981460023174971e-06, "loss": 0.1545, "step": 594000 }, { "epoch": 172.22, "learning_rate": 6.945249130938587e-06, "loss": 0.1472, "step": 594500 }, { "epoch": 172.36, "learning_rate": 6.9090382387022024e-06, "loss": 0.1593, "step": 595000 }, { "epoch": 172.51, "learning_rate": 6.872827346465817e-06, "loss": 0.1461, "step": 595500 }, { "epoch": 172.65, "learning_rate": 6.836616454229433e-06, "loss": 0.1279, "step": 596000 }, { "epoch": 172.8, "learning_rate": 6.800405561993048e-06, "loss": 0.135, "step": 596500 }, { "epoch": 172.94, "learning_rate": 6.764194669756663e-06, "loss": 0.14, "step": 597000 }, { "epoch": 173.09, "learning_rate": 6.727983777520279e-06, "loss": 0.1307, "step": 597500 }, { "epoch": 173.23, "learning_rate": 6.691772885283894e-06, "loss": 0.1356, "step": 598000 }, { "epoch": 173.38, "learning_rate": 6.655561993047508e-06, "loss": 0.1466, "step": 598500 }, { "epoch": 173.52, "learning_rate": 6.6193511008111244e-06, "loss": 0.1361, "step": 599000 }, { "epoch": 173.67, "learning_rate": 6.58314020857474e-06, "loss": 0.1336, "step": 599500 }, { "epoch": 173.81, "learning_rate": 6.546929316338354e-06, "loss": 0.129, "step": 600000 }, { "epoch": 173.96, "learning_rate": 6.51071842410197e-06, "loss": 0.1637, "step": 600500 }, { "epoch": 174.1, "learning_rate": 6.474507531865585e-06, "loss": 0.1632, "step": 601000 }, { "epoch": 174.25, "learning_rate": 6.4382966396292e-06, "loss": 0.151, "step": 601500 }, { "epoch": 174.39, "learning_rate": 6.402085747392817e-06, "loss": 0.1487, "step": 602000 }, { "epoch": 174.54, "learning_rate": 6.365874855156431e-06, "loss": 0.1335, "step": 602500 }, { "epoch": 174.68, "learning_rate": 6.329663962920046e-06, "loss": 0.1338, "step": 603000 }, { "epoch": 174.83, "learning_rate": 6.2934530706836625e-06, "loss": 0.1461, "step": 603500 }, { "epoch": 174.97, "learning_rate": 6.257242178447277e-06, "loss": 0.1413, "step": 604000 }, { "epoch": 175.12, "learning_rate": 6.221031286210892e-06, "loss": 0.1545, "step": 604500 }, { "epoch": 175.26, "learning_rate": 6.184820393974508e-06, "loss": 0.1615, "step": 605000 }, { "epoch": 175.41, "learning_rate": 6.148609501738123e-06, "loss": 0.1325, "step": 605500 }, { "epoch": 175.55, "learning_rate": 6.112398609501739e-06, "loss": 0.1398, "step": 606000 }, { "epoch": 175.7, "learning_rate": 6.076187717265354e-06, "loss": 0.1413, "step": 606500 }, { "epoch": 175.84, "learning_rate": 6.039976825028968e-06, "loss": 0.159, "step": 607000 }, { "epoch": 175.98, "learning_rate": 6.0037659327925845e-06, "loss": 0.1591, "step": 607500 }, { "epoch": 176.13, "learning_rate": 5.9675550405562e-06, "loss": 0.141, "step": 608000 }, { "epoch": 176.27, "learning_rate": 5.931344148319814e-06, "loss": 0.1246, "step": 608500 }, { "epoch": 176.42, "learning_rate": 5.89513325608343e-06, "loss": 0.1274, "step": 609000 }, { "epoch": 176.56, "learning_rate": 5.8589223638470455e-06, "loss": 0.152, "step": 609500 }, { "epoch": 176.71, "learning_rate": 5.822711471610661e-06, "loss": 0.159, "step": 610000 }, { "epoch": 176.85, "learning_rate": 5.786500579374276e-06, "loss": 0.1337, "step": 610500 }, { "epoch": 177.0, "learning_rate": 5.750289687137891e-06, "loss": 0.1248, "step": 611000 }, { "epoch": 177.14, "learning_rate": 5.7140787949015065e-06, "loss": 0.1375, "step": 611500 }, { "epoch": 177.29, "learning_rate": 5.677867902665122e-06, "loss": 0.1386, "step": 612000 }, { "epoch": 177.43, "learning_rate": 5.641657010428737e-06, "loss": 0.1376, "step": 612500 }, { "epoch": 177.58, "learning_rate": 5.605446118192353e-06, "loss": 0.1288, "step": 613000 }, { "epoch": 177.72, "learning_rate": 5.5692352259559675e-06, "loss": 0.1624, "step": 613500 }, { "epoch": 177.87, "learning_rate": 5.533024333719583e-06, "loss": 0.1262, "step": 614000 }, { "epoch": 178.01, "learning_rate": 5.496813441483199e-06, "loss": 0.1688, "step": 614500 }, { "epoch": 178.16, "learning_rate": 5.460602549246813e-06, "loss": 0.1789, "step": 615000 }, { "epoch": 178.3, "learning_rate": 5.4243916570104285e-06, "loss": 0.1338, "step": 615500 }, { "epoch": 178.45, "learning_rate": 5.3881807647740446e-06, "loss": 0.14, "step": 616000 }, { "epoch": 178.59, "learning_rate": 5.35196987253766e-06, "loss": 0.1236, "step": 616500 }, { "epoch": 178.74, "learning_rate": 5.315758980301275e-06, "loss": 0.1449, "step": 617000 }, { "epoch": 178.88, "learning_rate": 5.27954808806489e-06, "loss": 0.129, "step": 617500 }, { "epoch": 179.03, "learning_rate": 5.2433371958285056e-06, "loss": 0.1373, "step": 618000 }, { "epoch": 179.17, "learning_rate": 5.207126303592121e-06, "loss": 0.121, "step": 618500 }, { "epoch": 179.32, "learning_rate": 5.170915411355736e-06, "loss": 0.1285, "step": 619000 }, { "epoch": 179.46, "learning_rate": 5.134704519119351e-06, "loss": 0.1462, "step": 619500 }, { "epoch": 179.61, "learning_rate": 5.0984936268829666e-06, "loss": 0.1212, "step": 620000 }, { "epoch": 179.75, "learning_rate": 5.062282734646582e-06, "loss": 0.138, "step": 620500 }, { "epoch": 179.9, "learning_rate": 5.026071842410197e-06, "loss": 0.1686, "step": 621000 }, { "epoch": 180.04, "learning_rate": 4.989860950173812e-06, "loss": 0.1319, "step": 621500 }, { "epoch": 180.19, "learning_rate": 4.9536500579374276e-06, "loss": 0.1327, "step": 622000 }, { "epoch": 180.33, "learning_rate": 4.917439165701044e-06, "loss": 0.1409, "step": 622500 }, { "epoch": 180.48, "learning_rate": 4.881228273464659e-06, "loss": 0.1282, "step": 623000 }, { "epoch": 180.62, "learning_rate": 4.845017381228273e-06, "loss": 0.1284, "step": 623500 }, { "epoch": 180.76, "learning_rate": 4.808806488991889e-06, "loss": 0.1314, "step": 624000 }, { "epoch": 180.91, "learning_rate": 4.772595596755505e-06, "loss": 0.1163, "step": 624500 }, { "epoch": 181.05, "learning_rate": 4.736384704519119e-06, "loss": 0.1055, "step": 625000 }, { "epoch": 181.2, "learning_rate": 4.700173812282735e-06, "loss": 0.1329, "step": 625500 }, { "epoch": 181.34, "learning_rate": 4.66396292004635e-06, "loss": 0.1317, "step": 626000 }, { "epoch": 181.49, "learning_rate": 4.627752027809966e-06, "loss": 0.1612, "step": 626500 }, { "epoch": 181.63, "learning_rate": 4.591541135573581e-06, "loss": 0.1269, "step": 627000 }, { "epoch": 181.78, "learning_rate": 4.555330243337196e-06, "loss": 0.1404, "step": 627500 }, { "epoch": 181.92, "learning_rate": 4.519119351100811e-06, "loss": 0.1434, "step": 628000 }, { "epoch": 182.07, "learning_rate": 4.482908458864427e-06, "loss": 0.1427, "step": 628500 }, { "epoch": 182.21, "learning_rate": 4.446697566628042e-06, "loss": 0.0988, "step": 629000 }, { "epoch": 182.36, "learning_rate": 4.410486674391657e-06, "loss": 0.1395, "step": 629500 }, { "epoch": 182.5, "learning_rate": 4.374275782155272e-06, "loss": 0.1365, "step": 630000 }, { "epoch": 182.65, "learning_rate": 4.338064889918888e-06, "loss": 0.1285, "step": 630500 }, { "epoch": 182.79, "learning_rate": 4.301853997682504e-06, "loss": 0.1275, "step": 631000 }, { "epoch": 182.94, "learning_rate": 4.265643105446118e-06, "loss": 0.1411, "step": 631500 }, { "epoch": 183.08, "learning_rate": 4.229432213209733e-06, "loss": 0.1175, "step": 632000 }, { "epoch": 183.23, "learning_rate": 4.1932213209733495e-06, "loss": 0.1253, "step": 632500 }, { "epoch": 183.37, "learning_rate": 4.157010428736964e-06, "loss": 0.1131, "step": 633000 }, { "epoch": 183.52, "learning_rate": 4.12079953650058e-06, "loss": 0.1263, "step": 633500 }, { "epoch": 183.66, "learning_rate": 4.084588644264195e-06, "loss": 0.1473, "step": 634000 }, { "epoch": 183.81, "learning_rate": 4.04837775202781e-06, "loss": 0.1214, "step": 634500 }, { "epoch": 183.95, "learning_rate": 4.012166859791426e-06, "loss": 0.1385, "step": 635000 }, { "epoch": 184.1, "learning_rate": 3.975955967555041e-06, "loss": 0.1243, "step": 635500 }, { "epoch": 184.24, "learning_rate": 3.939745075318655e-06, "loss": 0.1299, "step": 636000 }, { "epoch": 184.39, "learning_rate": 3.9035341830822715e-06, "loss": 0.1384, "step": 636500 }, { "epoch": 184.53, "learning_rate": 3.867323290845887e-06, "loss": 0.1267, "step": 637000 }, { "epoch": 184.68, "learning_rate": 3.831112398609502e-06, "loss": 0.1584, "step": 637500 }, { "epoch": 184.82, "learning_rate": 3.794901506373117e-06, "loss": 0.1168, "step": 638000 }, { "epoch": 184.97, "learning_rate": 3.7586906141367325e-06, "loss": 0.1163, "step": 638500 }, { "epoch": 185.11, "learning_rate": 3.722479721900348e-06, "loss": 0.1161, "step": 639000 }, { "epoch": 185.25, "learning_rate": 3.686268829663963e-06, "loss": 0.1268, "step": 639500 }, { "epoch": 185.4, "learning_rate": 3.650057937427578e-06, "loss": 0.1185, "step": 640000 }, { "epoch": 185.54, "learning_rate": 3.613847045191194e-06, "loss": 0.111, "step": 640500 }, { "epoch": 185.69, "learning_rate": 3.5776361529548087e-06, "loss": 0.1406, "step": 641000 }, { "epoch": 185.83, "learning_rate": 3.5414252607184244e-06, "loss": 0.1247, "step": 641500 }, { "epoch": 185.98, "learning_rate": 3.5052143684820396e-06, "loss": 0.1106, "step": 642000 }, { "epoch": 186.12, "learning_rate": 3.4690034762456544e-06, "loss": 0.1318, "step": 642500 }, { "epoch": 186.27, "learning_rate": 3.43279258400927e-06, "loss": 0.1082, "step": 643000 }, { "epoch": 186.41, "learning_rate": 3.3965816917728854e-06, "loss": 0.1264, "step": 643500 }, { "epoch": 186.56, "learning_rate": 3.360370799536501e-06, "loss": 0.1201, "step": 644000 }, { "epoch": 186.7, "learning_rate": 3.324159907300116e-06, "loss": 0.1267, "step": 644500 }, { "epoch": 186.85, "learning_rate": 3.2879490150637315e-06, "loss": 0.1309, "step": 645000 }, { "epoch": 186.99, "learning_rate": 3.2517381228273468e-06, "loss": 0.1151, "step": 645500 }, { "epoch": 187.14, "learning_rate": 3.2155272305909616e-06, "loss": 0.1188, "step": 646000 }, { "epoch": 187.28, "learning_rate": 3.1793163383545773e-06, "loss": 0.1265, "step": 646500 }, { "epoch": 187.43, "learning_rate": 3.143105446118193e-06, "loss": 0.119, "step": 647000 }, { "epoch": 187.57, "learning_rate": 3.1068945538818078e-06, "loss": 0.1231, "step": 647500 }, { "epoch": 187.72, "learning_rate": 3.070683661645423e-06, "loss": 0.1197, "step": 648000 }, { "epoch": 187.86, "learning_rate": 3.0344727694090383e-06, "loss": 0.1276, "step": 648500 }, { "epoch": 188.01, "learning_rate": 2.998261877172654e-06, "loss": 0.1248, "step": 649000 }, { "epoch": 188.15, "learning_rate": 2.9620509849362688e-06, "loss": 0.1389, "step": 649500 }, { "epoch": 188.3, "learning_rate": 2.9258400926998844e-06, "loss": 0.1192, "step": 650000 }, { "epoch": 188.44, "learning_rate": 2.8896292004634997e-06, "loss": 0.1129, "step": 650500 }, { "epoch": 188.59, "learning_rate": 2.853418308227115e-06, "loss": 0.1014, "step": 651000 }, { "epoch": 188.73, "learning_rate": 2.81720741599073e-06, "loss": 0.132, "step": 651500 }, { "epoch": 188.88, "learning_rate": 2.7809965237543454e-06, "loss": 0.1383, "step": 652000 }, { "epoch": 189.02, "learning_rate": 2.744785631517961e-06, "loss": 0.1387, "step": 652500 }, { "epoch": 189.17, "learning_rate": 2.708574739281576e-06, "loss": 0.1105, "step": 653000 }, { "epoch": 189.31, "learning_rate": 2.672363847045191e-06, "loss": 0.1212, "step": 653500 }, { "epoch": 189.46, "learning_rate": 2.636152954808807e-06, "loss": 0.1115, "step": 654000 }, { "epoch": 189.6, "learning_rate": 2.599942062572422e-06, "loss": 0.1256, "step": 654500 }, { "epoch": 189.75, "learning_rate": 2.563731170336037e-06, "loss": 0.1287, "step": 655000 }, { "epoch": 189.89, "learning_rate": 2.5275202780996526e-06, "loss": 0.117, "step": 655500 }, { "epoch": 190.03, "learning_rate": 2.491309385863268e-06, "loss": 0.1064, "step": 656000 }, { "epoch": 190.18, "learning_rate": 2.455098493626883e-06, "loss": 0.1338, "step": 656500 }, { "epoch": 190.32, "learning_rate": 2.4188876013904983e-06, "loss": 0.1168, "step": 657000 }, { "epoch": 190.47, "learning_rate": 2.3826767091541136e-06, "loss": 0.1258, "step": 657500 }, { "epoch": 190.61, "learning_rate": 2.3464658169177293e-06, "loss": 0.1021, "step": 658000 }, { "epoch": 190.76, "learning_rate": 2.310254924681344e-06, "loss": 0.1199, "step": 658500 }, { "epoch": 190.9, "learning_rate": 2.2740440324449593e-06, "loss": 0.1065, "step": 659000 }, { "epoch": 191.05, "learning_rate": 2.237833140208575e-06, "loss": 0.1153, "step": 659500 }, { "epoch": 191.19, "learning_rate": 2.2016222479721903e-06, "loss": 0.1176, "step": 660000 }, { "epoch": 191.34, "learning_rate": 2.1654113557358055e-06, "loss": 0.1151, "step": 660500 }, { "epoch": 191.48, "learning_rate": 2.1292004634994208e-06, "loss": 0.1117, "step": 661000 }, { "epoch": 191.63, "learning_rate": 2.092989571263036e-06, "loss": 0.1226, "step": 661500 }, { "epoch": 191.77, "learning_rate": 2.0567786790266513e-06, "loss": 0.0943, "step": 662000 }, { "epoch": 191.92, "learning_rate": 2.0205677867902665e-06, "loss": 0.1238, "step": 662500 }, { "epoch": 192.06, "learning_rate": 1.984356894553882e-06, "loss": 0.1108, "step": 663000 }, { "epoch": 192.21, "learning_rate": 1.9481460023174974e-06, "loss": 0.1135, "step": 663500 }, { "epoch": 192.35, "learning_rate": 1.9119351100811122e-06, "loss": 0.1023, "step": 664000 }, { "epoch": 192.5, "learning_rate": 1.875724217844728e-06, "loss": 0.107, "step": 664500 }, { "epoch": 192.64, "learning_rate": 1.8395133256083432e-06, "loss": 0.1089, "step": 665000 }, { "epoch": 192.79, "learning_rate": 1.8033024333719582e-06, "loss": 0.104, "step": 665500 }, { "epoch": 192.93, "learning_rate": 1.7670915411355737e-06, "loss": 0.1172, "step": 666000 }, { "epoch": 193.08, "learning_rate": 1.730880648899189e-06, "loss": 0.106, "step": 666500 }, { "epoch": 193.22, "learning_rate": 1.6946697566628044e-06, "loss": 0.0994, "step": 667000 }, { "epoch": 193.37, "learning_rate": 1.6584588644264196e-06, "loss": 0.105, "step": 667500 }, { "epoch": 193.51, "learning_rate": 1.6222479721900347e-06, "loss": 0.1141, "step": 668000 }, { "epoch": 193.66, "learning_rate": 1.5860370799536503e-06, "loss": 0.1127, "step": 668500 }, { "epoch": 193.8, "learning_rate": 1.5498261877172654e-06, "loss": 0.1112, "step": 669000 }, { "epoch": 193.95, "learning_rate": 1.5136152954808808e-06, "loss": 0.1333, "step": 669500 }, { "epoch": 194.09, "learning_rate": 1.477404403244496e-06, "loss": 0.1027, "step": 670000 }, { "epoch": 194.24, "learning_rate": 1.4411935110081113e-06, "loss": 0.1204, "step": 670500 }, { "epoch": 194.38, "learning_rate": 1.4049826187717266e-06, "loss": 0.116, "step": 671000 }, { "epoch": 194.52, "learning_rate": 1.3687717265353418e-06, "loss": 0.1152, "step": 671500 }, { "epoch": 194.67, "learning_rate": 1.3325608342989573e-06, "loss": 0.1211, "step": 672000 }, { "epoch": 194.81, "learning_rate": 1.2963499420625725e-06, "loss": 0.103, "step": 672500 }, { "epoch": 194.96, "learning_rate": 1.2601390498261878e-06, "loss": 0.1091, "step": 673000 }, { "epoch": 195.1, "learning_rate": 1.223928157589803e-06, "loss": 0.1153, "step": 673500 }, { "epoch": 195.25, "learning_rate": 1.1877172653534185e-06, "loss": 0.1098, "step": 674000 }, { "epoch": 195.39, "learning_rate": 1.1515063731170337e-06, "loss": 0.1263, "step": 674500 }, { "epoch": 195.54, "learning_rate": 1.115295480880649e-06, "loss": 0.1176, "step": 675000 }, { "epoch": 195.68, "learning_rate": 1.0790845886442642e-06, "loss": 0.1088, "step": 675500 }, { "epoch": 195.83, "learning_rate": 1.0428736964078795e-06, "loss": 0.1062, "step": 676000 }, { "epoch": 195.97, "learning_rate": 1.006662804171495e-06, "loss": 0.1102, "step": 676500 }, { "epoch": 196.12, "learning_rate": 9.7045191193511e-07, "loss": 0.1305, "step": 677000 }, { "epoch": 196.26, "learning_rate": 9.342410196987254e-07, "loss": 0.1213, "step": 677500 }, { "epoch": 196.41, "learning_rate": 8.980301274623407e-07, "loss": 0.1173, "step": 678000 }, { "epoch": 196.55, "learning_rate": 8.61819235225956e-07, "loss": 0.1198, "step": 678500 }, { "epoch": 196.7, "learning_rate": 8.256083429895712e-07, "loss": 0.1099, "step": 679000 }, { "epoch": 196.84, "learning_rate": 7.893974507531865e-07, "loss": 0.1051, "step": 679500 }, { "epoch": 196.99, "learning_rate": 7.531865585168019e-07, "loss": 0.11, "step": 680000 }, { "epoch": 197.13, "learning_rate": 7.169756662804171e-07, "loss": 0.1015, "step": 680500 }, { "epoch": 197.28, "learning_rate": 6.807647740440325e-07, "loss": 0.1107, "step": 681000 }, { "epoch": 197.42, "learning_rate": 6.445538818076477e-07, "loss": 0.0984, "step": 681500 }, { "epoch": 197.57, "learning_rate": 6.083429895712631e-07, "loss": 0.1129, "step": 682000 }, { "epoch": 197.71, "learning_rate": 5.721320973348784e-07, "loss": 0.1154, "step": 682500 }, { "epoch": 197.86, "learning_rate": 5.359212050984936e-07, "loss": 0.1049, "step": 683000 }, { "epoch": 198.0, "learning_rate": 4.99710312862109e-07, "loss": 0.1267, "step": 683500 }, { "epoch": 198.15, "learning_rate": 4.6349942062572426e-07, "loss": 0.1142, "step": 684000 }, { "epoch": 198.29, "learning_rate": 4.2728852838933956e-07, "loss": 0.0944, "step": 684500 }, { "epoch": 198.44, "learning_rate": 3.910776361529548e-07, "loss": 0.1224, "step": 685000 }, { "epoch": 198.58, "learning_rate": 3.548667439165701e-07, "loss": 0.1043, "step": 685500 }, { "epoch": 198.73, "learning_rate": 3.186558516801854e-07, "loss": 0.1193, "step": 686000 }, { "epoch": 198.87, "learning_rate": 2.824449594438007e-07, "loss": 0.1056, "step": 686500 }, { "epoch": 199.02, "learning_rate": 2.4623406720741596e-07, "loss": 0.1252, "step": 687000 }, { "epoch": 199.16, "learning_rate": 2.1002317497103131e-07, "loss": 0.1309, "step": 687500 }, { "epoch": 199.3, "learning_rate": 1.738122827346466e-07, "loss": 0.106, "step": 688000 }, { "epoch": 199.45, "learning_rate": 1.376013904982619e-07, "loss": 0.1152, "step": 688500 }, { "epoch": 199.59, "learning_rate": 1.0139049826187717e-07, "loss": 0.1014, "step": 689000 }, { "epoch": 199.74, "learning_rate": 6.517960602549247e-08, "loss": 0.1042, "step": 689500 }, { "epoch": 199.88, "learning_rate": 2.8968713789107766e-08, "loss": 0.1189, "step": 690000 } ], "max_steps": 690400, "num_train_epochs": 200, "total_flos": 3.3589830897477504e+16, "trial_name": null, "trial_params": null }