{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.885489299682293, "global_step": 606000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.7856418177137756e-08, "loss": 2.1137, "step": 500 }, { "epoch": 0.01, "learning_rate": 7.571283635427551e-08, "loss": 2.0821, "step": 1000 }, { "epoch": 0.02, "learning_rate": 1.1356925453141326e-07, "loss": 2.1063, "step": 1500 }, { "epoch": 0.02, "learning_rate": 1.5142567270855102e-07, "loss": 2.0393, "step": 2000 }, { "epoch": 0.03, "learning_rate": 1.8928209088568877e-07, "loss": 2.0135, "step": 2500 }, { "epoch": 0.03, "learning_rate": 2.2713850906282652e-07, "loss": 1.9837, "step": 3000 }, { "epoch": 0.04, "learning_rate": 2.6499492723996425e-07, "loss": 1.9524, "step": 3500 }, { "epoch": 0.04, "learning_rate": 3.0277563258074776e-07, "loss": 1.9752, "step": 4000 }, { "epoch": 0.05, "learning_rate": 3.406320507578855e-07, "loss": 1.9403, "step": 4500 }, { "epoch": 0.05, "learning_rate": 3.784884689350233e-07, "loss": 1.9234, "step": 5000 }, { "epoch": 0.06, "learning_rate": 4.16344887112161e-07, "loss": 1.9008, "step": 5500 }, { "epoch": 0.06, "learning_rate": 4.542013052892988e-07, "loss": 1.9152, "step": 6000 }, { "epoch": 0.07, "learning_rate": 4.920577234664366e-07, "loss": 1.8736, "step": 6500 }, { "epoch": 0.07, "learning_rate": 5.299141416435743e-07, "loss": 1.8995, "step": 7000 }, { "epoch": 0.08, "learning_rate": 5.677705598207121e-07, "loss": 1.8825, "step": 7500 }, { "epoch": 0.08, "learning_rate": 6.055512651614955e-07, "loss": 1.8569, "step": 8000 }, { "epoch": 0.09, "learning_rate": 6.433319705022791e-07, "loss": 1.8883, "step": 8500 }, { "epoch": 0.1, "learning_rate": 6.811883886794167e-07, "loss": 1.8776, "step": 9000 }, { "epoch": 0.1, "learning_rate": 7.190448068565545e-07, "loss": 1.8876, "step": 9500 }, { "epoch": 0.11, "learning_rate": 7.569012250336922e-07, "loss": 1.8702, "step": 10000 }, { "epoch": 0.11, "learning_rate": 7.947576432108301e-07, "loss": 1.8573, "step": 10500 }, { "epoch": 0.12, "learning_rate": 8.325383485516135e-07, "loss": 1.857, "step": 11000 }, { "epoch": 0.12, "learning_rate": 8.703947667287513e-07, "loss": 1.8565, "step": 11500 }, { "epoch": 0.13, "learning_rate": 9.08251184905889e-07, "loss": 1.831, "step": 12000 }, { "epoch": 0.13, "learning_rate": 9.461076030830267e-07, "loss": 1.8091, "step": 12500 }, { "epoch": 0.14, "learning_rate": 9.839640212601645e-07, "loss": 1.8446, "step": 13000 }, { "epoch": 0.14, "learning_rate": 1.021744726600948e-06, "loss": 1.8294, "step": 13500 }, { "epoch": 0.15, "learning_rate": 1.0595254319417315e-06, "loss": 1.8024, "step": 14000 }, { "epoch": 0.15, "learning_rate": 1.097381850118869e-06, "loss": 1.8247, "step": 14500 }, { "epoch": 0.16, "learning_rate": 1.135238268296007e-06, "loss": 1.8342, "step": 15000 }, { "epoch": 0.16, "learning_rate": 1.1730946864731448e-06, "loss": 1.8464, "step": 15500 }, { "epoch": 0.17, "learning_rate": 1.2109511046502824e-06, "loss": 1.8033, "step": 16000 }, { "epoch": 0.17, "learning_rate": 1.248731809991066e-06, "loss": 1.7894, "step": 16500 }, { "epoch": 0.18, "learning_rate": 1.2865882281682038e-06, "loss": 1.8257, "step": 17000 }, { "epoch": 0.19, "learning_rate": 1.3244446463453414e-06, "loss": 1.8199, "step": 17500 }, { "epoch": 0.19, "learning_rate": 1.3623010645224793e-06, "loss": 1.8294, "step": 18000 }, { "epoch": 0.2, "learning_rate": 1.400157482699617e-06, "loss": 1.8009, "step": 18500 }, { "epoch": 0.2, "learning_rate": 1.4380139008767545e-06, "loss": 1.8019, "step": 19000 }, { "epoch": 0.21, "learning_rate": 1.4758703190538926e-06, "loss": 1.7872, "step": 19500 }, { "epoch": 0.21, "learning_rate": 1.5137267372310303e-06, "loss": 1.7596, "step": 20000 }, { "epoch": 0.22, "learning_rate": 1.5515074425718137e-06, "loss": 1.8034, "step": 20500 }, { "epoch": 0.22, "learning_rate": 1.5893638607489514e-06, "loss": 1.8041, "step": 21000 }, { "epoch": 0.23, "learning_rate": 1.6272202789260894e-06, "loss": 1.8091, "step": 21500 }, { "epoch": 0.23, "learning_rate": 1.665076697103227e-06, "loss": 1.818, "step": 22000 }, { "epoch": 0.24, "learning_rate": 1.7028574024440105e-06, "loss": 1.7801, "step": 22500 }, { "epoch": 0.24, "learning_rate": 1.7407138206211482e-06, "loss": 1.798, "step": 23000 }, { "epoch": 0.25, "learning_rate": 1.7785702387982858e-06, "loss": 1.7964, "step": 23500 }, { "epoch": 0.25, "learning_rate": 1.8164266569754239e-06, "loss": 1.7742, "step": 24000 }, { "epoch": 0.26, "learning_rate": 1.8542830751525615e-06, "loss": 1.779, "step": 24500 }, { "epoch": 0.26, "learning_rate": 1.892063780493345e-06, "loss": 1.7976, "step": 25000 }, { "epoch": 0.27, "learning_rate": 1.9299201986704826e-06, "loss": 1.8047, "step": 25500 }, { "epoch": 0.28, "learning_rate": 1.9677766168476207e-06, "loss": 1.7778, "step": 26000 }, { "epoch": 0.28, "learning_rate": 2.0056330350247583e-06, "loss": 1.7395, "step": 26500 }, { "epoch": 0.29, "learning_rate": 2.043413740365542e-06, "loss": 1.7978, "step": 27000 }, { "epoch": 0.29, "learning_rate": 2.0812701585426794e-06, "loss": 1.8077, "step": 27500 }, { "epoch": 0.3, "learning_rate": 2.119126576719817e-06, "loss": 1.7775, "step": 28000 }, { "epoch": 0.3, "learning_rate": 2.156982994896955e-06, "loss": 1.7742, "step": 28500 }, { "epoch": 0.31, "learning_rate": 2.194839413074093e-06, "loss": 1.7911, "step": 29000 }, { "epoch": 0.31, "learning_rate": 2.2326201184148763e-06, "loss": 1.7489, "step": 29500 }, { "epoch": 0.32, "learning_rate": 2.270476536592014e-06, "loss": 1.7773, "step": 30000 }, { "epoch": 0.32, "learning_rate": 2.308332954769152e-06, "loss": 1.7914, "step": 30500 }, { "epoch": 0.33, "learning_rate": 2.3461893729462896e-06, "loss": 1.7394, "step": 31000 }, { "epoch": 0.33, "learning_rate": 2.3840457911234273e-06, "loss": 1.7794, "step": 31500 }, { "epoch": 0.34, "learning_rate": 2.4218264964642107e-06, "loss": 1.7762, "step": 32000 }, { "epoch": 0.34, "learning_rate": 4.985384811630502e-06, "loss": 1.7527, "step": 32500 }, { "epoch": 0.35, "learning_rate": 4.984931927529385e-06, "loss": 1.7468, "step": 33000 }, { "epoch": 0.36, "learning_rate": 4.984472154475628e-06, "loss": 1.7746, "step": 33500 }, { "epoch": 0.36, "learning_rate": 4.984006433938366e-06, "loss": 1.7635, "step": 34000 }, { "epoch": 0.37, "learning_rate": 4.983532900593762e-06, "loss": 1.7936, "step": 34500 }, { "epoch": 0.37, "learning_rate": 4.983052482175027e-06, "loss": 1.7431, "step": 35000 }, { "epoch": 0.38, "learning_rate": 4.98256518001402e-06, "loss": 1.7891, "step": 35500 }, { "epoch": 0.38, "learning_rate": 4.982070995461684e-06, "loss": 1.7599, "step": 36000 }, { "epoch": 0.39, "learning_rate": 4.981570938885529e-06, "loss": 1.7501, "step": 36500 }, { "epoch": 0.39, "learning_rate": 4.9810630074375465e-06, "loss": 1.7679, "step": 37000 }, { "epoch": 0.4, "learning_rate": 4.980548197762693e-06, "loss": 1.7317, "step": 37500 }, { "epoch": 0.4, "learning_rate": 4.980026511288172e-06, "loss": 1.7846, "step": 38000 }, { "epoch": 0.41, "learning_rate": 4.979497949460249e-06, "loss": 1.7632, "step": 38500 }, { "epoch": 0.41, "learning_rate": 4.9789625137442505e-06, "loss": 1.759, "step": 39000 }, { "epoch": 0.42, "learning_rate": 4.97842020562456e-06, "loss": 1.7392, "step": 39500 }, { "epoch": 0.42, "learning_rate": 4.977872131818803e-06, "loss": 1.768, "step": 40000 }, { "epoch": 0.43, "learning_rate": 4.977316097158305e-06, "loss": 1.7695, "step": 40500 }, { "epoch": 0.43, "learning_rate": 4.976753194658462e-06, "loss": 1.7787, "step": 41000 }, { "epoch": 0.44, "learning_rate": 4.976183425879801e-06, "loss": 1.7533, "step": 41500 }, { "epoch": 0.45, "learning_rate": 4.9756067924018844e-06, "loss": 1.7311, "step": 42000 }, { "epoch": 0.45, "learning_rate": 4.97502446966477e-06, "loss": 1.7347, "step": 42500 }, { "epoch": 0.46, "learning_rate": 4.9744341253244985e-06, "loss": 1.7639, "step": 43000 }, { "epoch": 0.46, "learning_rate": 4.973836921134542e-06, "loss": 1.7626, "step": 43500 }, { "epoch": 0.47, "learning_rate": 4.97323407371866e-06, "loss": 1.7603, "step": 44000 }, { "epoch": 0.47, "learning_rate": 4.972623168526572e-06, "loss": 1.7346, "step": 44500 }, { "epoch": 0.48, "learning_rate": 4.972005408505297e-06, "loss": 1.7621, "step": 45000 }, { "epoch": 0.48, "learning_rate": 4.971380795367446e-06, "loss": 1.7651, "step": 45500 }, { "epoch": 0.49, "learning_rate": 4.970749330844626e-06, "loss": 1.7428, "step": 46000 }, { "epoch": 0.49, "learning_rate": 4.970111016687442e-06, "loss": 1.7332, "step": 46500 }, { "epoch": 0.5, "learning_rate": 4.9694658546654854e-06, "loss": 1.7329, "step": 47000 }, { "epoch": 0.5, "learning_rate": 4.968813846567331e-06, "loss": 1.7552, "step": 47500 }, { "epoch": 0.51, "learning_rate": 4.968154994200538e-06, "loss": 1.7563, "step": 48000 }, { "epoch": 0.51, "learning_rate": 4.967489299391632e-06, "loss": 1.7412, "step": 48500 }, { "epoch": 0.52, "learning_rate": 4.966818115882607e-06, "loss": 1.7054, "step": 49000 }, { "epoch": 0.52, "learning_rate": 4.966138755420534e-06, "loss": 1.7372, "step": 49500 }, { "epoch": 0.53, "learning_rate": 4.96545255810595e-06, "loss": 1.7653, "step": 50000 }, { "epoch": 0.54, "learning_rate": 4.964759525841192e-06, "loss": 1.7633, "step": 50500 }, { "epoch": 0.54, "learning_rate": 4.964059660547548e-06, "loss": 1.733, "step": 51000 }, { "epoch": 0.55, "learning_rate": 4.963354384374138e-06, "loss": 1.7221, "step": 51500 }, { "epoch": 0.55, "learning_rate": 4.962640872518639e-06, "loss": 1.7226, "step": 52000 }, { "epoch": 0.56, "learning_rate": 4.961920533507775e-06, "loss": 1.7142, "step": 52500 }, { "epoch": 0.56, "learning_rate": 4.961193369338535e-06, "loss": 1.7193, "step": 53000 }, { "epoch": 0.57, "learning_rate": 4.960460856809599e-06, "loss": 1.7472, "step": 53500 }, { "epoch": 0.57, "learning_rate": 4.9597200620304265e-06, "loss": 1.7407, "step": 54000 }, { "epoch": 0.58, "learning_rate": 4.958972448193223e-06, "loss": 1.741, "step": 54500 }, { "epoch": 0.58, "learning_rate": 4.958218017370592e-06, "loss": 1.7572, "step": 55000 }, { "epoch": 0.59, "learning_rate": 4.957458300945332e-06, "loss": 1.7177, "step": 55500 }, { "epoch": 0.59, "learning_rate": 4.9566902560686915e-06, "loss": 1.7255, "step": 56000 }, { "epoch": 0.6, "learning_rate": 4.955915400533522e-06, "loss": 1.7255, "step": 56500 }, { "epoch": 0.6, "learning_rate": 4.955133736487948e-06, "loss": 1.7173, "step": 57000 }, { "epoch": 0.61, "learning_rate": 4.954346849831028e-06, "loss": 1.7042, "step": 57500 }, { "epoch": 0.61, "learning_rate": 4.953551588890636e-06, "loss": 1.7207, "step": 58000 }, { "epoch": 0.62, "learning_rate": 4.952749525993012e-06, "loss": 1.7328, "step": 58500 }, { "epoch": 0.63, "learning_rate": 4.9519406633617066e-06, "loss": 1.6936, "step": 59000 }, { "epoch": 0.63, "learning_rate": 4.9511266413417645e-06, "loss": 1.7368, "step": 59500 }, { "epoch": 0.64, "learning_rate": 4.95030419957734e-06, "loss": 1.7637, "step": 60000 }, { "epoch": 0.64, "learning_rate": 4.9494749648583875e-06, "loss": 1.7161, "step": 60500 }, { "epoch": 0.65, "learning_rate": 4.94863893948379e-06, "loss": 1.7069, "step": 61000 }, { "epoch": 0.65, "learning_rate": 4.9477978181718865e-06, "loss": 1.7084, "step": 61500 }, { "epoch": 0.66, "learning_rate": 4.946949937970748e-06, "loss": 1.7007, "step": 62000 }, { "epoch": 0.66, "learning_rate": 4.946093581740578e-06, "loss": 1.714, "step": 62500 }, { "epoch": 0.67, "learning_rate": 4.9452304442289365e-06, "loss": 1.7228, "step": 63000 }, { "epoch": 0.67, "learning_rate": 4.944360527828692e-06, "loss": 1.7279, "step": 63500 }, { "epoch": 0.68, "learning_rate": 4.943483834951505e-06, "loss": 1.7026, "step": 64000 }, { "epoch": 0.68, "learning_rate": 4.942602141720543e-06, "loss": 1.7094, "step": 64500 }, { "epoch": 0.69, "learning_rate": 4.941711916740329e-06, "loss": 1.7237, "step": 65000 }, { "epoch": 0.69, "learning_rate": 4.940814922625891e-06, "loss": 1.7269, "step": 65500 }, { "epoch": 0.7, "learning_rate": 4.9399111618639576e-06, "loss": 1.7229, "step": 66000 }, { "epoch": 0.47, "learning_rate": 4.972827756988442e-06, "loss": 1.5635, "step": 66500 }, { "epoch": 0.47, "learning_rate": 4.972418204632646e-06, "loss": 1.6282, "step": 67000 }, { "epoch": 0.48, "learning_rate": 4.972006434197012e-06, "loss": 1.6165, "step": 67500 }, { "epoch": 0.48, "learning_rate": 4.971590795806552e-06, "loss": 1.5994, "step": 68000 }, { "epoch": 0.48, "learning_rate": 4.971172952524728e-06, "loss": 1.6169, "step": 68500 }, { "epoch": 0.49, "learning_rate": 4.970751230143974e-06, "loss": 1.5898, "step": 69000 }, { "epoch": 0.49, "learning_rate": 4.970326463500062e-06, "loss": 1.5827, "step": 69500 }, { "epoch": 0.49, "learning_rate": 4.969898653116354e-06, "loss": 1.5732, "step": 70000 }, { "epoch": 0.5, "learning_rate": 4.969467799519966e-06, "loss": 1.5965, "step": 70500 }, { "epoch": 0.5, "learning_rate": 4.969033903241761e-06, "loss": 1.6081, "step": 71000 }, { "epoch": 0.51, "learning_rate": 4.9685969648163515e-06, "loss": 1.5901, "step": 71500 }, { "epoch": 0.51, "learning_rate": 4.968156984782098e-06, "loss": 1.6251, "step": 72000 }, { "epoch": 0.51, "learning_rate": 4.9677139636811075e-06, "loss": 1.5917, "step": 72500 }, { "epoch": 0.52, "learning_rate": 4.967267902059239e-06, "loss": 1.5778, "step": 73000 }, { "epoch": 0.52, "learning_rate": 4.966818800466091e-06, "loss": 1.5604, "step": 73500 }, { "epoch": 0.52, "learning_rate": 4.9663666594550116e-06, "loss": 1.6055, "step": 74000 }, { "epoch": 0.53, "learning_rate": 4.965912392975248e-06, "loss": 1.5942, "step": 74500 }, { "epoch": 0.53, "learning_rate": 4.9654541808793646e-06, "loss": 1.6109, "step": 75000 }, { "epoch": 0.53, "learning_rate": 4.964992931046925e-06, "loss": 1.6153, "step": 75500 }, { "epoch": 0.54, "learning_rate": 4.964528644046248e-06, "loss": 1.6005, "step": 76000 }, { "epoch": 0.54, "learning_rate": 4.964062258126721e-06, "loss": 1.597, "step": 76500 }, { "epoch": 0.54, "learning_rate": 4.963591904580939e-06, "loss": 1.6027, "step": 77000 }, { "epoch": 0.55, "learning_rate": 4.96311851559315e-06, "loss": 1.5652, "step": 77500 }, { "epoch": 0.55, "learning_rate": 4.962642091746627e-06, "loss": 1.5838, "step": 78000 }, { "epoch": 0.55, "learning_rate": 4.962162633628382e-06, "loss": 1.5835, "step": 78500 }, { "epoch": 0.56, "learning_rate": 4.961681109839982e-06, "loss": 1.5735, "step": 79000 }, { "epoch": 0.56, "learning_rate": 4.961195591019855e-06, "loss": 1.5813, "step": 79500 }, { "epoch": 0.57, "learning_rate": 4.96070703971027e-06, "loss": 1.6206, "step": 80000 }, { "epoch": 0.57, "learning_rate": 4.960215456513178e-06, "loss": 1.5827, "step": 80500 }, { "epoch": 0.57, "learning_rate": 4.959721834288042e-06, "loss": 1.6156, "step": 81000 }, { "epoch": 0.58, "learning_rate": 4.959224195197477e-06, "loss": 1.5982, "step": 81500 }, { "epoch": 0.58, "learning_rate": 4.95872352604645e-06, "loss": 1.6165, "step": 82000 }, { "epoch": 0.58, "learning_rate": 4.958219827451845e-06, "loss": 1.6277, "step": 82500 }, { "epoch": 0.59, "learning_rate": 4.957714116511465e-06, "loss": 1.5929, "step": 83000 }, { "epoch": 0.59, "learning_rate": 4.9572043669510584e-06, "loss": 1.5988, "step": 83500 }, { "epoch": 0.59, "learning_rate": 4.956691589818862e-06, "loss": 1.5717, "step": 84000 }, { "epoch": 0.6, "learning_rate": 4.956175785746677e-06, "loss": 1.5993, "step": 84500 }, { "epoch": 0.6, "learning_rate": 4.955657996050618e-06, "loss": 1.5842, "step": 85000 }, { "epoch": 0.6, "learning_rate": 4.955136146059475e-06, "loss": 1.5906, "step": 85500 }, { "epoch": 0.61, "learning_rate": 4.954611271044838e-06, "loss": 1.5686, "step": 86000 }, { "epoch": 0.61, "learning_rate": 4.954083371653418e-06, "loss": 1.59, "step": 86500 }, { "epoch": 0.61, "learning_rate": 4.9535535133991276e-06, "loss": 1.5915, "step": 87000 }, { "epoch": 0.62, "learning_rate": 4.953019573254659e-06, "loss": 1.5991, "step": 87500 }, { "epoch": 0.62, "learning_rate": 4.95248261069457e-06, "loss": 1.6045, "step": 88000 }, { "epoch": 0.63, "learning_rate": 4.951942626380462e-06, "loss": 1.5585, "step": 88500 }, { "epoch": 0.63, "learning_rate": 4.951399620977662e-06, "loss": 1.6104, "step": 89000 }, { "epoch": 0.63, "learning_rate": 4.950854690220794e-06, "loss": 1.6091, "step": 89500 }, { "epoch": 0.64, "learning_rate": 4.9503056506902935e-06, "loss": 1.6397, "step": 90000 }, { "epoch": 0.64, "learning_rate": 4.9497535920880514e-06, "loss": 1.5889, "step": 90500 }, { "epoch": 0.64, "learning_rate": 4.949198515094271e-06, "loss": 1.5822, "step": 91000 }, { "epoch": 0.65, "learning_rate": 4.948641539593493e-06, "loss": 1.5949, "step": 91500 }, { "epoch": 0.65, "learning_rate": 4.948081559127373e-06, "loss": 1.573, "step": 92000 }, { "epoch": 0.65, "learning_rate": 4.947517443141312e-06, "loss": 1.5856, "step": 92500 }, { "epoch": 0.66, "learning_rate": 4.946950311518915e-06, "loss": 1.6034, "step": 93000 }, { "epoch": 0.66, "learning_rate": 4.94638016495896e-06, "loss": 1.5941, "step": 93500 }, { "epoch": 0.66, "learning_rate": 4.945807004163935e-06, "loss": 1.5872, "step": 94000 }, { "epoch": 0.67, "learning_rate": 4.945230829840042e-06, "loss": 1.604, "step": 94500 }, { "epoch": 0.67, "learning_rate": 4.944651642697199e-06, "loss": 1.5954, "step": 95000 }, { "epoch": 0.67, "learning_rate": 4.944069443449033e-06, "loss": 1.6117, "step": 95500 }, { "epoch": 0.68, "learning_rate": 4.943485406239044e-06, "loss": 1.5726, "step": 96000 }, { "epoch": 0.68, "learning_rate": 4.942897190956574e-06, "loss": 1.5713, "step": 96500 }, { "epoch": 0.69, "learning_rate": 4.9423059657304774e-06, "loss": 1.5863, "step": 97000 }, { "epoch": 0.69, "learning_rate": 4.941711731289216e-06, "loss": 1.6008, "step": 97500 }, { "epoch": 0.69, "learning_rate": 4.941115685852782e-06, "loss": 1.609, "step": 98000 }, { "epoch": 0.7, "learning_rate": 4.9405154411961575e-06, "loss": 1.6016, "step": 98500 }, { "epoch": 0.7, "learning_rate": 4.939913399034342e-06, "loss": 1.5992, "step": 99000 }, { "epoch": 0.7, "learning_rate": 4.939307147114736e-06, "loss": 1.6742, "step": 99500 }, { "epoch": 0.71, "learning_rate": 4.938697889674869e-06, "loss": 1.689, "step": 100000 }, { "epoch": 0.71, "learning_rate": 4.938085627465422e-06, "loss": 1.6779, "step": 100500 }, { "epoch": 0.71, "learning_rate": 4.937470361240773e-06, "loss": 1.694, "step": 101000 }, { "epoch": 0.72, "learning_rate": 4.936852091759006e-06, "loss": 1.6757, "step": 101500 }, { "epoch": 0.72, "learning_rate": 4.936230819781902e-06, "loss": 1.7025, "step": 102000 }, { "epoch": 0.72, "learning_rate": 4.935606546074945e-06, "loss": 1.6897, "step": 102500 }, { "epoch": 0.73, "learning_rate": 4.934980528951094e-06, "loss": 1.7087, "step": 103000 }, { "epoch": 0.73, "learning_rate": 4.9343502600952685e-06, "loss": 1.6645, "step": 103500 }, { "epoch": 0.73, "learning_rate": 4.9337169918266646e-06, "loss": 1.6942, "step": 104000 }, { "epoch": 0.74, "learning_rate": 4.9330807249255455e-06, "loss": 1.6854, "step": 104500 }, { "epoch": 0.74, "learning_rate": 4.932442741696698e-06, "loss": 1.6781, "step": 105000 }, { "epoch": 0.75, "learning_rate": 4.931800485879448e-06, "loss": 1.6731, "step": 105500 }, { "epoch": 0.75, "learning_rate": 4.931155233791048e-06, "loss": 1.6978, "step": 106000 }, { "epoch": 0.75, "learning_rate": 4.930506986226528e-06, "loss": 1.6978, "step": 106500 }, { "epoch": 0.76, "learning_rate": 4.929857049457245e-06, "loss": 1.6624, "step": 107000 }, { "epoch": 0.76, "learning_rate": 4.929202819327277e-06, "loss": 1.6721, "step": 107500 }, { "epoch": 0.76, "learning_rate": 4.9285455961267984e-06, "loss": 1.6889, "step": 108000 }, { "epoch": 0.77, "learning_rate": 4.927886704082245e-06, "loss": 1.7113, "step": 108500 }, { "epoch": 0.77, "learning_rate": 4.927223503155845e-06, "loss": 1.7074, "step": 109000 }, { "epoch": 0.77, "learning_rate": 4.92655731159769e-06, "loss": 1.6816, "step": 109500 }, { "epoch": 0.78, "learning_rate": 4.925889471574631e-06, "loss": 1.6795, "step": 110000 }, { "epoch": 0.78, "learning_rate": 4.925217307196282e-06, "loss": 1.7386, "step": 110500 }, { "epoch": 0.78, "learning_rate": 4.924542154658053e-06, "loss": 1.6714, "step": 111000 }, { "epoch": 0.79, "learning_rate": 4.9238640147918145e-06, "loss": 1.699, "step": 111500 }, { "epoch": 0.79, "learning_rate": 4.9231828884331145e-06, "loss": 1.6785, "step": 112000 }, { "epoch": 0.79, "learning_rate": 4.9224987764211826e-06, "loss": 1.7445, "step": 112500 }, { "epoch": 0.8, "learning_rate": 4.92181167959893e-06, "loss": 1.6694, "step": 113000 }, { "epoch": 0.8, "learning_rate": 4.92112159881294e-06, "loss": 1.715, "step": 113500 }, { "epoch": 0.81, "learning_rate": 4.920428534913476e-06, "loss": 1.6984, "step": 114000 }, { "epoch": 0.81, "learning_rate": 4.919732488754477e-06, "loss": 1.6853, "step": 114500 }, { "epoch": 0.81, "learning_rate": 4.919033461193554e-06, "loss": 1.6866, "step": 115000 }, { "epoch": 0.82, "learning_rate": 4.918331453091995e-06, "loss": 1.6843, "step": 115500 }, { "epoch": 0.82, "learning_rate": 4.917627878263452e-06, "loss": 1.6953, "step": 116000 }, { "epoch": 0.82, "learning_rate": 4.916919917635911e-06, "loss": 1.7, "step": 116500 }, { "epoch": 0.83, "learning_rate": 4.916208979071873e-06, "loss": 1.6819, "step": 117000 }, { "epoch": 0.83, "learning_rate": 4.915495063447302e-06, "loss": 1.6938, "step": 117500 }, { "epoch": 0.83, "learning_rate": 4.914779608395079e-06, "loss": 1.6556, "step": 118000 }, { "epoch": 0.84, "learning_rate": 4.914059747241706e-06, "loss": 1.674, "step": 118500 }, { "epoch": 0.84, "learning_rate": 4.913336911675913e-06, "loss": 1.6958, "step": 119000 }, { "epoch": 0.84, "learning_rate": 4.912611102588316e-06, "loss": 1.6646, "step": 119500 }, { "epoch": 0.85, "learning_rate": 4.9118823208732035e-06, "loss": 1.6849, "step": 120000 }, { "epoch": 0.85, "learning_rate": 4.91115350036079e-06, "loss": 1.6699, "step": 120500 }, { "epoch": 0.86, "learning_rate": 4.9104187879696555e-06, "loss": 1.6576, "step": 121000 }, { "epoch": 0.86, "learning_rate": 4.9096811056521994e-06, "loss": 1.663, "step": 121500 }, { "epoch": 0.86, "learning_rate": 4.908940454317336e-06, "loss": 1.6596, "step": 122000 }, { "epoch": 0.87, "learning_rate": 4.9081968348776354e-06, "loss": 1.6924, "step": 122500 }, { "epoch": 0.87, "learning_rate": 4.9074502482493275e-06, "loss": 1.7073, "step": 123000 }, { "epoch": 0.87, "learning_rate": 4.906700695352297e-06, "loss": 1.6862, "step": 123500 }, { "epoch": 0.88, "learning_rate": 4.905948177110082e-06, "loss": 1.6563, "step": 124000 }, { "epoch": 0.88, "learning_rate": 4.905194208373068e-06, "loss": 1.6589, "step": 124500 }, { "epoch": 0.88, "learning_rate": 4.904435768151762e-06, "loss": 1.6651, "step": 125000 }, { "epoch": 0.89, "learning_rate": 4.903674365375935e-06, "loss": 1.6919, "step": 125500 }, { "epoch": 0.89, "learning_rate": 4.902910000983726e-06, "loss": 1.6779, "step": 126000 }, { "epoch": 0.89, "learning_rate": 4.902144213521186e-06, "loss": 1.6898, "step": 126500 }, { "epoch": 0.9, "learning_rate": 4.901373934643746e-06, "loss": 1.7132, "step": 127000 }, { "epoch": 0.9, "learning_rate": 4.900600696984332e-06, "loss": 1.6836, "step": 127500 }, { "epoch": 0.9, "learning_rate": 4.899824501495667e-06, "loss": 1.6751, "step": 128000 }, { "epoch": 0.91, "learning_rate": 4.899046910389163e-06, "loss": 1.6768, "step": 128500 }, { "epoch": 0.91, "learning_rate": 4.8982648080256045e-06, "loss": 1.6758, "step": 129000 }, { "epoch": 0.92, "learning_rate": 4.897479750710892e-06, "loss": 1.6593, "step": 129500 }, { "epoch": 0.92, "learning_rate": 4.89669173941231e-06, "loss": 1.6927, "step": 130000 }, { "epoch": 0.92, "learning_rate": 4.895902359975868e-06, "loss": 1.6926, "step": 130500 }, { "epoch": 0.93, "learning_rate": 4.895108449529063e-06, "loss": 1.6771, "step": 131000 }, { "epoch": 0.93, "learning_rate": 4.894311588020119e-06, "loss": 1.6277, "step": 131500 }, { "epoch": 0.93, "learning_rate": 4.893511776430863e-06, "loss": 1.6672, "step": 132000 }, { "epoch": 0.94, "learning_rate": 4.892709015746761e-06, "loss": 1.6937, "step": 132500 }, { "epoch": 0.94, "learning_rate": 4.89190492131604e-06, "loss": 1.676, "step": 133000 }, { "epoch": 0.94, "learning_rate": 4.891096271306403e-06, "loss": 1.6808, "step": 133500 }, { "epoch": 0.95, "learning_rate": 4.890284675178114e-06, "loss": 1.6601, "step": 134000 }, { "epoch": 0.95, "learning_rate": 4.8894701339311555e-06, "loss": 1.6762, "step": 134500 }, { "epoch": 0.95, "learning_rate": 4.888654286477422e-06, "loss": 1.6826, "step": 135000 }, { "epoch": 0.96, "learning_rate": 4.8878338638927996e-06, "loss": 1.649, "step": 135500 }, { "epoch": 0.96, "learning_rate": 4.887010499209203e-06, "loss": 1.677, "step": 136000 }, { "epoch": 3.62, "learning_rate": 4.983889443816445e-06, "loss": 1.5933, "step": 136500 }, { "epoch": 3.63, "learning_rate": 4.983771280259433e-06, "loss": 1.5906, "step": 137000 }, { "epoch": 3.64, "learning_rate": 4.983652686357577e-06, "loss": 1.5919, "step": 137500 }, { "epoch": 3.66, "learning_rate": 4.983533662131423e-06, "loss": 1.5722, "step": 138000 }, { "epoch": 3.67, "learning_rate": 4.983414207601596e-06, "loss": 1.5707, "step": 138500 }, { "epoch": 3.68, "learning_rate": 4.98329432278879e-06, "loss": 1.5884, "step": 139000 }, { "epoch": 3.7, "learning_rate": 4.983174007713778e-06, "loss": 1.5945, "step": 139500 }, { "epoch": 3.71, "learning_rate": 4.983053262397406e-06, "loss": 1.5783, "step": 140000 }, { "epoch": 3.72, "learning_rate": 4.982932329641013e-06, "loss": 1.5702, "step": 140500 }, { "epoch": 3.74, "learning_rate": 4.9828109684042125e-06, "loss": 1.5809, "step": 141000 }, { "epoch": 3.75, "learning_rate": 4.9826889342102535e-06, "loss": 1.5836, "step": 141500 }, { "epoch": 3.76, "learning_rate": 4.982566715216966e-06, "loss": 1.5774, "step": 142000 }, { "epoch": 3.78, "learning_rate": 4.982443821589845e-06, "loss": 1.568, "step": 142500 }, { "epoch": 3.79, "learning_rate": 4.98232049784788e-06, "loss": 1.5779, "step": 143000 }, { "epoch": 3.8, "learning_rate": 4.9821967440124365e-06, "loss": 1.5926, "step": 143500 }, { "epoch": 3.82, "learning_rate": 4.9820725601049555e-06, "loss": 1.5753, "step": 144000 }, { "epoch": 3.83, "learning_rate": 4.981947946146954e-06, "loss": 1.575, "step": 144500 }, { "epoch": 3.84, "learning_rate": 4.981822902160024e-06, "loss": 1.5769, "step": 145000 }, { "epoch": 3.86, "learning_rate": 4.981697428165829e-06, "loss": 1.5729, "step": 145500 }, { "epoch": 3.87, "learning_rate": 4.98157152418611e-06, "loss": 1.5922, "step": 146000 }, { "epoch": 3.88, "learning_rate": 4.981445190242682e-06, "loss": 1.5819, "step": 146500 }, { "epoch": 3.9, "learning_rate": 4.981318426357433e-06, "loss": 1.5708, "step": 147000 }, { "epoch": 3.91, "learning_rate": 4.981191232552328e-06, "loss": 1.5657, "step": 147500 }, { "epoch": 3.92, "learning_rate": 4.981063608849403e-06, "loss": 1.5651, "step": 148000 }, { "epoch": 3.94, "learning_rate": 4.98093581180693e-06, "loss": 1.5693, "step": 148500 }, { "epoch": 3.95, "learning_rate": 4.9808073292344626e-06, "loss": 1.5734, "step": 149000 }, { "epoch": 3.96, "learning_rate": 4.980678416830693e-06, "loss": 1.5789, "step": 149500 }, { "epoch": 3.97, "learning_rate": 4.980549074617956e-06, "loss": 1.5766, "step": 150000 }, { "epoch": 3.99, "learning_rate": 4.980419562591572e-06, "loss": 1.5693, "step": 150500 }, { "epoch": 4.0, "learning_rate": 4.980289361687711e-06, "loss": 1.5845, "step": 151000 }, { "epoch": 4.01, "learning_rate": 4.980158731042292e-06, "loss": 1.5355, "step": 151500 }, { "epoch": 4.03, "learning_rate": 4.980027670677947e-06, "loss": 1.5572, "step": 152000 }, { "epoch": 4.04, "learning_rate": 4.979896180617384e-06, "loss": 1.5396, "step": 152500 }, { "epoch": 4.05, "learning_rate": 4.979764525151654e-06, "loss": 1.5605, "step": 153000 }, { "epoch": 4.07, "learning_rate": 4.979632176626357e-06, "loss": 1.5431, "step": 153500 }, { "epoch": 4.08, "learning_rate": 4.979499398473366e-06, "loss": 1.5378, "step": 154000 }, { "epoch": 4.09, "learning_rate": 4.979366190715688e-06, "loss": 1.536, "step": 154500 }, { "epoch": 4.11, "learning_rate": 4.979232553376403e-06, "loss": 1.5316, "step": 155000 }, { "epoch": 4.12, "learning_rate": 4.979098486478665e-06, "loss": 1.5367, "step": 155500 }, { "epoch": 4.13, "learning_rate": 4.978963990045702e-06, "loss": 1.5489, "step": 156000 }, { "epoch": 4.15, "learning_rate": 4.978829334381346e-06, "loss": 1.5518, "step": 156500 }, { "epoch": 4.16, "learning_rate": 4.9786939798068715e-06, "loss": 1.5375, "step": 157000 }, { "epoch": 4.17, "learning_rate": 4.978558195767259e-06, "loss": 1.5403, "step": 157500 }, { "epoch": 4.19, "learning_rate": 4.9784219822860345e-06, "loss": 1.5407, "step": 158000 }, { "epoch": 4.2, "learning_rate": 4.9782856131011405e-06, "loss": 1.5539, "step": 158500 }, { "epoch": 4.21, "learning_rate": 4.9781485416663325e-06, "loss": 1.5269, "step": 159000 }, { "epoch": 4.23, "learning_rate": 4.97801104086089e-06, "loss": 1.5315, "step": 159500 }, { "epoch": 4.24, "learning_rate": 4.977873110708638e-06, "loss": 1.542, "step": 160000 }, { "epoch": 4.25, "learning_rate": 4.977735028380872e-06, "loss": 1.5212, "step": 160500 }, { "epoch": 4.27, "learning_rate": 4.977596240465341e-06, "loss": 1.5384, "step": 161000 }, { "epoch": 4.28, "learning_rate": 4.977457023274869e-06, "loss": 1.5483, "step": 161500 }, { "epoch": 4.29, "learning_rate": 4.977317376833576e-06, "loss": 1.5383, "step": 162000 }, { "epoch": 4.31, "learning_rate": 4.9771773011656586e-06, "loss": 1.537, "step": 162500 }, { "epoch": 4.32, "learning_rate": 4.977036796295387e-06, "loss": 1.5508, "step": 163000 }, { "epoch": 4.33, "learning_rate": 4.976896144543505e-06, "loss": 1.5501, "step": 163500 }, { "epoch": 4.35, "learning_rate": 4.976754782199913e-06, "loss": 1.5394, "step": 164000 }, { "epoch": 4.36, "learning_rate": 4.976612990727174e-06, "loss": 1.5345, "step": 164500 }, { "epoch": 4.37, "learning_rate": 4.976470770149855e-06, "loss": 1.5118, "step": 165000 }, { "epoch": 4.39, "learning_rate": 4.976328120492595e-06, "loss": 1.5261, "step": 165500 }, { "epoch": 4.4, "learning_rate": 4.976185328365718e-06, "loss": 1.5453, "step": 166000 }, { "epoch": 4.41, "learning_rate": 4.976041821480838e-06, "loss": 1.5373, "step": 166500 }, { "epoch": 4.43, "learning_rate": 4.97589788559034e-06, "loss": 1.559, "step": 167000 }, { "epoch": 4.44, "learning_rate": 4.975753520719162e-06, "loss": 1.5539, "step": 167500 }, { "epoch": 4.45, "learning_rate": 4.975608726892317e-06, "loss": 1.5642, "step": 168000 }, { "epoch": 4.47, "learning_rate": 4.975463504134893e-06, "loss": 1.5403, "step": 168500 }, { "epoch": 4.48, "learning_rate": 4.975317852472051e-06, "loss": 1.5434, "step": 169000 }, { "epoch": 4.49, "learning_rate": 4.975171771929028e-06, "loss": 1.5272, "step": 169500 }, { "epoch": 4.5, "learning_rate": 4.97502555597791e-06, "loss": 1.5326, "step": 170000 }, { "epoch": 4.52, "learning_rate": 4.974878912910856e-06, "loss": 1.5426, "step": 170500 }, { "epoch": 4.53, "learning_rate": 4.974731547594612e-06, "loss": 1.5456, "step": 171000 }, { "epoch": 4.54, "learning_rate": 4.974583753499772e-06, "loss": 1.5501, "step": 171500 }, { "epoch": 4.56, "learning_rate": 4.974435530651941e-06, "loss": 1.5401, "step": 172000 }, { "epoch": 4.57, "learning_rate": 4.9742868790768025e-06, "loss": 1.5446, "step": 172500 }, { "epoch": 4.58, "learning_rate": 4.974138097388491e-06, "loss": 1.5262, "step": 173000 }, { "epoch": 4.6, "learning_rate": 4.973988888737395e-06, "loss": 1.5426, "step": 173500 }, { "epoch": 4.61, "learning_rate": 4.973838952849711e-06, "loss": 1.5344, "step": 174000 }, { "epoch": 4.62, "learning_rate": 4.973688588338083e-06, "loss": 1.543, "step": 174500 }, { "epoch": 4.64, "learning_rate": 4.973537795228565e-06, "loss": 1.5278, "step": 175000 }, { "epoch": 4.65, "learning_rate": 4.9733865735472815e-06, "loss": 1.5355, "step": 175500 }, { "epoch": 4.66, "learning_rate": 4.973234923320436e-06, "loss": 1.5363, "step": 176000 }, { "epoch": 4.68, "learning_rate": 4.973082844574302e-06, "loss": 1.5507, "step": 176500 }, { "epoch": 4.69, "learning_rate": 4.97293033733523e-06, "loss": 1.5343, "step": 177000 }, { "epoch": 4.7, "learning_rate": 4.972777401629643e-06, "loss": 1.5456, "step": 177500 }, { "epoch": 4.72, "learning_rate": 4.972624037484041e-06, "loss": 1.5407, "step": 178000 }, { "epoch": 4.73, "learning_rate": 4.97247055293765e-06, "loss": 1.5362, "step": 178500 }, { "epoch": 4.74, "learning_rate": 4.9723163328485536e-06, "loss": 1.5474, "step": 179000 }, { "epoch": 4.76, "learning_rate": 4.972161684399326e-06, "loss": 1.5364, "step": 179500 }, { "epoch": 4.77, "learning_rate": 4.972006607616762e-06, "loss": 1.5343, "step": 180000 }, { "epoch": 4.78, "learning_rate": 4.971851413965343e-06, "loss": 1.5308, "step": 180500 }, { "epoch": 4.8, "learning_rate": 4.971695481453321e-06, "loss": 1.5394, "step": 181000 }, { "epoch": 4.81, "learning_rate": 4.9715391206887405e-06, "loss": 1.5341, "step": 181500 }, { "epoch": 4.82, "learning_rate": 4.971382331698691e-06, "loss": 1.5283, "step": 182000 }, { "epoch": 4.84, "learning_rate": 4.971225114510338e-06, "loss": 1.5497, "step": 182500 }, { "epoch": 4.85, "learning_rate": 4.971067469150923e-06, "loss": 1.5276, "step": 183000 }, { "epoch": 4.86, "learning_rate": 4.970909395647759e-06, "loss": 1.5338, "step": 183500 }, { "epoch": 4.88, "learning_rate": 4.970751211458715e-06, "loss": 1.5269, "step": 184000 }, { "epoch": 4.89, "learning_rate": 4.970592282606443e-06, "loss": 1.5278, "step": 184500 }, { "epoch": 4.9, "learning_rate": 4.9704329256927524e-06, "loss": 1.5371, "step": 185000 }, { "epoch": 4.92, "learning_rate": 4.970273140745256e-06, "loss": 1.5146, "step": 185500 }, { "epoch": 4.93, "learning_rate": 4.97011292779164e-06, "loss": 1.5309, "step": 186000 }, { "epoch": 4.94, "learning_rate": 4.9699526085686275e-06, "loss": 1.5257, "step": 186500 }, { "epoch": 4.96, "learning_rate": 4.9697915405419915e-06, "loss": 1.5359, "step": 187000 }, { "epoch": 4.97, "learning_rate": 4.969630044592678e-06, "loss": 1.5429, "step": 187500 }, { "epoch": 4.98, "learning_rate": 4.969468120748667e-06, "loss": 1.5167, "step": 188000 }, { "epoch": 5.0, "learning_rate": 4.969305769038015e-06, "loss": 1.5503, "step": 188500 }, { "epoch": 5.01, "learning_rate": 4.969143315474913e-06, "loss": 1.4932, "step": 189000 }, { "epoch": 5.02, "learning_rate": 4.968980435810976e-06, "loss": 1.4474, "step": 189500 }, { "epoch": 5.03, "learning_rate": 4.968816802380544e-06, "loss": 1.4653, "step": 190000 }, { "epoch": 5.05, "learning_rate": 4.96865274119632e-06, "loss": 1.4524, "step": 190500 }, { "epoch": 5.06, "learning_rate": 4.968488252286729e-06, "loss": 1.4792, "step": 191000 }, { "epoch": 5.07, "learning_rate": 4.968323335680272e-06, "loss": 1.4806, "step": 191500 }, { "epoch": 5.09, "learning_rate": 4.968157991405521e-06, "loss": 1.4716, "step": 192000 }, { "epoch": 5.1, "learning_rate": 4.967992219491125e-06, "loss": 1.4671, "step": 192500 }, { "epoch": 5.11, "learning_rate": 4.967826019965807e-06, "loss": 1.4659, "step": 193000 }, { "epoch": 5.13, "learning_rate": 4.967659392858362e-06, "loss": 1.4676, "step": 193500 }, { "epoch": 5.14, "learning_rate": 4.96749233819766e-06, "loss": 1.4606, "step": 194000 }, { "epoch": 5.15, "learning_rate": 4.967324856012646e-06, "loss": 1.4769, "step": 194500 }, { "epoch": 5.17, "learning_rate": 4.967156946332338e-06, "loss": 1.4657, "step": 195000 }, { "epoch": 5.18, "learning_rate": 4.9669889462867134e-06, "loss": 1.4741, "step": 195500 }, { "epoch": 5.19, "learning_rate": 4.966820182558015e-06, "loss": 1.4726, "step": 196000 }, { "epoch": 5.21, "learning_rate": 4.966650991421464e-06, "loss": 1.452, "step": 196500 }, { "epoch": 5.22, "learning_rate": 4.966481372906374e-06, "loss": 1.4527, "step": 197000 }, { "epoch": 5.23, "learning_rate": 4.966311667560339e-06, "loss": 1.4611, "step": 197500 }, { "epoch": 5.25, "learning_rate": 4.966141195231022e-06, "loss": 1.4572, "step": 198000 }, { "epoch": 5.26, "learning_rate": 4.965970637837151e-06, "loss": 1.4513, "step": 198500 }, { "epoch": 5.27, "learning_rate": 4.965799311811517e-06, "loss": 1.4561, "step": 199000 }, { "epoch": 5.29, "learning_rate": 4.9656275585549105e-06, "loss": 1.4485, "step": 199500 }, { "epoch": 5.3, "learning_rate": 4.9654553780970885e-06, "loss": 1.4565, "step": 200000 }, { "epoch": 5.31, "learning_rate": 4.965282770467883e-06, "loss": 1.4488, "step": 200500 }, { "epoch": 5.33, "learning_rate": 4.965109735697201e-06, "loss": 1.4566, "step": 201000 }, { "epoch": 5.34, "learning_rate": 4.964936273815022e-06, "loss": 1.4779, "step": 201500 }, { "epoch": 5.35, "learning_rate": 4.964762384851403e-06, "loss": 1.4778, "step": 202000 }, { "epoch": 5.37, "learning_rate": 4.96458806883647e-06, "loss": 1.4882, "step": 202500 }, { "epoch": 5.38, "learning_rate": 4.964414025623156e-06, "loss": 1.4705, "step": 203000 }, { "epoch": 5.39, "learning_rate": 4.964238857304181e-06, "loss": 1.4777, "step": 203500 }, { "epoch": 5.41, "learning_rate": 4.964063262024601e-06, "loss": 1.4534, "step": 204000 }, { "epoch": 5.42, "learning_rate": 4.963887239814839e-06, "loss": 1.4618, "step": 204500 }, { "epoch": 5.43, "learning_rate": 4.963710790705395e-06, "loss": 1.4535, "step": 205000 }, { "epoch": 5.45, "learning_rate": 4.963533914726838e-06, "loss": 1.4756, "step": 205500 }, { "epoch": 5.46, "learning_rate": 4.963356966941416e-06, "loss": 1.4723, "step": 206000 }, { "epoch": 5.47, "learning_rate": 4.963179238170235e-06, "loss": 1.4755, "step": 206500 }, { "epoch": 5.49, "learning_rate": 4.96300108262204e-06, "loss": 1.45, "step": 207000 }, { "epoch": 5.5, "learning_rate": 4.9628225003277e-06, "loss": 1.4747, "step": 207500 }, { "epoch": 5.51, "learning_rate": 4.9626434913181575e-06, "loss": 1.4682, "step": 208000 }, { "epoch": 5.53, "learning_rate": 4.962464055624426e-06, "loss": 1.4798, "step": 208500 }, { "epoch": 5.54, "learning_rate": 4.962284193277597e-06, "loss": 1.4686, "step": 209000 }, { "epoch": 5.55, "learning_rate": 4.962103904308832e-06, "loss": 1.463, "step": 209500 }, { "epoch": 5.56, "learning_rate": 4.961923188749369e-06, "loss": 1.4668, "step": 210000 }, { "epoch": 5.58, "learning_rate": 4.961742046630521e-06, "loss": 1.478, "step": 210500 }, { "epoch": 5.59, "learning_rate": 4.96156047798367e-06, "loss": 1.4583, "step": 211000 }, { "epoch": 5.6, "learning_rate": 4.961378482840278e-06, "loss": 1.4534, "step": 211500 }, { "epoch": 5.62, "learning_rate": 4.961196791767787e-06, "loss": 1.4678, "step": 212000 }, { "epoch": 5.63, "learning_rate": 4.961013945431653e-06, "loss": 1.4661, "step": 212500 }, { "epoch": 5.64, "learning_rate": 4.960830672693671e-06, "loss": 1.4691, "step": 213000 }, { "epoch": 5.66, "learning_rate": 4.960646973585595e-06, "loss": 1.4797, "step": 213500 }, { "epoch": 5.67, "learning_rate": 4.960462848139253e-06, "loss": 1.4629, "step": 214000 }, { "epoch": 5.68, "learning_rate": 4.960278296386547e-06, "loss": 1.4625, "step": 214500 }, { "epoch": 5.7, "learning_rate": 4.960093318359453e-06, "loss": 1.4739, "step": 215000 }, { "epoch": 5.71, "learning_rate": 4.959907914090021e-06, "loss": 1.4674, "step": 215500 }, { "epoch": 5.72, "learning_rate": 4.959722455696669e-06, "loss": 1.4582, "step": 216000 }, { "epoch": 5.74, "learning_rate": 4.959536572828244e-06, "loss": 1.4617, "step": 216500 }, { "epoch": 5.75, "learning_rate": 4.959349891729353e-06, "loss": 1.4862, "step": 217000 }, { "epoch": 5.76, "learning_rate": 4.959162784516932e-06, "loss": 1.4584, "step": 217500 }, { "epoch": 5.78, "learning_rate": 4.958975251223399e-06, "loss": 1.4493, "step": 218000 }, { "epoch": 5.79, "learning_rate": 4.958787291881248e-06, "loss": 1.4721, "step": 218500 }, { "epoch": 5.8, "learning_rate": 4.958598906523043e-06, "loss": 1.4536, "step": 219000 }, { "epoch": 5.82, "learning_rate": 4.9584100951814265e-06, "loss": 1.4776, "step": 219500 }, { "epoch": 5.83, "learning_rate": 4.958221236788773e-06, "loss": 1.4852, "step": 220000 }, { "epoch": 23.37, "learning_rate": 4.356277313166177e-06, "loss": 1.4167, "step": 220500 }, { "epoch": 23.43, "learning_rate": 4.353486494789948e-06, "loss": 1.4065, "step": 221000 }, { "epoch": 23.48, "learning_rate": 4.350690537902183e-06, "loss": 1.4014, "step": 221500 }, { "epoch": 23.53, "learning_rate": 4.3478894502542505e-06, "loss": 1.4092, "step": 222000 }, { "epoch": 23.58, "learning_rate": 4.3450832396117424e-06, "loss": 1.3999, "step": 222500 }, { "epoch": 23.64, "learning_rate": 4.342271913754456e-06, "loss": 1.4067, "step": 223000 }, { "epoch": 23.69, "learning_rate": 4.339455480476367e-06, "loss": 1.4049, "step": 223500 }, { "epoch": 23.74, "learning_rate": 4.336639595735609e-06, "loss": 1.404, "step": 224000 }, { "epoch": 15.86, "learning_rate": 4.69610858653822e-06, "loss": 1.3812, "step": 224500 }, { "epoch": 15.9, "learning_rate": 4.69478118039917e-06, "loss": 1.3888, "step": 225000 }, { "epoch": 15.94, "learning_rate": 4.693451069948868e-06, "loss": 1.3894, "step": 225500 }, { "epoch": 15.97, "learning_rate": 4.692118256826217e-06, "loss": 1.3942, "step": 226000 }, { "epoch": 16.01, "learning_rate": 4.69078274267345e-06, "loss": 1.3959, "step": 226500 }, { "epoch": 16.04, "learning_rate": 4.689444529136127e-06, "loss": 1.3875, "step": 227000 }, { "epoch": 16.08, "learning_rate": 4.688103617863134e-06, "loss": 1.3807, "step": 227500 }, { "epoch": 16.11, "learning_rate": 4.686760010506685e-06, "loss": 1.3778, "step": 228000 }, { "epoch": 16.15, "learning_rate": 4.685416404013816e-06, "loss": 1.389, "step": 228500 }, { "epoch": 16.18, "learning_rate": 4.68406741484425e-06, "loss": 1.3981, "step": 229000 }, { "epoch": 16.22, "learning_rate": 4.682715734564455e-06, "loss": 1.3847, "step": 229500 }, { "epoch": 16.25, "learning_rate": 4.681361364839912e-06, "loss": 1.3804, "step": 230000 }, { "epoch": 16.29, "learning_rate": 4.680009740921249e-06, "loss": 1.392, "step": 230500 }, { "epoch": 16.32, "learning_rate": 4.678650008057981e-06, "loss": 1.3871, "step": 231000 }, { "epoch": 16.36, "learning_rate": 4.67728759075957e-06, "loss": 1.3851, "step": 231500 }, { "epoch": 16.39, "learning_rate": 4.675922490704728e-06, "loss": 1.3877, "step": 232000 }, { "epoch": 8.21, "learning_rate": 4.9172705288745405e-06, "loss": 1.369, "step": 232500 }, { "epoch": 8.23, "learning_rate": 4.916916872855345e-06, "loss": 1.3743, "step": 233000 }, { "epoch": 8.25, "learning_rate": 4.9165617643469424e-06, "loss": 1.3815, "step": 233500 }, { "epoch": 8.27, "learning_rate": 4.916205911443638e-06, "loss": 1.4003, "step": 234000 }, { "epoch": 19.89, "learning_rate": 4.528145578498841e-06, "loss": 1.3638, "step": 234500 }, { "epoch": 20.76, "learning_rate": 4.487385084270672e-06, "loss": 1.3662, "step": 235000 }, { "epoch": 4.99, "learning_rate": 4.969336651851814e-06, "loss": 1.3665, "step": 235500 }, { "epoch": 5.0, "learning_rate": 4.969206524022907e-06, "loss": 1.365, "step": 236000 }, { "epoch": 5.01, "learning_rate": 4.969076122373843e-06, "loss": 1.3487, "step": 236500 }, { "epoch": 5.02, "learning_rate": 4.968945708543242e-06, "loss": 1.3585, "step": 237000 }, { "epoch": 5.03, "learning_rate": 4.968814759844848e-06, "loss": 1.3557, "step": 237500 }, { "epoch": 5.05, "learning_rate": 4.968683537369742e-06, "loss": 1.346, "step": 238000 }, { "epoch": 5.06, "learning_rate": 4.968552041132476e-06, "loss": 1.368, "step": 238500 }, { "epoch": 5.07, "learning_rate": 4.968420534960793e-06, "loss": 1.3677, "step": 239000 }, { "epoch": 5.08, "learning_rate": 4.968288491790435e-06, "loss": 1.3728, "step": 239500 }, { "epoch": 5.09, "learning_rate": 4.968156174901726e-06, "loss": 1.3692, "step": 240000 }, { "epoch": 5.1, "learning_rate": 4.968023584309339e-06, "loss": 1.3595, "step": 240500 }, { "epoch": 5.11, "learning_rate": 4.967890720027977e-06, "loss": 1.3639, "step": 241000 }, { "epoch": 5.12, "learning_rate": 4.967757582072374e-06, "loss": 1.38, "step": 241500 }, { "epoch": 5.13, "learning_rate": 4.967624170457293e-06, "loss": 1.3552, "step": 242000 }, { "epoch": 5.14, "learning_rate": 4.967490752841137e-06, "loss": 1.35, "step": 242500 }, { "epoch": 5.15, "learning_rate": 4.967356794498762e-06, "loss": 1.3782, "step": 243000 }, { "epoch": 5.16, "learning_rate": 4.967222562541354e-06, "loss": 1.3719, "step": 243500 }, { "epoch": 5.17, "learning_rate": 4.9670880569838e-06, "loss": 1.3601, "step": 244000 }, { "epoch": 5.18, "learning_rate": 4.9669532778410155e-06, "loss": 1.3735, "step": 244500 }, { "epoch": 5.19, "learning_rate": 4.966818225127948e-06, "loss": 1.3798, "step": 245000 }, { "epoch": 5.2, "learning_rate": 4.9666828988595705e-06, "loss": 1.351, "step": 245500 }, { "epoch": 5.22, "learning_rate": 4.966547299050893e-06, "loss": 1.3579, "step": 246000 }, { "epoch": 2.61, "learning_rate": 4.991588952906284e-06, "loss": 1.3578, "step": 246500 }, { "epoch": 2.62, "learning_rate": 4.991554801918827e-06, "loss": 1.362, "step": 247000 }, { "epoch": 2.62, "learning_rate": 4.991520581858223e-06, "loss": 1.3771, "step": 247500 }, { "epoch": 2.63, "learning_rate": 4.9914862927254196e-06, "loss": 1.3466, "step": 248000 }, { "epoch": 2.63, "learning_rate": 4.991452003306709e-06, "loss": 1.3999, "step": 248500 }, { "epoch": 2.64, "learning_rate": 4.991417576170502e-06, "loss": 1.3488, "step": 249000 }, { "epoch": 2.64, "learning_rate": 4.991383079964951e-06, "loss": 1.3996, "step": 249500 }, { "epoch": 2.65, "learning_rate": 4.991348514691014e-06, "loss": 1.3566, "step": 250000 }, { "epoch": 2.66, "learning_rate": 4.99131394968726e-06, "loss": 1.3895, "step": 250500 }, { "epoch": 2.66, "learning_rate": 4.991279246417557e-06, "loss": 1.3697, "step": 251000 }, { "epoch": 2.67, "learning_rate": 4.991244474082346e-06, "loss": 1.3646, "step": 251500 }, { "epoch": 2.67, "learning_rate": 4.991209632682591e-06, "loss": 1.3696, "step": 252000 }, { "epoch": 2.68, "learning_rate": 4.9911747921091095e-06, "loss": 1.3863, "step": 252500 }, { "epoch": 2.68, "learning_rate": 4.99113981272129e-06, "loss": 1.3785, "step": 253000 }, { "epoch": 2.69, "learning_rate": 4.991104764271827e-06, "loss": 1.353, "step": 253500 }, { "epoch": 2.69, "learning_rate": 4.9910696467616924e-06, "loss": 1.3879, "step": 254000 }, { "epoch": 2.7, "learning_rate": 4.991034530633923e-06, "loss": 1.3407, "step": 254500 }, { "epoch": 2.7, "learning_rate": 4.990999275143486e-06, "loss": 1.3679, "step": 255000 }, { "epoch": 2.71, "learning_rate": 4.990963950595301e-06, "loss": 1.3667, "step": 255500 }, { "epoch": 2.71, "learning_rate": 4.990928556990351e-06, "loss": 1.3493, "step": 256000 }, { "epoch": 2.72, "learning_rate": 4.990893165323852e-06, "loss": 1.3701, "step": 256500 }, { "epoch": 2.72, "learning_rate": 4.99085763374642e-06, "loss": 1.3872, "step": 257000 }, { "epoch": 2.73, "learning_rate": 4.990822104385346e-06, "loss": 1.4075, "step": 257500 }, { "epoch": 2.73, "learning_rate": 4.990786434839367e-06, "loss": 1.3791, "step": 258000 }, { "epoch": 2.74, "learning_rate": 4.9907506962415395e-06, "loss": 1.3829, "step": 258500 }, { "epoch": 2.75, "learning_rate": 4.990714888592859e-06, "loss": 1.3642, "step": 259000 }, { "epoch": 2.75, "learning_rate": 4.990679011894315e-06, "loss": 1.4128, "step": 259500 }, { "epoch": 2.76, "learning_rate": 4.990643066146902e-06, "loss": 1.3936, "step": 260000 }, { "epoch": 2.76, "learning_rate": 4.990607123450118e-06, "loss": 1.3845, "step": 260500 }, { "epoch": 2.77, "learning_rate": 4.9905710397460545e-06, "loss": 1.3552, "step": 261000 }, { "epoch": 2.77, "learning_rate": 4.9905348869961154e-06, "loss": 1.3767, "step": 261500 }, { "epoch": 2.78, "learning_rate": 4.990498665201305e-06, "loss": 1.3768, "step": 262000 }, { "epoch": 2.78, "learning_rate": 4.990462374362625e-06, "loss": 1.4119, "step": 262500 }, { "epoch": 2.79, "learning_rate": 4.990426014481083e-06, "loss": 1.3987, "step": 263000 }, { "epoch": 2.79, "learning_rate": 4.990389585557687e-06, "loss": 1.3581, "step": 263500 }, { "epoch": 2.8, "learning_rate": 4.990353087593447e-06, "loss": 1.3736, "step": 264000 }, { "epoch": 2.8, "learning_rate": 4.990316593792282e-06, "loss": 1.3948, "step": 264500 }, { "epoch": 2.81, "learning_rate": 4.990279957887468e-06, "loss": 1.3785, "step": 265000 }, { "epoch": 2.81, "learning_rate": 4.990243252944848e-06, "loss": 1.3607, "step": 265500 }, { "epoch": 2.82, "learning_rate": 4.990206478965441e-06, "loss": 1.3876, "step": 266000 }, { "epoch": 2.82, "learning_rate": 4.990169709705192e-06, "loss": 1.3849, "step": 266500 }, { "epoch": 2.83, "learning_rate": 4.990132797793338e-06, "loss": 1.3816, "step": 267000 }, { "epoch": 2.84, "learning_rate": 4.990095816847759e-06, "loss": 1.3752, "step": 267500 }, { "epoch": 2.84, "learning_rate": 4.99005876686948e-06, "loss": 1.3786, "step": 268000 }, { "epoch": 2.85, "learning_rate": 4.99002172216644e-06, "loss": 1.3846, "step": 268500 }, { "epoch": 2.85, "learning_rate": 4.989984534263904e-06, "loss": 1.4057, "step": 269000 }, { "epoch": 2.86, "learning_rate": 4.989947277331753e-06, "loss": 1.4113, "step": 269500 }, { "epoch": 2.86, "learning_rate": 4.989909951371019e-06, "loss": 1.3907, "step": 270000 }, { "epoch": 2.87, "learning_rate": 4.989872631241603e-06, "loss": 1.378, "step": 270500 }, { "epoch": 2.87, "learning_rate": 4.98983531735787e-06, "loss": 1.3763, "step": 271000 }, { "epoch": 2.88, "learning_rate": 4.989797784731755e-06, "loss": 1.3959, "step": 271500 }, { "epoch": 2.88, "learning_rate": 4.989760183081203e-06, "loss": 1.3778, "step": 272000 }, { "epoch": 2.89, "learning_rate": 4.989722512407255e-06, "loss": 1.3821, "step": 272500 }, { "epoch": 2.89, "learning_rate": 4.989684772710956e-06, "loss": 1.3778, "step": 273000 }, { "epoch": 2.9, "learning_rate": 4.9896469639933535e-06, "loss": 1.3934, "step": 273500 }, { "epoch": 2.9, "learning_rate": 4.989609086255493e-06, "loss": 1.3868, "step": 274000 }, { "epoch": 2.91, "learning_rate": 4.989571139498426e-06, "loss": 1.3706, "step": 274500 }, { "epoch": 2.91, "learning_rate": 4.989533123723205e-06, "loss": 1.3697, "step": 275000 }, { "epoch": 2.92, "learning_rate": 4.989495038930883e-06, "loss": 1.3751, "step": 275500 }, { "epoch": 2.93, "learning_rate": 4.9894569614990094e-06, "loss": 1.3906, "step": 276000 }, { "epoch": 2.93, "learning_rate": 4.9894187388136844e-06, "loss": 1.3799, "step": 276500 }, { "epoch": 2.94, "learning_rate": 4.989380447114429e-06, "loss": 1.3671, "step": 277000 }, { "epoch": 8.82, "learning_rate": 4.90462324019519e-06, "loss": 1.3322, "step": 277500 }, { "epoch": 8.84, "learning_rate": 4.904281303331608e-06, "loss": 1.3392, "step": 278000 }, { "epoch": 11.81, "learning_rate": 4.830076428614785e-06, "loss": 1.3219, "step": 278500 }, { "epoch": 11.83, "learning_rate": 4.829472536182216e-06, "loss": 1.3346, "step": 279000 }, { "epoch": 8.89, "learning_rate": 4.9032518937846354e-06, "loss": 1.3237, "step": 279500 }, { "epoch": 8.9, "learning_rate": 4.902907557902285e-06, "loss": 1.335, "step": 280000 }, { "epoch": 8.92, "learning_rate": 4.902562622479655e-06, "loss": 1.3322, "step": 280500 }, { "epoch": 8.94, "learning_rate": 4.902217087602807e-06, "loss": 1.3416, "step": 281000 }, { "epoch": 8.95, "learning_rate": 4.901870953357955e-06, "loss": 1.3218, "step": 281500 }, { "epoch": 8.97, "learning_rate": 4.9015242198314605e-06, "loss": 1.3355, "step": 282000 }, { "epoch": 8.98, "learning_rate": 4.901176887109837e-06, "loss": 1.3258, "step": 282500 }, { "epoch": 9.0, "learning_rate": 4.900828955279745e-06, "loss": 1.3365, "step": 283000 }, { "epoch": 9.02, "learning_rate": 4.900481122087466e-06, "loss": 1.3185, "step": 283500 }, { "epoch": 9.03, "learning_rate": 4.900131993498804e-06, "loss": 1.3258, "step": 284000 }, { "epoch": 10.05, "learning_rate": 4.876468017839525e-06, "loss": 1.3223, "step": 284500 }, { "epoch": 10.07, "learning_rate": 4.876036882141607e-06, "loss": 1.316, "step": 285000 }, { "epoch": 10.09, "learning_rate": 4.875605014532027e-06, "loss": 1.3237, "step": 285500 }, { "epoch": 10.11, "learning_rate": 4.875172415143815e-06, "loss": 1.331, "step": 286000 }, { "epoch": 10.12, "learning_rate": 4.874739084110231e-06, "loss": 1.3168, "step": 286500 }, { "epoch": 10.14, "learning_rate": 4.874305890419807e-06, "loss": 1.3253, "step": 287000 }, { "epoch": 10.16, "learning_rate": 4.873871097958773e-06, "loss": 1.338, "step": 287500 }, { "epoch": 10.18, "learning_rate": 4.873435574253223e-06, "loss": 1.3369, "step": 288000 }, { "epoch": 10.19, "learning_rate": 4.872999319437314e-06, "loss": 1.3304, "step": 288500 }, { "epoch": 10.21, "learning_rate": 4.87256233364543e-06, "loss": 1.3266, "step": 289000 }, { "epoch": 10.23, "learning_rate": 4.872124617012178e-06, "loss": 1.3233, "step": 289500 }, { "epoch": 10.25, "learning_rate": 4.87168704729623e-06, "loss": 1.3372, "step": 290000 }, { "epoch": 10.26, "learning_rate": 4.8712478708459795e-06, "loss": 1.3455, "step": 290500 }, { "epoch": 10.28, "learning_rate": 4.870807963959269e-06, "loss": 1.33, "step": 291000 }, { "epoch": 10.3, "learning_rate": 4.870367326771606e-06, "loss": 1.3395, "step": 291500 }, { "epoch": 10.32, "learning_rate": 4.8699259594187235e-06, "loss": 1.3406, "step": 292000 }, { "epoch": 10.33, "learning_rate": 4.869483862036581e-06, "loss": 1.336, "step": 292500 }, { "epoch": 10.35, "learning_rate": 4.86904103476136e-06, "loss": 1.3385, "step": 293000 }, { "epoch": 10.37, "learning_rate": 4.8685974777294685e-06, "loss": 1.333, "step": 293500 }, { "epoch": 10.39, "learning_rate": 4.868153191077541e-06, "loss": 1.3279, "step": 294000 }, { "epoch": 10.41, "learning_rate": 4.867708174942434e-06, "loss": 1.3391, "step": 294500 }, { "epoch": 10.42, "learning_rate": 4.867263321679989e-06, "loss": 1.3125, "step": 295000 }, { "epoch": 10.44, "learning_rate": 4.866818635793604e-06, "loss": 1.321, "step": 295500 }, { "epoch": 10.46, "learning_rate": 4.866371436406369e-06, "loss": 1.3237, "step": 296000 }, { "epoch": 10.48, "learning_rate": 4.865923508084802e-06, "loss": 1.3373, "step": 296500 }, { "epoch": 10.49, "learning_rate": 4.865474850966887e-06, "loss": 1.3608, "step": 297000 }, { "epoch": 10.51, "learning_rate": 4.865025465190824e-06, "loss": 1.3348, "step": 297500 }, { "epoch": 10.53, "learning_rate": 4.864576251850606e-06, "loss": 1.3257, "step": 298000 }, { "epoch": 10.55, "learning_rate": 4.8641254106303825e-06, "loss": 1.3386, "step": 298500 }, { "epoch": 10.56, "learning_rate": 4.863673841167693e-06, "loss": 1.3338, "step": 299000 }, { "epoch": 10.58, "learning_rate": 4.863221543601638e-06, "loss": 1.3363, "step": 299500 }, { "epoch": 10.6, "learning_rate": 4.8627685180715436e-06, "loss": 1.3344, "step": 300000 }, { "epoch": 10.62, "learning_rate": 4.862314764716958e-06, "loss": 1.3245, "step": 300500 }, { "epoch": 10.64, "learning_rate": 4.861860283677654e-06, "loss": 1.3313, "step": 301000 }, { "epoch": 10.65, "learning_rate": 4.8614050750936315e-06, "loss": 1.3281, "step": 301500 }, { "epoch": 10.67, "learning_rate": 4.860949139105111e-06, "loss": 1.3431, "step": 302000 }, { "epoch": 8.55, "learning_rate": 4.910414959702474e-06, "loss": 1.3054, "step": 302500 }, { "epoch": 8.56, "learning_rate": 4.910120236938768e-06, "loss": 1.2943, "step": 303000 }, { "epoch": 8.58, "learning_rate": 4.909825039045666e-06, "loss": 1.2908, "step": 303500 }, { "epoch": 8.59, "learning_rate": 4.909529957901373e-06, "loss": 1.2962, "step": 304000 }, { "epoch": 8.61, "learning_rate": 4.909233810874125e-06, "loss": 1.2905, "step": 304500 }, { "epoch": 8.62, "learning_rate": 4.90893718889223e-06, "loss": 1.3046, "step": 305000 }, { "epoch": 8.64, "learning_rate": 4.908640092014164e-06, "loss": 1.3078, "step": 305500 }, { "epoch": 8.65, "learning_rate": 4.908342520298496e-06, "loss": 1.3022, "step": 306000 }, { "epoch": 8.66, "learning_rate": 4.90804507037067e-06, "loss": 1.3054, "step": 306500 }, { "epoch": 8.68, "learning_rate": 4.9077465501052635e-06, "loss": 1.3044, "step": 307000 }, { "epoch": 8.69, "learning_rate": 4.907447555178408e-06, "loss": 1.3073, "step": 307500 }, { "epoch": 8.71, "learning_rate": 4.9071480856490464e-06, "loss": 1.3195, "step": 308000 }, { "epoch": 8.72, "learning_rate": 4.906848741937917e-06, "loss": 1.3165, "step": 308500 }, { "epoch": 8.73, "learning_rate": 4.906548324329658e-06, "loss": 1.3221, "step": 309000 }, { "epoch": 8.75, "learning_rate": 4.906247432296168e-06, "loss": 1.3176, "step": 309500 }, { "epoch": 8.76, "learning_rate": 4.905946669102941e-06, "loss": 1.2961, "step": 310000 }, { "epoch": 8.78, "learning_rate": 4.905644829345587e-06, "loss": 1.322, "step": 310500 }, { "epoch": 8.79, "learning_rate": 4.905342515341116e-06, "loss": 1.306, "step": 311000 }, { "epoch": 8.8, "learning_rate": 4.905039727149125e-06, "loss": 1.3045, "step": 311500 }, { "epoch": 8.82, "learning_rate": 4.904736464829305e-06, "loss": 1.2781, "step": 312000 }, { "epoch": 8.83, "learning_rate": 4.904432728441443e-06, "loss": 1.2526, "step": 312500 }, { "epoch": 8.85, "learning_rate": 4.904128518045415e-06, "loss": 1.296, "step": 313000 }, { "epoch": 8.86, "learning_rate": 4.903823833701194e-06, "loss": 1.3221, "step": 313500 }, { "epoch": 8.88, "learning_rate": 4.903519286258212e-06, "loss": 1.3018, "step": 314000 }, { "epoch": 8.89, "learning_rate": 4.90321365514549e-06, "loss": 1.2563, "step": 314500 }, { "epoch": 8.9, "learning_rate": 4.9029075502649305e-06, "loss": 1.2638, "step": 315000 }, { "epoch": 8.92, "learning_rate": 4.902600971676879e-06, "loss": 1.261, "step": 315500 }, { "epoch": 8.93, "learning_rate": 4.902294534018904e-06, "loss": 1.2715, "step": 316000 }, { "epoch": 8.95, "learning_rate": 4.901987009144389e-06, "loss": 1.2506, "step": 316500 }, { "epoch": 8.96, "learning_rate": 4.9016790107438574e-06, "loss": 1.266, "step": 317000 }, { "epoch": 8.97, "learning_rate": 4.9013705388780266e-06, "loss": 1.2541, "step": 317500 }, { "epoch": 8.99, "learning_rate": 4.901061593607708e-06, "loss": 1.2595, "step": 318000 }, { "epoch": 9.0, "learning_rate": 4.900752794303393e-06, "loss": 1.2544, "step": 318500 }, { "epoch": 9.02, "learning_rate": 4.900442903353414e-06, "loss": 1.2194, "step": 319000 }, { "epoch": 9.03, "learning_rate": 4.900132539181822e-06, "loss": 1.2294, "step": 319500 }, { "epoch": 9.05, "learning_rate": 4.899821701849801e-06, "loss": 1.2756, "step": 320000 }, { "epoch": 9.06, "learning_rate": 4.899511014511604e-06, "loss": 1.2882, "step": 320500 }, { "epoch": 9.07, "learning_rate": 4.899199231988669e-06, "loss": 1.2716, "step": 321000 }, { "epoch": 9.09, "learning_rate": 4.898886976489296e-06, "loss": 1.2789, "step": 321500 }, { "epoch": 9.1, "learning_rate": 4.8985742480750435e-06, "loss": 1.2785, "step": 322000 }, { "epoch": 9.12, "learning_rate": 4.898261673681964e-06, "loss": 1.282, "step": 322500 }, { "epoch": 9.13, "learning_rate": 4.897948000568522e-06, "loss": 1.3019, "step": 323000 }, { "epoch": 9.14, "learning_rate": 4.897633854725308e-06, "loss": 1.2736, "step": 323500 }, { "epoch": 9.16, "learning_rate": 4.8973198659229585e-06, "loss": 1.2739, "step": 324000 }, { "epoch": 9.17, "learning_rate": 4.897004775751239e-06, "loss": 1.2929, "step": 324500 }, { "epoch": 9.19, "learning_rate": 4.8966892130356955e-06, "loss": 1.2988, "step": 325000 }, { "epoch": 9.2, "learning_rate": 4.8963731778385396e-06, "loss": 1.2867, "step": 325500 }, { "epoch": 9.21, "learning_rate": 4.896056670222072e-06, "loss": 1.2865, "step": 326000 }, { "epoch": 9.23, "learning_rate": 4.89573969024869e-06, "loss": 1.2867, "step": 326500 }, { "epoch": 9.24, "learning_rate": 4.895422237980881e-06, "loss": 1.2765, "step": 327000 }, { "epoch": 9.26, "learning_rate": 4.8951043134812306e-06, "loss": 1.2778, "step": 327500 }, { "epoch": 9.27, "learning_rate": 4.8947859168124125e-06, "loss": 1.2927, "step": 328000 }, { "epoch": 9.29, "learning_rate": 4.8944670480371934e-06, "loss": 1.2865, "step": 328500 }, { "epoch": 9.3, "learning_rate": 4.894147707218439e-06, "loss": 1.2806, "step": 329000 }, { "epoch": 9.31, "learning_rate": 4.893828534515693e-06, "loss": 1.2919, "step": 329500 }, { "epoch": 9.33, "learning_rate": 4.89350825074259e-06, "loss": 1.2914, "step": 330000 }, { "epoch": 9.34, "learning_rate": 4.893188137097092e-06, "loss": 1.2913, "step": 330500 }, { "epoch": 9.36, "learning_rate": 4.8928669106217e-06, "loss": 1.2886, "step": 331000 }, { "epoch": 9.37, "learning_rate": 4.89254521241822e-06, "loss": 1.2815, "step": 331500 }, { "epoch": 9.38, "learning_rate": 4.892223042550072e-06, "loss": 1.285, "step": 332000 }, { "epoch": 9.4, "learning_rate": 4.8919004010807695e-06, "loss": 1.2824, "step": 332500 }, { "epoch": 9.41, "learning_rate": 4.891577288073915e-06, "loss": 1.2805, "step": 333000 }, { "epoch": 9.43, "learning_rate": 4.891253703593208e-06, "loss": 1.2844, "step": 333500 }, { "epoch": 9.44, "learning_rate": 4.890929647702439e-06, "loss": 1.2709, "step": 334000 }, { "epoch": 9.46, "learning_rate": 4.890605120465494e-06, "loss": 1.2866, "step": 334500 }, { "epoch": 9.47, "learning_rate": 4.890280121946349e-06, "loss": 1.2897, "step": 335000 }, { "epoch": 9.48, "learning_rate": 4.889954652209073e-06, "loss": 1.2909, "step": 335500 }, { "epoch": 9.5, "learning_rate": 4.889628711317831e-06, "loss": 1.2988, "step": 336000 }, { "epoch": 9.51, "learning_rate": 4.8893029526309435e-06, "loss": 1.2848, "step": 336500 }, { "epoch": 9.53, "learning_rate": 4.888976724800782e-06, "loss": 1.2852, "step": 337000 }, { "epoch": 9.54, "learning_rate": 4.888649372717259e-06, "loss": 1.2939, "step": 337500 }, { "epoch": 9.55, "learning_rate": 4.888321549737091e-06, "loss": 1.3017, "step": 338000 }, { "epoch": 9.57, "learning_rate": 4.8879932559249035e-06, "loss": 1.2691, "step": 338500 }, { "epoch": 9.58, "learning_rate": 4.887664491345417e-06, "loss": 1.2932, "step": 339000 }, { "epoch": 9.6, "learning_rate": 4.887335256063446e-06, "loss": 1.2779, "step": 339500 }, { "epoch": 9.61, "learning_rate": 4.887006210025384e-06, "loss": 1.2972, "step": 340000 }, { "epoch": 9.62, "learning_rate": 4.886676034474327e-06, "loss": 1.3066, "step": 340500 }, { "epoch": 9.64, "learning_rate": 4.8863453884156476e-06, "loss": 1.2951, "step": 341000 }, { "epoch": 9.65, "learning_rate": 4.8860149346169895e-06, "loss": 1.3115, "step": 341500 }, { "epoch": 9.67, "learning_rate": 4.885683348679396e-06, "loss": 1.2997, "step": 342000 }, { "epoch": 9.68, "learning_rate": 4.885351292429877e-06, "loss": 1.3012, "step": 342500 }, { "epoch": 9.7, "learning_rate": 4.885018765933894e-06, "loss": 1.2889, "step": 343000 }, { "epoch": 9.71, "learning_rate": 4.884685769257e-06, "loss": 1.2942, "step": 343500 }, { "epoch": 9.72, "learning_rate": 4.884352302464844e-06, "loss": 1.2945, "step": 344000 }, { "epoch": 9.74, "learning_rate": 4.8840183656231644e-06, "loss": 1.2867, "step": 344500 }, { "epoch": 9.75, "learning_rate": 4.8836839587977915e-06, "loss": 1.2663, "step": 345000 }, { "epoch": 9.77, "learning_rate": 4.8833497522770725e-06, "loss": 1.2968, "step": 345500 }, { "epoch": 9.78, "learning_rate": 4.883014406621821e-06, "loss": 1.3086, "step": 346000 }, { "epoch": 9.79, "learning_rate": 4.882678591180798e-06, "loss": 1.2935, "step": 346500 }, { "epoch": 9.81, "learning_rate": 4.882342979059261e-06, "loss": 1.2973, "step": 347000 }, { "epoch": 9.82, "learning_rate": 4.882006225184634e-06, "loss": 1.3053, "step": 347500 }, { "epoch": 9.84, "learning_rate": 4.8816690017229865e-06, "loss": 1.3102, "step": 348000 }, { "epoch": 24.63, "learning_rate": 4.288886163167568e-06, "loss": 1.2483, "step": 348500 }, { "epoch": 24.66, "learning_rate": 4.286946522349646e-06, "loss": 1.2306, "step": 349000 }, { "epoch": 24.7, "learning_rate": 4.285004679736106e-06, "loss": 1.2313, "step": 349500 }, { "epoch": 24.73, "learning_rate": 4.283060637719597e-06, "loss": 1.2318, "step": 350000 }, { "epoch": 24.77, "learning_rate": 4.281114398695484e-06, "loss": 1.2443, "step": 350500 }, { "epoch": 24.8, "learning_rate": 4.279169864117727e-06, "loss": 1.2297, "step": 351000 }, { "epoch": 9.31, "learning_rate": 4.89380507022057e-06, "loss": 1.2113, "step": 351500 }, { "epoch": 9.33, "learning_rate": 4.8935047813286015e-06, "loss": 1.221, "step": 352000 }, { "epoch": 9.34, "learning_rate": 4.8932040777096225e-06, "loss": 1.2172, "step": 352500 }, { "epoch": 9.35, "learning_rate": 4.892902959415736e-06, "loss": 1.2136, "step": 353000 }, { "epoch": 9.37, "learning_rate": 4.8926014264991195e-06, "loss": 1.2045, "step": 353500 }, { "epoch": 9.38, "learning_rate": 4.892299479012018e-06, "loss": 1.2177, "step": 354000 }, { "epoch": 9.39, "learning_rate": 4.891997117006753e-06, "loss": 1.2145, "step": 354500 }, { "epoch": 9.41, "learning_rate": 4.891694946502256e-06, "loss": 1.2144, "step": 355000 }, { "epoch": 9.42, "learning_rate": 4.891391756446678e-06, "loss": 1.2086, "step": 355500 }, { "epoch": 9.43, "learning_rate": 4.891088152030219e-06, "loss": 1.2151, "step": 356000 }, { "epoch": 9.45, "learning_rate": 4.890784741756376e-06, "loss": 1.2111, "step": 356500 }, { "epoch": 9.46, "learning_rate": 4.890480918882196e-06, "loss": 1.2204, "step": 357000 }, { "epoch": 9.47, "learning_rate": 4.890176073355719e-06, "loss": 1.227, "step": 357500 }, { "epoch": 9.49, "learning_rate": 4.889870813679004e-06, "loss": 1.2339, "step": 358000 }, { "epoch": 9.5, "learning_rate": 4.8895651399049425e-06, "loss": 1.2306, "step": 358500 }, { "epoch": 9.51, "learning_rate": 4.8892590520865e-06, "loss": 1.218, "step": 359000 }, { "epoch": 9.53, "learning_rate": 4.888952550276713e-06, "loss": 1.2228, "step": 359500 }, { "epoch": 9.54, "learning_rate": 4.88864563452869e-06, "loss": 1.2332, "step": 360000 }, { "epoch": 9.55, "learning_rate": 4.88833830489561e-06, "loss": 1.2273, "step": 360500 }, { "epoch": 9.57, "learning_rate": 4.888031177330624e-06, "loss": 1.2061, "step": 361000 }, { "epoch": 9.58, "learning_rate": 4.88772302091476e-06, "loss": 1.2242, "step": 361500 }, { "epoch": 9.59, "learning_rate": 4.887414450773703e-06, "loss": 1.218, "step": 362000 }, { "epoch": 9.61, "learning_rate": 4.8871054669609185e-06, "loss": 1.2264, "step": 362500 }, { "epoch": 9.62, "learning_rate": 4.886796069529946e-06, "loss": 1.2285, "step": 363000 }, { "epoch": 9.63, "learning_rate": 4.886486258534393e-06, "loss": 1.2361, "step": 363500 }, { "epoch": 9.65, "learning_rate": 4.886176034027944e-06, "loss": 1.2376, "step": 364000 }, { "epoch": 9.66, "learning_rate": 4.885866017752871e-06, "loss": 1.2366, "step": 364500 }, { "epoch": 9.67, "learning_rate": 4.885555589726332e-06, "loss": 1.2311, "step": 365000 }, { "epoch": 9.69, "learning_rate": 4.885244126663284e-06, "loss": 1.238, "step": 365500 }, { "epoch": 9.7, "learning_rate": 4.884932250304566e-06, "loss": 1.2222, "step": 366000 }, { "epoch": 9.71, "learning_rate": 4.8846199607042175e-06, "loss": 1.2327, "step": 366500 }, { "epoch": 9.73, "learning_rate": 4.884307257916347e-06, "loss": 1.2383, "step": 367000 }, { "epoch": 9.74, "learning_rate": 4.883994141995138e-06, "loss": 1.2232, "step": 367500 }, { "epoch": 9.75, "learning_rate": 4.883680612994847e-06, "loss": 1.201, "step": 368000 }, { "epoch": 9.77, "learning_rate": 4.883366670969796e-06, "loss": 1.2324, "step": 368500 }, { "epoch": 9.78, "learning_rate": 4.883052315974385e-06, "loss": 1.2413, "step": 369000 }, { "epoch": 9.79, "learning_rate": 4.882737548063082e-06, "loss": 1.2376, "step": 369500 }, { "epoch": 9.8, "learning_rate": 4.882422367290427e-06, "loss": 1.2251, "step": 370000 }, { "epoch": 9.82, "learning_rate": 4.882106773711033e-06, "loss": 1.2382, "step": 370500 }, { "epoch": 9.83, "learning_rate": 4.881791399804135e-06, "loss": 1.2421, "step": 371000 }, { "epoch": 9.84, "learning_rate": 4.881474981600724e-06, "loss": 1.2825, "step": 371500 }, { "epoch": 9.86, "learning_rate": 4.8811581507547275e-06, "loss": 1.2842, "step": 372000 }, { "epoch": 9.87, "learning_rate": 4.880840907321045e-06, "loss": 1.2895, "step": 372500 }, { "epoch": 9.88, "learning_rate": 4.880523887078249e-06, "loss": 1.3026, "step": 373000 }, { "epoch": 9.9, "learning_rate": 4.880205819459074e-06, "loss": 1.3, "step": 373500 }, { "epoch": 9.91, "learning_rate": 4.879887339417224e-06, "loss": 1.3, "step": 374000 }, { "epoch": 9.92, "learning_rate": 4.879568447007884e-06, "loss": 1.304, "step": 374500 }, { "epoch": 9.94, "learning_rate": 4.879249142286308e-06, "loss": 1.3006, "step": 375000 }, { "epoch": 9.95, "learning_rate": 4.878929425307822e-06, "loss": 1.2926, "step": 375500 }, { "epoch": 9.96, "learning_rate": 4.878609296127827e-06, "loss": 1.295, "step": 376000 }, { "epoch": 9.98, "learning_rate": 4.878288754801789e-06, "loss": 1.2845, "step": 376500 }, { "epoch": 9.99, "learning_rate": 4.87796780138525e-06, "loss": 1.2812, "step": 377000 }, { "epoch": 10.0, "learning_rate": 4.877647079075899e-06, "loss": 1.2815, "step": 377500 }, { "epoch": 10.02, "learning_rate": 4.877325302469169e-06, "loss": 1.2275, "step": 378000 }, { "epoch": 10.03, "learning_rate": 4.877003113938878e-06, "loss": 1.2292, "step": 378500 }, { "epoch": 10.04, "learning_rate": 4.876680513540851e-06, "loss": 1.1888, "step": 379000 }, { "epoch": 10.06, "learning_rate": 4.876358147766357e-06, "loss": 1.1984, "step": 379500 }, { "epoch": 10.07, "learning_rate": 4.876034724624079e-06, "loss": 1.1754, "step": 380000 }, { "epoch": 10.08, "learning_rate": 4.8757108897818595e-06, "loss": 1.1909, "step": 380500 }, { "epoch": 10.1, "learning_rate": 4.875386643295812e-06, "loss": 1.1928, "step": 381000 }, { "epoch": 10.11, "learning_rate": 4.8750619852221155e-06, "loss": 1.1847, "step": 381500 }, { "epoch": 10.12, "learning_rate": 4.874736915617028e-06, "loss": 1.1797, "step": 382000 }, { "epoch": 10.14, "learning_rate": 4.874412085909648e-06, "loss": 1.1904, "step": 382500 }, { "epoch": 10.15, "learning_rate": 4.874086846427514e-06, "loss": 1.1979, "step": 383000 }, { "epoch": 10.16, "learning_rate": 4.873760544211823e-06, "loss": 1.2004, "step": 383500 }, { "epoch": 10.18, "learning_rate": 4.873433830690242e-06, "loss": 1.1977, "step": 384000 }, { "epoch": 10.19, "learning_rate": 4.8731067059193815e-06, "loss": 1.1968, "step": 384500 }, { "epoch": 10.2, "learning_rate": 4.872779169955924e-06, "loss": 1.1962, "step": 385000 }, { "epoch": 10.22, "learning_rate": 4.8724518791610955e-06, "loss": 1.1854, "step": 385500 }, { "epoch": 10.23, "learning_rate": 4.872123521804872e-06, "loss": 1.1861, "step": 386000 }, { "epoch": 10.24, "learning_rate": 4.87179475342641e-06, "loss": 1.1974, "step": 386500 }, { "epoch": 10.26, "learning_rate": 4.871465574082675e-06, "loss": 1.2087, "step": 387000 }, { "epoch": 10.16, "learning_rate": 4.8738809087471e-06, "loss": 1.1914, "step": 387500 }, { "epoch": 10.17, "learning_rate": 4.873557875957663e-06, "loss": 1.1976, "step": 388000 }, { "epoch": 10.18, "learning_rate": 4.873234440732965e-06, "loss": 1.2074, "step": 388500 }, { "epoch": 10.2, "learning_rate": 4.872910603127847e-06, "loss": 1.2017, "step": 389000 }, { "epoch": 10.21, "learning_rate": 4.872587012078557e-06, "loss": 1.1831, "step": 389500 }, { "epoch": 10.22, "learning_rate": 4.872263020366089e-06, "loss": 1.183, "step": 390000 }, { "epoch": 10.24, "learning_rate": 4.871937977558167e-06, "loss": 1.2121, "step": 390500 }, { "epoch": 10.25, "learning_rate": 4.871612532589636e-06, "loss": 1.1972, "step": 391000 }, { "epoch": 10.26, "learning_rate": 4.871287337611088e-06, "loss": 1.1987, "step": 391500 }, { "epoch": 10.28, "learning_rate": 4.870961089290992e-06, "loss": 1.199, "step": 392000 }, { "epoch": 10.29, "learning_rate": 4.8706344389759185e-06, "loss": 1.2001, "step": 392500 }, { "epoch": 10.3, "learning_rate": 4.870307386721251e-06, "loss": 1.1861, "step": 393000 }, { "epoch": 10.32, "learning_rate": 4.8699799325824415e-06, "loss": 1.1936, "step": 393500 }, { "epoch": 10.33, "learning_rate": 4.86965207661501e-06, "loss": 1.203, "step": 394000 }, { "epoch": 10.34, "learning_rate": 4.869323818874543e-06, "loss": 1.1971, "step": 394500 }, { "epoch": 10.35, "learning_rate": 4.868995159416697e-06, "loss": 1.1898, "step": 395000 }, { "epoch": 10.37, "learning_rate": 4.868666098297196e-06, "loss": 1.1943, "step": 395500 }, { "epoch": 10.38, "learning_rate": 4.8683366355718324e-06, "loss": 1.219, "step": 396000 }, { "epoch": 10.39, "learning_rate": 4.8680067712964665e-06, "loss": 1.2131, "step": 396500 }, { "epoch": 10.41, "learning_rate": 4.8676771664592185e-06, "loss": 1.2046, "step": 397000 }, { "epoch": 10.42, "learning_rate": 4.86734650005452e-06, "loss": 1.1869, "step": 397500 }, { "epoch": 10.43, "learning_rate": 4.867015432267696e-06, "loss": 1.2119, "step": 398000 }, { "epoch": 10.45, "learning_rate": 4.866683963154878e-06, "loss": 1.1984, "step": 398500 }, { "epoch": 10.46, "learning_rate": 4.866352092772267e-06, "loss": 1.197, "step": 399000 }, { "epoch": 10.47, "learning_rate": 4.866019821176132e-06, "loss": 1.2049, "step": 399500 }, { "epoch": 10.49, "learning_rate": 4.865687148422809e-06, "loss": 1.1992, "step": 400000 }, { "epoch": 10.5, "learning_rate": 4.865354074568701e-06, "loss": 1.2089, "step": 400500 }, { "epoch": 10.51, "learning_rate": 4.865021267020285e-06, "loss": 1.2094, "step": 401000 }, { "epoch": 10.52, "learning_rate": 4.864687391936014e-06, "loss": 1.1975, "step": 401500 }, { "epoch": 10.54, "learning_rate": 4.864353115920468e-06, "loss": 1.212, "step": 402000 }, { "epoch": 10.55, "learning_rate": 4.864018439030322e-06, "loss": 1.1958, "step": 402500 }, { "epoch": 10.56, "learning_rate": 4.863684031877717e-06, "loss": 1.2087, "step": 403000 }, { "epoch": 10.58, "learning_rate": 4.863348554210139e-06, "loss": 1.2177, "step": 403500 }, { "epoch": 10.59, "learning_rate": 4.863012675838284e-06, "loss": 1.2081, "step": 404000 }, { "epoch": 10.6, "learning_rate": 4.8626770697769475e-06, "loss": 1.1909, "step": 404500 }, { "epoch": 10.62, "learning_rate": 4.862340390968576e-06, "loss": 1.2031, "step": 405000 }, { "epoch": 10.63, "learning_rate": 4.86200331162686e-06, "loss": 1.214, "step": 405500 }, { "epoch": 10.64, "learning_rate": 4.8616658318089535e-06, "loss": 1.2061, "step": 406000 }, { "epoch": 10.66, "learning_rate": 4.861327951572075e-06, "loss": 1.2063, "step": 406500 }, { "epoch": 10.67, "learning_rate": 4.860989670973512e-06, "loss": 1.1869, "step": 407000 }, { "epoch": 10.68, "learning_rate": 4.86065099007062e-06, "loss": 1.2214, "step": 407500 }, { "epoch": 10.7, "learning_rate": 4.860311908920821e-06, "loss": 1.1971, "step": 408000 }, { "epoch": 10.71, "learning_rate": 4.859972427581606e-06, "loss": 1.2011, "step": 408500 }, { "epoch": 10.72, "learning_rate": 4.859633226272771e-06, "loss": 1.181, "step": 409000 }, { "epoch": 10.73, "learning_rate": 4.859292945527562e-06, "loss": 1.2205, "step": 409500 }, { "epoch": 10.75, "learning_rate": 4.8589522647657026e-06, "loss": 1.213, "step": 410000 }, { "epoch": 10.76, "learning_rate": 4.858611184044954e-06, "loss": 1.2132, "step": 410500 }, { "epoch": 10.77, "learning_rate": 4.858269703423148e-06, "loss": 1.2043, "step": 411000 }, { "epoch": 10.79, "learning_rate": 4.857928507118115e-06, "loss": 1.208, "step": 411500 }, { "epoch": 10.8, "learning_rate": 4.857586227667465e-06, "loss": 1.221, "step": 412000 }, { "epoch": 10.81, "learning_rate": 4.857244234246783e-06, "loss": 1.2041, "step": 412500 }, { "epoch": 10.83, "learning_rate": 4.85690115619896e-06, "loss": 1.2149, "step": 413000 }, { "epoch": 10.84, "learning_rate": 4.856557678540012e-06, "loss": 1.2073, "step": 413500 }, { "epoch": 10.85, "learning_rate": 4.856213801328176e-06, "loss": 1.2172, "step": 414000 }, { "epoch": 10.87, "learning_rate": 4.855869524621757e-06, "loss": 1.202, "step": 414500 }, { "epoch": 10.88, "learning_rate": 4.855524848479127e-06, "loss": 1.2053, "step": 415000 }, { "epoch": 10.89, "learning_rate": 4.855179772958722e-06, "loss": 1.2172, "step": 415500 }, { "epoch": 10.9, "learning_rate": 4.854834298119055e-06, "loss": 1.2146, "step": 416000 }, { "epoch": 10.92, "learning_rate": 4.854488424018698e-06, "loss": 1.2087, "step": 416500 }, { "epoch": 10.93, "learning_rate": 4.854143536604635e-06, "loss": 1.203, "step": 417000 }, { "epoch": 10.94, "learning_rate": 4.85379686575535e-06, "loss": 1.213, "step": 417500 }, { "epoch": 10.96, "learning_rate": 4.853450490359387e-06, "loss": 1.2252, "step": 418000 }, { "epoch": 10.97, "learning_rate": 4.853103022197354e-06, "loss": 1.2183, "step": 418500 }, { "epoch": 10.98, "learning_rate": 4.852755155068167e-06, "loss": 1.2286, "step": 419000 }, { "epoch": 11.0, "learning_rate": 4.8524068890308085e-06, "loss": 1.2051, "step": 419500 }, { "epoch": 11.01, "learning_rate": 4.852058224144326e-06, "loss": 1.17, "step": 420000 }, { "epoch": 11.02, "learning_rate": 4.851709160467835e-06, "loss": 1.1583, "step": 420500 }, { "epoch": 11.04, "learning_rate": 4.851359698060518e-06, "loss": 1.1584, "step": 421000 }, { "epoch": 11.05, "learning_rate": 4.851009836981629e-06, "loss": 1.1464, "step": 421500 }, { "epoch": 11.06, "learning_rate": 4.850659577290486e-06, "loss": 1.1508, "step": 422000 }, { "epoch": 11.08, "learning_rate": 4.8503096207606785e-06, "loss": 1.1493, "step": 422500 }, { "epoch": 11.09, "learning_rate": 4.849958564820179e-06, "loss": 1.1648, "step": 423000 }, { "epoch": 11.1, "learning_rate": 4.849607110445667e-06, "loss": 1.1585, "step": 423500 }, { "epoch": 11.11, "learning_rate": 4.849255257696732e-06, "loss": 1.1564, "step": 424000 }, { "epoch": 11.13, "learning_rate": 4.8489030066330305e-06, "loss": 1.174, "step": 424500 }, { "epoch": 11.14, "learning_rate": 4.848550357314286e-06, "loss": 1.1684, "step": 425000 }, { "epoch": 11.15, "learning_rate": 4.84819730980029e-06, "loss": 1.172, "step": 425500 }, { "epoch": 11.17, "learning_rate": 4.847843864150902e-06, "loss": 1.1606, "step": 426000 }, { "epoch": 11.18, "learning_rate": 4.847490728510736e-06, "loss": 1.1452, "step": 426500 }, { "epoch": 11.19, "learning_rate": 4.847137196445449e-06, "loss": 1.1545, "step": 427000 }, { "epoch": 11.21, "learning_rate": 4.846782558341414e-06, "loss": 1.1576, "step": 427500 }, { "epoch": 11.22, "learning_rate": 4.846427522341857e-06, "loss": 1.1528, "step": 428000 }, { "epoch": 11.23, "learning_rate": 4.8460720885069726e-06, "loss": 1.1541, "step": 428500 }, { "epoch": 11.25, "learning_rate": 4.845716256897027e-06, "loss": 1.1655, "step": 429000 }, { "epoch": 11.26, "learning_rate": 4.845360027572349e-06, "loss": 1.1485, "step": 429500 }, { "epoch": 11.27, "learning_rate": 4.845003400593338e-06, "loss": 1.174, "step": 430000 }, { "epoch": 11.28, "learning_rate": 4.844646376020461e-06, "loss": 1.1586, "step": 430500 }, { "epoch": 11.3, "learning_rate": 4.844288953914249e-06, "loss": 1.1561, "step": 431000 }, { "epoch": 11.31, "learning_rate": 4.8439318503711e-06, "loss": 1.1568, "step": 431500 }, { "epoch": 11.32, "learning_rate": 4.843574351003822e-06, "loss": 1.174, "step": 432000 }, { "epoch": 11.34, "learning_rate": 4.843215738250767e-06, "loss": 1.1486, "step": 432500 }, { "epoch": 11.35, "learning_rate": 4.8428567282069416e-06, "loss": 1.1571, "step": 433000 }, { "epoch": 11.36, "learning_rate": 4.842497320933216e-06, "loss": 1.1784, "step": 433500 }, { "epoch": 11.38, "learning_rate": 4.842137516490527e-06, "loss": 1.1644, "step": 434000 }, { "epoch": 11.39, "learning_rate": 4.841777314939881e-06, "loss": 1.1631, "step": 434500 }, { "epoch": 11.4, "learning_rate": 4.841416716342348e-06, "loss": 1.1528, "step": 435000 }, { "epoch": 11.42, "learning_rate": 4.841055720759067e-06, "loss": 1.1698, "step": 435500 }, { "epoch": 11.43, "learning_rate": 4.840695051432351e-06, "loss": 1.1567, "step": 436000 }, { "epoch": 11.44, "learning_rate": 4.84033326285493e-06, "loss": 1.1675, "step": 436500 }, { "epoch": 11.46, "learning_rate": 4.839971077475459e-06, "loss": 1.1662, "step": 437000 }, { "epoch": 11.47, "learning_rate": 4.839608495355349e-06, "loss": 1.1727, "step": 437500 }, { "epoch": 11.48, "learning_rate": 4.839246242909518e-06, "loss": 1.1743, "step": 438000 }, { "epoch": 11.49, "learning_rate": 4.838882868285796e-06, "loss": 1.1701, "step": 438500 }, { "epoch": 11.51, "learning_rate": 4.83851982504402e-06, "loss": 1.1748, "step": 439000 }, { "epoch": 11.52, "learning_rate": 4.838155658162633e-06, "loss": 1.1792, "step": 439500 }, { "epoch": 11.53, "learning_rate": 4.837791094848409e-06, "loss": 1.1614, "step": 440000 }, { "epoch": 11.55, "learning_rate": 4.83742613516316e-06, "loss": 1.1754, "step": 440500 }, { "epoch": 11.56, "learning_rate": 4.837060779168764e-06, "loss": 1.1705, "step": 441000 }, { "epoch": 11.57, "learning_rate": 4.8366950269271675e-06, "loss": 1.1744, "step": 441500 }, { "epoch": 11.59, "learning_rate": 4.8363288785003836e-06, "loss": 1.1739, "step": 442000 }, { "epoch": 11.6, "learning_rate": 4.835963067434883e-06, "loss": 1.1749, "step": 442500 }, { "epoch": 11.61, "learning_rate": 4.835596127616091e-06, "loss": 1.1778, "step": 443000 }, { "epoch": 11.63, "learning_rate": 4.83522879179843e-06, "loss": 1.1748, "step": 443500 }, { "epoch": 11.64, "learning_rate": 4.834861060044179e-06, "loss": 1.1721, "step": 444000 }, { "epoch": 11.65, "learning_rate": 4.834492932415691e-06, "loss": 1.1626, "step": 444500 }, { "epoch": 11.67, "learning_rate": 4.8341244089753775e-06, "loss": 1.1839, "step": 445000 }, { "epoch": 11.68, "learning_rate": 4.833755489785724e-06, "loss": 1.1904, "step": 445500 }, { "epoch": 11.69, "learning_rate": 4.833386174909278e-06, "loss": 1.1619, "step": 446000 }, { "epoch": 11.7, "learning_rate": 4.833017204224453e-06, "loss": 1.1737, "step": 446500 }, { "epoch": 11.72, "learning_rate": 4.832647098953405e-06, "loss": 1.1587, "step": 447000 }, { "epoch": 11.73, "learning_rate": 4.832276598183493e-06, "loss": 1.1805, "step": 447500 }, { "epoch": 11.74, "learning_rate": 4.831905701977536e-06, "loss": 1.1755, "step": 448000 }, { "epoch": 11.76, "learning_rate": 4.8315358963522344e-06, "loss": 1.1717, "step": 448500 }, { "epoch": 11.77, "learning_rate": 4.831164211044022e-06, "loss": 1.1802, "step": 449000 }, { "epoch": 11.78, "learning_rate": 4.830792130488371e-06, "loss": 1.1907, "step": 449500 }, { "epoch": 11.8, "learning_rate": 4.830419654748364e-06, "loss": 1.1751, "step": 450000 }, { "epoch": 11.81, "learning_rate": 4.830046783887155e-06, "loss": 1.192, "step": 450500 }, { "epoch": 11.82, "learning_rate": 4.829673517967963e-06, "loss": 1.1919, "step": 451000 }, { "epoch": 11.84, "learning_rate": 4.829299857054076e-06, "loss": 1.172, "step": 451500 }, { "epoch": 11.85, "learning_rate": 4.828925801208848e-06, "loss": 1.1816, "step": 452000 }, { "epoch": 11.86, "learning_rate": 4.8285513504957e-06, "loss": 1.1849, "step": 452500 }, { "epoch": 11.87, "learning_rate": 4.82817650497812e-06, "loss": 1.172, "step": 453000 }, { "epoch": 11.89, "learning_rate": 4.827801264719662e-06, "loss": 1.1703, "step": 453500 }, { "epoch": 11.9, "learning_rate": 4.827425629783949e-06, "loss": 1.2058, "step": 454000 }, { "epoch": 11.91, "learning_rate": 4.827050352687549e-06, "loss": 1.1785, "step": 454500 }, { "epoch": 11.93, "learning_rate": 4.826673929377493e-06, "loss": 1.1915, "step": 455000 }, { "epoch": 11.94, "learning_rate": 4.82629711158132e-06, "loss": 1.1824, "step": 455500 }, { "epoch": 11.95, "learning_rate": 4.82591989936292e-06, "loss": 1.1688, "step": 456000 }, { "epoch": 11.97, "learning_rate": 4.825542292786247e-06, "loss": 1.1779, "step": 456500 }, { "epoch": 11.98, "learning_rate": 4.825165048310532e-06, "loss": 1.1804, "step": 457000 }, { "epoch": 11.99, "learning_rate": 4.82478665399785e-06, "loss": 1.1804, "step": 457500 }, { "epoch": 12.01, "learning_rate": 4.824407865519037e-06, "loss": 1.1613, "step": 458000 }, { "epoch": 12.02, "learning_rate": 4.824028682938317e-06, "loss": 1.1089, "step": 458500 }, { "epoch": 12.03, "learning_rate": 4.82364910631998e-06, "loss": 1.1197, "step": 459000 }, { "epoch": 12.05, "learning_rate": 4.823269896062709e-06, "loss": 1.1173, "step": 459500 }, { "epoch": 12.06, "learning_rate": 4.82288953235003e-06, "loss": 1.123, "step": 460000 }, { "epoch": 12.07, "learning_rate": 4.822509536701004e-06, "loss": 1.1095, "step": 460500 }, { "epoch": 12.08, "learning_rate": 4.8221283861514285e-06, "loss": 1.1206, "step": 461000 }, { "epoch": 12.1, "learning_rate": 4.82174684188643e-06, "loss": 1.1055, "step": 461500 }, { "epoch": 12.11, "learning_rate": 4.821364903970699e-06, "loss": 1.1172, "step": 462000 }, { "epoch": 12.12, "learning_rate": 4.820982572468994e-06, "loss": 1.1237, "step": 462500 }, { "epoch": 12.14, "learning_rate": 4.820599847446137e-06, "loss": 1.122, "step": 463000 }, { "epoch": 12.15, "learning_rate": 4.820216728967021e-06, "loss": 1.0988, "step": 463500 }, { "epoch": 12.16, "learning_rate": 4.819833217096603e-06, "loss": 1.1121, "step": 464000 }, { "epoch": 12.18, "learning_rate": 4.819449311899906e-06, "loss": 1.1237, "step": 464500 }, { "epoch": 12.19, "learning_rate": 4.819065782431369e-06, "loss": 1.12, "step": 465000 }, { "epoch": 12.2, "learning_rate": 4.818681091563782e-06, "loss": 1.1237, "step": 465500 }, { "epoch": 12.22, "learning_rate": 4.818296007565259e-06, "loss": 1.1215, "step": 466000 }, { "epoch": 12.23, "learning_rate": 4.817910530501091e-06, "loss": 1.1335, "step": 466500 }, { "epoch": 12.24, "learning_rate": 4.817524660436635e-06, "loss": 1.114, "step": 467000 }, { "epoch": 12.25, "learning_rate": 4.817138397437315e-06, "loss": 1.1203, "step": 467500 }, { "epoch": 12.27, "learning_rate": 4.816751741568621e-06, "loss": 1.1168, "step": 468000 }, { "epoch": 12.28, "learning_rate": 4.816364692896113e-06, "loss": 1.12, "step": 468500 }, { "epoch": 12.29, "learning_rate": 4.815978026760142e-06, "loss": 1.1325, "step": 469000 }, { "epoch": 12.31, "learning_rate": 4.815590193462219e-06, "loss": 1.1225, "step": 469500 }, { "epoch": 12.32, "learning_rate": 4.815201967557418e-06, "loss": 1.1121, "step": 470000 }, { "epoch": 12.33, "learning_rate": 4.814813349111565e-06, "loss": 1.1315, "step": 470500 }, { "epoch": 12.35, "learning_rate": 4.814425116604039e-06, "loss": 1.1365, "step": 471000 }, { "epoch": 12.36, "learning_rate": 4.814036493255242e-06, "loss": 1.1201, "step": 471500 }, { "epoch": 12.37, "learning_rate": 4.8136466991510785e-06, "loss": 1.1359, "step": 472000 }, { "epoch": 12.39, "learning_rate": 4.813256512769557e-06, "loss": 1.1303, "step": 472500 }, { "epoch": 12.4, "learning_rate": 4.812865934176834e-06, "loss": 1.1264, "step": 473000 }, { "epoch": 12.41, "learning_rate": 4.812474963439131e-06, "loss": 1.1238, "step": 473500 }, { "epoch": 12.43, "learning_rate": 4.8120843837396205e-06, "loss": 1.1424, "step": 474000 }, { "epoch": 12.44, "learning_rate": 4.8116926296948485e-06, "loss": 1.136, "step": 474500 }, { "epoch": 12.45, "learning_rate": 4.811300483704031e-06, "loss": 1.1284, "step": 475000 }, { "epoch": 12.46, "learning_rate": 4.810907945833655e-06, "loss": 1.1467, "step": 475500 }, { "epoch": 12.48, "learning_rate": 4.810515802400626e-06, "loss": 1.1243, "step": 476000 }, { "epoch": 12.49, "learning_rate": 4.810122481754289e-06, "loss": 1.1339, "step": 476500 }, { "epoch": 12.5, "learning_rate": 4.809728769428122e-06, "loss": 1.1389, "step": 477000 }, { "epoch": 12.52, "learning_rate": 4.809334665488881e-06, "loss": 1.1359, "step": 477500 }, { "epoch": 12.53, "learning_rate": 4.8089401700033835e-06, "loss": 1.1389, "step": 478000 }, { "epoch": 12.54, "learning_rate": 4.808545283038518e-06, "loss": 1.1266, "step": 478500 }, { "epoch": 12.56, "learning_rate": 4.808150004661236e-06, "loss": 1.1358, "step": 479000 }, { "epoch": 12.57, "learning_rate": 4.807754334938557e-06, "loss": 1.1402, "step": 479500 }, { "epoch": 12.58, "learning_rate": 4.807358273937567e-06, "loss": 1.1276, "step": 480000 }, { "epoch": 12.6, "learning_rate": 4.806962615020226e-06, "loss": 1.1438, "step": 480500 }, { "epoch": 12.61, "learning_rate": 4.806565772446356e-06, "loss": 1.1376, "step": 481000 }, { "epoch": 12.62, "learning_rate": 4.806168538795695e-06, "loss": 1.1352, "step": 481500 }, { "epoch": 12.63, "learning_rate": 4.805770914135594e-06, "loss": 1.1314, "step": 482000 }, { "epoch": 12.65, "learning_rate": 4.805373694954788e-06, "loss": 1.1325, "step": 482500 }, { "epoch": 12.66, "learning_rate": 4.804975289259805e-06, "loss": 1.1302, "step": 483000 }, { "epoch": 12.67, "learning_rate": 4.804576492757695e-06, "loss": 1.1332, "step": 483500 }, { "epoch": 12.69, "learning_rate": 4.804178104280471e-06, "loss": 1.154, "step": 484000 }, { "epoch": 12.7, "learning_rate": 4.8037785271483004e-06, "loss": 1.1361, "step": 484500 }, { "epoch": 12.71, "learning_rate": 4.803378559411913e-06, "loss": 1.1456, "step": 485000 }, { "epoch": 12.73, "learning_rate": 4.802978201139125e-06, "loss": 1.1398, "step": 485500 }, { "epoch": 12.74, "learning_rate": 4.802577452397815e-06, "loss": 1.145, "step": 486000 }, { "epoch": 12.75, "learning_rate": 4.802176313255932e-06, "loss": 1.1377, "step": 486500 }, { "epoch": 12.77, "learning_rate": 4.801775587229943e-06, "loss": 1.1384, "step": 487000 }, { "epoch": 12.78, "learning_rate": 4.801373668271478e-06, "loss": 1.1282, "step": 487500 }, { "epoch": 12.79, "learning_rate": 4.800971359116539e-06, "loss": 1.141, "step": 488000 }, { "epoch": 12.81, "learning_rate": 4.800568659833339e-06, "loss": 1.1376, "step": 488500 }, { "epoch": 12.82, "learning_rate": 4.800165570490154e-06, "loss": 1.1313, "step": 489000 }, { "epoch": 12.83, "learning_rate": 4.799762091155328e-06, "loss": 1.149, "step": 489500 }, { "epoch": 12.84, "learning_rate": 4.79935822189727e-06, "loss": 1.1365, "step": 490000 }, { "epoch": 12.86, "learning_rate": 4.798953962784457e-06, "loss": 1.1408, "step": 490500 }, { "epoch": 12.87, "learning_rate": 4.79855012357219e-06, "loss": 1.1372, "step": 491000 }, { "epoch": 12.88, "learning_rate": 4.7981450857349246e-06, "loss": 1.1431, "step": 491500 }, { "epoch": 12.9, "learning_rate": 4.79773965824859e-06, "loss": 1.1479, "step": 492000 }, { "epoch": 12.91, "learning_rate": 4.797333841181927e-06, "loss": 1.1326, "step": 492500 }, { "epoch": 12.92, "learning_rate": 4.796928447405582e-06, "loss": 1.1603, "step": 493000 }, { "epoch": 12.94, "learning_rate": 4.796521852163562e-06, "loss": 1.1421, "step": 493500 }, { "epoch": 12.95, "learning_rate": 4.796115681905471e-06, "loss": 1.1401, "step": 494000 }, { "epoch": 12.96, "learning_rate": 4.7957083087632925e-06, "loss": 1.1387, "step": 494500 }, { "epoch": 12.98, "learning_rate": 4.7953005463852e-06, "loss": 1.1333, "step": 495000 }, { "epoch": 12.99, "learning_rate": 4.7948923948403284e-06, "loss": 1.1458, "step": 495500 }, { "epoch": 13.0, "learning_rate": 4.7944838541978784e-06, "loss": 1.1453, "step": 496000 }, { "epoch": 13.02, "learning_rate": 4.794075742774665e-06, "loss": 1.0791, "step": 496500 }, { "epoch": 13.03, "learning_rate": 4.793666424922779e-06, "loss": 1.0822, "step": 497000 }, { "epoch": 13.04, "learning_rate": 4.793256718181178e-06, "loss": 1.0778, "step": 497500 }, { "epoch": 13.05, "learning_rate": 4.792846622619328e-06, "loss": 1.0792, "step": 498000 }, { "epoch": 13.07, "learning_rate": 4.792436138306759e-06, "loss": 1.0725, "step": 498500 }, { "epoch": 13.08, "learning_rate": 4.792025265313071e-06, "loss": 1.085, "step": 499000 }, { "epoch": 13.09, "learning_rate": 4.791614003707925e-06, "loss": 1.0714, "step": 499500 }, { "epoch": 13.11, "learning_rate": 4.79120235356105e-06, "loss": 1.0867, "step": 500000 }, { "epoch": 13.12, "learning_rate": 4.790790314942243e-06, "loss": 1.0799, "step": 500500 }, { "epoch": 13.13, "learning_rate": 4.790377887921363e-06, "loss": 1.0864, "step": 501000 }, { "epoch": 13.15, "learning_rate": 4.789965072568339e-06, "loss": 1.0776, "step": 501500 }, { "epoch": 13.16, "learning_rate": 4.78955186895316e-06, "loss": 1.08, "step": 502000 }, { "epoch": 13.17, "learning_rate": 4.789139104716872e-06, "loss": 1.0866, "step": 502500 }, { "epoch": 13.19, "learning_rate": 4.788725125563804e-06, "loss": 1.0822, "step": 503000 }, { "epoch": 13.2, "learning_rate": 4.788310758358814e-06, "loss": 1.0851, "step": 503500 }, { "epoch": 13.21, "learning_rate": 4.78789600317216e-06, "loss": 1.0887, "step": 504000 }, { "epoch": 13.22, "learning_rate": 4.787481690747446e-06, "loss": 1.0913, "step": 504500 }, { "epoch": 13.24, "learning_rate": 4.787066160584104e-06, "loss": 1.0943, "step": 505000 }, { "epoch": 13.25, "learning_rate": 4.786651074872933e-06, "loss": 1.0976, "step": 505500 }, { "epoch": 13.26, "learning_rate": 4.786234770014151e-06, "loss": 1.0931, "step": 506000 }, { "epoch": 13.28, "learning_rate": 4.785818077525687e-06, "loss": 1.0832, "step": 506500 }, { "epoch": 13.29, "learning_rate": 4.785400997478189e-06, "loss": 1.102, "step": 507000 }, { "epoch": 13.3, "learning_rate": 4.784983529942376e-06, "loss": 1.1045, "step": 507500 }, { "epoch": 13.32, "learning_rate": 4.784565674989026e-06, "loss": 1.096, "step": 508000 }, { "epoch": 13.33, "learning_rate": 4.7841482695601125e-06, "loss": 1.0983, "step": 508500 }, { "epoch": 13.34, "learning_rate": 4.783729640758779e-06, "loss": 1.098, "step": 509000 }, { "epoch": 13.36, "learning_rate": 4.7833106247525075e-06, "loss": 1.0947, "step": 509500 }, { "epoch": 13.37, "learning_rate": 4.782891221612339e-06, "loss": 1.0907, "step": 510000 }, { "epoch": 13.38, "learning_rate": 4.782471431409386e-06, "loss": 1.0983, "step": 510500 }, { "epoch": 13.4, "learning_rate": 4.782051254214823e-06, "loss": 1.0879, "step": 511000 }, { "epoch": 13.41, "learning_rate": 4.7816306900998875e-06, "loss": 1.09, "step": 511500 }, { "epoch": 13.42, "learning_rate": 4.781210581423846e-06, "loss": 1.0785, "step": 512000 }, { "epoch": 13.43, "learning_rate": 4.7807892444556414e-06, "loss": 1.101, "step": 512500 }, { "epoch": 13.45, "learning_rate": 4.780367520781039e-06, "loss": 1.0893, "step": 513000 }, { "epoch": 13.46, "learning_rate": 4.779945410471543e-06, "loss": 1.108, "step": 513500 }, { "epoch": 13.47, "learning_rate": 4.779522913598722e-06, "loss": 1.1003, "step": 514000 }, { "epoch": 13.49, "learning_rate": 4.779100876386609e-06, "loss": 1.0957, "step": 514500 }, { "epoch": 13.5, "learning_rate": 4.778678454298496e-06, "loss": 1.0968, "step": 515000 }, { "epoch": 13.51, "learning_rate": 4.778254799711014e-06, "loss": 1.087, "step": 515500 }, { "epoch": 13.53, "learning_rate": 4.777830758846849e-06, "loss": 1.0854, "step": 516000 }, { "epoch": 13.54, "learning_rate": 1.182370800470857e-06, "loss": 1.0918, "step": 516500 }, { "epoch": 13.55, "learning_rate": 1.177999464679584e-06, "loss": 1.1075, "step": 517000 }, { "epoch": 13.57, "learning_rate": 1.1736337324956105e-06, "loss": 1.0831, "step": 517500 }, { "epoch": 13.58, "learning_rate": 1.169273622424111e-06, "loss": 1.0886, "step": 518000 }, { "epoch": 13.59, "learning_rate": 1.1649191529464277e-06, "loss": 1.0991, "step": 518500 }, { "epoch": 13.6, "learning_rate": 1.1605703425199926e-06, "loss": 1.0869, "step": 519000 }, { "epoch": 13.62, "learning_rate": 1.1562358901657684e-06, "loss": 1.086, "step": 519500 }, { "epoch": 13.63, "learning_rate": 1.151898441707961e-06, "loss": 1.0844, "step": 520000 }, { "epoch": 13.64, "learning_rate": 1.147575365246131e-06, "loss": 1.0818, "step": 520500 }, { "epoch": 13.66, "learning_rate": 1.1432493521510088e-06, "loss": 1.0782, "step": 521000 }, { "epoch": 13.67, "learning_rate": 1.1389290899596366e-06, "loss": 1.0899, "step": 521500 }, { "epoch": 13.68, "learning_rate": 1.1346145969844517e-06, "loss": 1.0921, "step": 522000 }, { "epoch": 13.7, "learning_rate": 1.1303058915134376e-06, "loss": 1.0767, "step": 522500 }, { "epoch": 13.71, "learning_rate": 1.1260029918100456e-06, "loss": 1.0871, "step": 523000 }, { "epoch": 13.72, "learning_rate": 1.1217059161131205e-06, "loss": 1.0825, "step": 523500 }, { "epoch": 13.74, "learning_rate": 1.1174146826368182e-06, "loss": 1.0772, "step": 524000 }, { "epoch": 13.75, "learning_rate": 1.1131293095705312e-06, "loss": 1.0944, "step": 524500 }, { "epoch": 13.76, "learning_rate": 1.108849815078811e-06, "loss": 1.1008, "step": 525000 }, { "epoch": 13.78, "learning_rate": 1.104576217301294e-06, "loss": 1.0844, "step": 525500 }, { "epoch": 13.79, "learning_rate": 1.10030853435262e-06, "loss": 1.0865, "step": 526000 }, { "epoch": 13.8, "learning_rate": 1.096046784322356e-06, "loss": 1.0873, "step": 526500 }, { "epoch": 13.81, "learning_rate": 1.0917909852749228e-06, "loss": 1.0842, "step": 527000 }, { "epoch": 13.83, "learning_rate": 1.0875411552495178e-06, "loss": 1.0884, "step": 527500 }, { "epoch": 13.84, "learning_rate": 1.0833057939589902e-06, "loss": 1.0932, "step": 528000 }, { "epoch": 13.85, "learning_rate": 1.0790679439659621e-06, "loss": 1.0737, "step": 528500 }, { "epoch": 13.87, "learning_rate": 1.074836116924536e-06, "loss": 1.0812, "step": 529000 }, { "epoch": 13.88, "learning_rate": 1.0706103307722973e-06, "loss": 1.1034, "step": 529500 }, { "epoch": 13.89, "learning_rate": 1.0663990368173505e-06, "loss": 1.0821, "step": 530000 }, { "epoch": 13.91, "learning_rate": 1.062185373982534e-06, "loss": 1.072, "step": 530500 }, { "epoch": 13.92, "learning_rate": 1.0579778056600256e-06, "loss": 1.0839, "step": 531000 }, { "epoch": 13.93, "learning_rate": 1.053776349684586e-06, "loss": 1.0819, "step": 531500 }, { "epoch": 13.95, "learning_rate": 1.049581023865064e-06, "loss": 1.0867, "step": 532000 }, { "epoch": 13.96, "learning_rate": 1.0454002181926377e-06, "loss": 1.0671, "step": 532500 }, { "epoch": 13.97, "learning_rate": 1.0412255535423441e-06, "loss": 1.0848, "step": 533000 }, { "epoch": 13.98, "learning_rate": 1.0370487000144682e-06, "loss": 1.078, "step": 533500 }, { "epoch": 14.0, "learning_rate": 1.0328780475465507e-06, "loss": 1.083, "step": 534000 }, { "epoch": 14.01, "learning_rate": 1.0287136138168734e-06, "loss": 1.0407, "step": 534500 }, { "epoch": 14.02, "learning_rate": 1.0245554164773602e-06, "loss": 1.0175, "step": 535000 }, { "epoch": 14.04, "learning_rate": 1.0204034731534989e-06, "loss": 1.0427, "step": 535500 }, { "epoch": 14.05, "learning_rate": 1.0162578014442684e-06, "loss": 1.037, "step": 536000 }, { "epoch": 14.06, "learning_rate": 1.0121184189220635e-06, "loss": 1.0289, "step": 536500 }, { "epoch": 14.08, "learning_rate": 1.0079853431326231e-06, "loss": 1.0478, "step": 537000 }, { "epoch": 14.09, "learning_rate": 1.0038668387747919e-06, "loss": 1.0367, "step": 537500 }, { "epoch": 14.1, "learning_rate": 9.99746416280159e-07, "loss": 1.0446, "step": 538000 }, { "epoch": 14.12, "learning_rate": 9.956323529599061e-07, "loss": 1.0428, "step": 538500 }, { "epoch": 14.13, "learning_rate": 9.91532875250426e-07, "loss": 1.0412, "step": 539000 }, { "epoch": 14.14, "learning_rate": 9.874315697617445e-07, "loss": 1.0476, "step": 539500 }, { "epoch": 14.16, "learning_rate": 9.833366756467894e-07, "loss": 1.033, "step": 540000 }, { "epoch": 14.17, "learning_rate": 9.792482102627217e-07, "loss": 1.0451, "step": 540500 }, { "epoch": 14.18, "learning_rate": 9.751661909394571e-07, "loss": 1.0456, "step": 541000 }, { "epoch": 14.19, "learning_rate": 9.710906349795847e-07, "loss": 1.0404, "step": 541500 }, { "epoch": 14.21, "learning_rate": 9.670215596582979e-07, "loss": 1.0376, "step": 542000 }, { "epoch": 14.22, "learning_rate": 9.629589822233198e-07, "loss": 1.0324, "step": 542500 }, { "epoch": 14.23, "learning_rate": 9.589110255059818e-07, "loss": 1.0485, "step": 543000 }, { "epoch": 14.25, "learning_rate": 9.54869574950421e-07, "loss": 1.0371, "step": 543500 }, { "epoch": 14.26, "learning_rate": 9.50826568152911e-07, "loss": 1.0325, "step": 544000 }, { "epoch": 14.27, "learning_rate": 9.467901278879547e-07, "loss": 1.026, "step": 544500 }, { "epoch": 14.29, "learning_rate": 9.427602712649459e-07, "loss": 1.0272, "step": 545000 }, { "epoch": 14.3, "learning_rate": 9.387370153653708e-07, "loss": 1.0499, "step": 545500 }, { "epoch": 14.31, "learning_rate": 9.347203772427363e-07, "loss": 1.0289, "step": 546000 }, { "epoch": 14.33, "learning_rate": 9.307103739224985e-07, "loss": 1.0375, "step": 546500 }, { "epoch": 14.34, "learning_rate": 9.267070224019925e-07, "loss": 1.0378, "step": 547000 }, { "epoch": 14.35, "learning_rate": 9.227103396503556e-07, "loss": 1.0401, "step": 547500 }, { "epoch": 14.37, "learning_rate": 9.187203426084587e-07, "loss": 1.029, "step": 548000 }, { "epoch": 14.38, "learning_rate": 9.147450080772296e-07, "loss": 1.0448, "step": 548500 }, { "epoch": 14.39, "learning_rate": 9.10768419708154e-07, "loss": 1.0397, "step": 549000 }, { "epoch": 14.4, "learning_rate": 9.068065006374663e-07, "loss": 1.049, "step": 549500 }, { "epoch": 14.42, "learning_rate": 9.028433882291488e-07, "loss": 1.0532, "step": 550000 }, { "epoch": 14.43, "learning_rate": 8.988870457412748e-07, "loss": 1.0347, "step": 550500 }, { "epoch": 14.44, "learning_rate": 8.949374899437233e-07, "loss": 1.0299, "step": 551000 }, { "epoch": 14.46, "learning_rate": 8.909947375776068e-07, "loss": 1.0431, "step": 551500 }, { "epoch": 14.47, "learning_rate": 8.870666704020464e-07, "loss": 1.0337, "step": 552000 }, { "epoch": 14.48, "learning_rate": 8.831375613164294e-07, "loss": 1.0288, "step": 552500 }, { "epoch": 14.5, "learning_rate": 8.792153056789934e-07, "loss": 1.0518, "step": 553000 }, { "epoch": 14.51, "learning_rate": 8.752999201151344e-07, "loss": 1.0395, "step": 553500 }, { "epoch": 14.52, "learning_rate": 8.713914212211255e-07, "loss": 1.0389, "step": 554000 }, { "epoch": 14.54, "learning_rate": 8.674898255640501e-07, "loss": 1.0383, "step": 554500 }, { "epoch": 14.55, "learning_rate": 8.636029321165817e-07, "loss": 1.0271, "step": 555000 }, { "epoch": 14.56, "learning_rate": 8.597151786284866e-07, "loss": 1.0468, "step": 555500 }, { "epoch": 14.57, "learning_rate": 8.558343778697989e-07, "loss": 1.0361, "step": 556000 }, { "epoch": 14.59, "learning_rate": 8.519605462901987e-07, "loss": 1.0345, "step": 556500 }, { "epoch": 14.6, "learning_rate": 8.480937003098225e-07, "loss": 1.0361, "step": 557000 }, { "epoch": 14.61, "learning_rate": 8.442338563191984e-07, "loss": 1.0274, "step": 557500 }, { "epoch": 14.63, "learning_rate": 8.403810306791737e-07, "loss": 1.0317, "step": 558000 }, { "epoch": 14.64, "learning_rate": 8.36535239720849e-07, "loss": 1.0504, "step": 558500 }, { "epoch": 14.65, "learning_rate": 8.326964997455047e-07, "loss": 1.0202, "step": 559000 }, { "epoch": 14.67, "learning_rate": 8.28872483306056e-07, "loss": 1.0332, "step": 559500 }, { "epoch": 14.68, "learning_rate": 8.250478798977172e-07, "loss": 1.0449, "step": 560000 }, { "epoch": 14.69, "learning_rate": 8.212303761642099e-07, "loss": 1.0466, "step": 560500 }, { "epoch": 14.71, "learning_rate": 8.174199882869125e-07, "loss": 1.0305, "step": 561000 }, { "epoch": 14.72, "learning_rate": 8.136167324170433e-07, "loss": 1.0399, "step": 561500 }, { "epoch": 14.73, "learning_rate": 8.098282097465379e-07, "loss": 1.0443, "step": 562000 }, { "epoch": 14.75, "learning_rate": 8.060392518797067e-07, "loss": 1.0334, "step": 562500 }, { "epoch": 14.76, "learning_rate": 8.02257474260206e-07, "loss": 1.0493, "step": 563000 }, { "epoch": 14.77, "learning_rate": 7.98490434888142e-07, "loss": 1.0498, "step": 563500 }, { "epoch": 14.78, "learning_rate": 7.947230513821355e-07, "loss": 1.0275, "step": 564000 }, { "epoch": 14.8, "learning_rate": 7.909628960898113e-07, "loss": 1.0517, "step": 564500 }, { "epoch": 14.81, "learning_rate": 7.872099849494644e-07, "loss": 1.0374, "step": 565000 }, { "epoch": 14.82, "learning_rate": 7.83464333868684e-07, "loss": 1.0497, "step": 565500 }, { "epoch": 14.84, "learning_rate": 7.797259587242842e-07, "loss": 1.0433, "step": 566000 }, { "epoch": 14.85, "learning_rate": 7.75994875362239e-07, "loss": 1.0476, "step": 566500 }, { "epoch": 14.86, "learning_rate": 7.72271099597614e-07, "loss": 1.0389, "step": 567000 }, { "epoch": 14.88, "learning_rate": 7.685546472145017e-07, "loss": 1.0438, "step": 567500 }, { "epoch": 14.89, "learning_rate": 7.648603557785422e-07, "loss": 1.0359, "step": 568000 }, { "epoch": 14.9, "learning_rate": 7.6115856793579e-07, "loss": 1.0527, "step": 568500 }, { "epoch": 14.92, "learning_rate": 7.574715320461648e-07, "loss": 1.034, "step": 569000 }, { "epoch": 14.93, "learning_rate": 7.537844860442989e-07, "loss": 1.0461, "step": 569500 }, { "epoch": 14.94, "learning_rate": 7.50104841783765e-07, "loss": 1.0558, "step": 570000 }, { "epoch": 14.95, "learning_rate": 7.464326148615916e-07, "loss": 1.0249, "step": 570500 }, { "epoch": 14.97, "learning_rate": 7.427678208433686e-07, "loss": 1.0517, "step": 571000 }, { "epoch": 14.98, "learning_rate": 7.391177825104881e-07, "loss": 1.0459, "step": 571500 }, { "epoch": 14.99, "learning_rate": 7.354678859275119e-07, "loss": 1.0349, "step": 572000 }, { "epoch": 15.01, "learning_rate": 7.318254687250431e-07, "loss": 1.0185, "step": 572500 }, { "epoch": 15.02, "learning_rate": 7.281905463423178e-07, "loss": 1.0175, "step": 573000 }, { "epoch": 15.03, "learning_rate": 7.245631341868003e-07, "loss": 1.0251, "step": 573500 }, { "epoch": 15.05, "learning_rate": 7.209432476341224e-07, "loss": 1.0218, "step": 574000 }, { "epoch": 15.06, "learning_rate": 7.173309020280161e-07, "loss": 1.0179, "step": 574500 }, { "epoch": 15.07, "learning_rate": 7.137261126802514e-07, "loss": 1.0104, "step": 575000 }, { "epoch": 15.09, "learning_rate": 7.10128894870567e-07, "loss": 1.0149, "step": 575500 }, { "epoch": 15.1, "learning_rate": 7.065392638466087e-07, "loss": 1.0097, "step": 576000 }, { "epoch": 15.11, "learning_rate": 7.029572348238634e-07, "loss": 1.0106, "step": 576500 }, { "epoch": 15.13, "learning_rate": 6.99382822985597e-07, "loss": 1.0243, "step": 577000 }, { "epoch": 15.14, "learning_rate": 6.958231694146666e-07, "loss": 1.0302, "step": 577500 }, { "epoch": 15.15, "learning_rate": 6.922640220559629e-07, "loss": 1.0229, "step": 578000 }, { "epoch": 15.16, "learning_rate": 6.887125372074113e-07, "loss": 1.0252, "step": 578500 }, { "epoch": 15.18, "learning_rate": 6.851687299228074e-07, "loss": 1.0202, "step": 579000 }, { "epoch": 15.19, "learning_rate": 6.816326152234052e-07, "loss": 1.034, "step": 579500 }, { "epoch": 15.2, "learning_rate": 6.78111257209996e-07, "loss": 1.0314, "step": 580000 }, { "epoch": 15.22, "learning_rate": 6.745905571543002e-07, "loss": 1.0312, "step": 580500 }, { "epoch": 15.23, "learning_rate": 6.710775945218554e-07, "loss": 1.019, "step": 581000 }, { "epoch": 15.24, "learning_rate": 6.675723842031722e-07, "loss": 1.0374, "step": 581500 }, { "epoch": 15.26, "learning_rate": 6.640749410559011e-07, "loss": 1.0248, "step": 582000 }, { "epoch": 15.27, "learning_rate": 6.605922514508042e-07, "loss": 1.0305, "step": 582500 }, { "epoch": 15.28, "learning_rate": 6.571103714792401e-07, "loss": 1.0334, "step": 583000 }, { "epoch": 15.3, "learning_rate": 6.536363030247672e-07, "loss": 1.0313, "step": 583500 }, { "epoch": 15.31, "learning_rate": 6.501700608130337e-07, "loss": 1.0269, "step": 584000 }, { "epoch": 15.32, "learning_rate": 6.467116595365149e-07, "loss": 1.0397, "step": 584500 }, { "epoch": 15.33, "learning_rate": 6.432680070962052e-07, "loss": 1.0271, "step": 585000 }, { "epoch": 15.35, "learning_rate": 6.398253158795223e-07, "loss": 1.0367, "step": 585500 }, { "epoch": 15.36, "learning_rate": 6.363905094466666e-07, "loss": 1.0408, "step": 586000 }, { "epoch": 15.37, "learning_rate": 6.32963602356865e-07, "loss": 1.0184, "step": 586500 }, { "epoch": 15.39, "learning_rate": 6.295446091358618e-07, "loss": 1.0284, "step": 587000 }, { "epoch": 15.4, "learning_rate": 6.261335442758554e-07, "loss": 1.0337, "step": 587500 }, { "epoch": 15.41, "learning_rate": 6.227372205429946e-07, "loss": 1.0273, "step": 588000 }, { "epoch": 15.43, "learning_rate": 6.193420398182335e-07, "loss": 1.0237, "step": 588500 }, { "epoch": 15.44, "learning_rate": 6.159548307004379e-07, "loss": 1.0253, "step": 589000 }, { "epoch": 15.45, "learning_rate": 6.125756075470824e-07, "loss": 1.0333, "step": 589500 }, { "epoch": 15.47, "learning_rate": 6.092043846817916e-07, "loss": 1.033, "step": 590000 }, { "epoch": 15.48, "learning_rate": 6.058411763942787e-07, "loss": 1.0371, "step": 590500 }, { "epoch": 15.49, "learning_rate": 6.024926992769612e-07, "loss": 1.0252, "step": 591000 }, { "epoch": 15.51, "learning_rate": 5.991455467779178e-07, "loss": 1.0212, "step": 591500 }, { "epoch": 15.52, "learning_rate": 5.958064514933821e-07, "loss": 1.04, "step": 592000 }, { "epoch": 15.53, "learning_rate": 5.924754275768859e-07, "loss": 1.023, "step": 592500 }, { "epoch": 15.54, "learning_rate": 5.891591269459249e-07, "loss": 1.0252, "step": 593000 }, { "epoch": 15.56, "learning_rate": 5.85844271876021e-07, "loss": 1.0256, "step": 593500 }, { "epoch": 15.57, "learning_rate": 5.825375304011768e-07, "loss": 1.0333, "step": 594000 }, { "epoch": 15.58, "learning_rate": 5.792455056448578e-07, "loss": 1.039, "step": 594500 }, { "epoch": 15.6, "learning_rate": 5.759550170777461e-07, "loss": 1.0357, "step": 595000 }, { "epoch": 15.61, "learning_rate": 5.726726840236046e-07, "loss": 1.0314, "step": 595500 }, { "epoch": 15.62, "learning_rate": 5.693985203953675e-07, "loss": 1.0136, "step": 596000 }, { "epoch": 15.64, "learning_rate": 5.661325400713397e-07, "loss": 1.012, "step": 596500 }, { "epoch": 15.65, "learning_rate": 5.628747568951393e-07, "loss": 1.0314, "step": 597000 }, { "epoch": 15.66, "learning_rate": 5.596251846756392e-07, "loss": 1.0358, "step": 597500 }, { "epoch": 15.68, "learning_rate": 5.563838371869096e-07, "loss": 1.0075, "step": 598000 }, { "epoch": 15.69, "learning_rate": 5.531507281681564e-07, "loss": 1.027, "step": 598500 }, { "epoch": 15.7, "learning_rate": 5.499323127925932e-07, "loss": 1.0259, "step": 599000 }, { "epoch": 15.72, "learning_rate": 5.467221302031053e-07, "loss": 1.0216, "step": 599500 }, { "epoch": 15.73, "learning_rate": 5.435137855349992e-07, "loss": 1.0323, "step": 600000 }, { "epoch": 15.74, "learning_rate": 5.403137338895826e-07, "loss": 1.0327, "step": 600500 }, { "epoch": 15.75, "learning_rate": 5.371219888310187e-07, "loss": 1.0246, "step": 601000 }, { "epoch": 15.77, "learning_rate": 5.339449224256996e-07, "loss": 1.0293, "step": 601500 }, { "epoch": 15.78, "learning_rate": 5.307698144117793e-07, "loss": 1.0265, "step": 602000 }, { "epoch": 15.79, "learning_rate": 5.276030534388399e-07, "loss": 1.0233, "step": 602500 }, { "epoch": 15.81, "learning_rate": 5.244509613783047e-07, "loss": 1.0278, "step": 603000 }, { "epoch": 15.82, "learning_rate": 5.213009179600098e-07, "loss": 1.0296, "step": 603500 }, { "epoch": 15.83, "learning_rate": 5.181592617188188e-07, "loss": 1.0337, "step": 604000 }, { "epoch": 15.85, "learning_rate": 5.15026005971374e-07, "loss": 1.0203, "step": 604500 }, { "epoch": 15.86, "learning_rate": 5.11901163998709e-07, "loss": 1.0221, "step": 605000 }, { "epoch": 15.87, "learning_rate": 5.087847490461962e-07, "loss": 1.0322, "step": 605500 }, { "epoch": 15.89, "learning_rate": 5.056767743234853e-07, "loss": 1.0341, "step": 606000 } ], "max_steps": 762960, "num_train_epochs": 20, "total_flos": 7.235421273337283e+17, "trial_name": null, "trial_params": null }